From c4ca7eb348810f4c60c2ba982e0d35a818f469fd Mon Sep 17 00:00:00 2001
From: ALuesink <anneluesink@live.nl>
Date: Fri, 2 Aug 2024 09:28:51 +0200
Subject: [PATCH 01/18] Removed AddOnFunctions folder

---
 .../10-collectSamplesFilled_extraoutput.R     |  94 -----
 .../add_lab_id_and_onderzoeksnummer.R         |  10 -
 DIMS/AddOnFunctions/checkOverlap.R            |  16 -
 DIMS/AddOnFunctions/check_same_samplename.R   |   4 -
 DIMS/AddOnFunctions/create_violin_plots.R     | 116 -------
 DIMS/AddOnFunctions/elementInfo.R             |   6 -
 DIMS/AddOnFunctions/export.R                  |  13 -
 DIMS/AddOnFunctions/findPeaks.Gauss.HPC.R     |  24 --
 DIMS/AddOnFunctions/fit1Peak.R                | 120 -------
 DIMS/AddOnFunctions/fit2G.R                   |  43 ---
 DIMS/AddOnFunctions/fit2peaks.R               | 100 ------
 DIMS/AddOnFunctions/fit3G.R                   |  18 -
 DIMS/AddOnFunctions/fit3peaks.R               | 122 -------
 DIMS/AddOnFunctions/fit4G.R                   |  18 -
 DIMS/AddOnFunctions/fit4peaks.R               | 149 --------
 DIMS/AddOnFunctions/fitG.R                    |  18 -
 DIMS/AddOnFunctions/fitGaussian.R             | 327 ------------------
 DIMS/AddOnFunctions/fitGaussianInit.R         |  60 ----
 DIMS/AddOnFunctions/generateBreaksFwhm.R      |  26 --
 DIMS/AddOnFunctions/generateExcelFile.R       |  24 --
 DIMS/AddOnFunctions/generateGaussian.R        |  48 ---
 DIMS/AddOnFunctions/getArea.R                 |  23 --
 DIMS/AddOnFunctions/getDeltaMZ.R              |   5 -
 DIMS/AddOnFunctions/getFitQuality.R           |  65 ----
 DIMS/AddOnFunctions/getFwhm.R                 |  24 --
 DIMS/AddOnFunctions/getPatients.R             |   9 -
 DIMS/AddOnFunctions/getSD.R                   |   9 -
 .../get_patient_data_to_helix.R               |  31 --
 DIMS/AddOnFunctions/globalAssignments.HPC.R   | 114 ------
 DIMS/AddOnFunctions/iden.code.R               |  45 ---
 DIMS/AddOnFunctions/ident.hires.noise.HPC.R   | 242 -------------
 DIMS/AddOnFunctions/isWithinXppm.R            |  51 ---
 DIMS/AddOnFunctions/is_diagnostic_patient.R   |   6 -
 DIMS/AddOnFunctions/mergeDuplicatedRows.R     |  49 ---
 DIMS/AddOnFunctions/normalization_2.1.R       |  75 ----
 DIMS/AddOnFunctions/optimizeGauss.R           |  11 -
 DIMS/AddOnFunctions/output_helix.R            |  31 --
 DIMS/AddOnFunctions/peak.grouping.Gauss.HPC.R |  88 -----
 DIMS/AddOnFunctions/prepare_alarmvalues.R     |  52 ---
 DIMS/AddOnFunctions/prepare_data.R            |  42 ---
 DIMS/AddOnFunctions/prepare_data_perpage.R    |  47 ---
 DIMS/AddOnFunctions/prepare_toplist.R         |  29 --
 DIMS/AddOnFunctions/remove.dupl.2.1.R         |  32 --
 DIMS/AddOnFunctions/remove.dupl.R             |  39 ---
 DIMS/AddOnFunctions/removeFromRepl.pat.R      |  31 --
 DIMS/AddOnFunctions/replaceZeros.R            |  90 -----
 DIMS/AddOnFunctions/replaceZeros_setseed.R    | 108 ------
 DIMS/AddOnFunctions/run.vbs                   |  22 --
 DIMS/AddOnFunctions/runVBAMacro.R             |  48 ---
 DIMS/AddOnFunctions/searchMZRange.R           | 187 ----------
 DIMS/AddOnFunctions/sourceDir.R               |   8 -
 DIMS/AddOnFunctions/statistics_z.R            |  69 ----
 DIMS/AddOnFunctions/sumCurves.R               |  39 ---
 DIMS/AddOnFunctions/trimZeros.R               |   8 -
 54 files changed, 3085 deletions(-)
 delete mode 100644 DIMS/AddOnFunctions/10-collectSamplesFilled_extraoutput.R
 delete mode 100644 DIMS/AddOnFunctions/add_lab_id_and_onderzoeksnummer.R
 delete mode 100644 DIMS/AddOnFunctions/checkOverlap.R
 delete mode 100644 DIMS/AddOnFunctions/check_same_samplename.R
 delete mode 100644 DIMS/AddOnFunctions/create_violin_plots.R
 delete mode 100644 DIMS/AddOnFunctions/elementInfo.R
 delete mode 100644 DIMS/AddOnFunctions/export.R
 delete mode 100644 DIMS/AddOnFunctions/findPeaks.Gauss.HPC.R
 delete mode 100644 DIMS/AddOnFunctions/fit1Peak.R
 delete mode 100644 DIMS/AddOnFunctions/fit2G.R
 delete mode 100644 DIMS/AddOnFunctions/fit2peaks.R
 delete mode 100644 DIMS/AddOnFunctions/fit3G.R
 delete mode 100644 DIMS/AddOnFunctions/fit3peaks.R
 delete mode 100644 DIMS/AddOnFunctions/fit4G.R
 delete mode 100644 DIMS/AddOnFunctions/fit4peaks.R
 delete mode 100644 DIMS/AddOnFunctions/fitG.R
 delete mode 100644 DIMS/AddOnFunctions/fitGaussian.R
 delete mode 100644 DIMS/AddOnFunctions/fitGaussianInit.R
 delete mode 100644 DIMS/AddOnFunctions/generateBreaksFwhm.R
 delete mode 100644 DIMS/AddOnFunctions/generateExcelFile.R
 delete mode 100644 DIMS/AddOnFunctions/generateGaussian.R
 delete mode 100644 DIMS/AddOnFunctions/getArea.R
 delete mode 100644 DIMS/AddOnFunctions/getDeltaMZ.R
 delete mode 100644 DIMS/AddOnFunctions/getFitQuality.R
 delete mode 100644 DIMS/AddOnFunctions/getFwhm.R
 delete mode 100644 DIMS/AddOnFunctions/getPatients.R
 delete mode 100644 DIMS/AddOnFunctions/getSD.R
 delete mode 100644 DIMS/AddOnFunctions/get_patient_data_to_helix.R
 delete mode 100644 DIMS/AddOnFunctions/globalAssignments.HPC.R
 delete mode 100644 DIMS/AddOnFunctions/iden.code.R
 delete mode 100644 DIMS/AddOnFunctions/ident.hires.noise.HPC.R
 delete mode 100644 DIMS/AddOnFunctions/isWithinXppm.R
 delete mode 100644 DIMS/AddOnFunctions/is_diagnostic_patient.R
 delete mode 100644 DIMS/AddOnFunctions/mergeDuplicatedRows.R
 delete mode 100644 DIMS/AddOnFunctions/normalization_2.1.R
 delete mode 100644 DIMS/AddOnFunctions/optimizeGauss.R
 delete mode 100644 DIMS/AddOnFunctions/output_helix.R
 delete mode 100644 DIMS/AddOnFunctions/peak.grouping.Gauss.HPC.R
 delete mode 100644 DIMS/AddOnFunctions/prepare_alarmvalues.R
 delete mode 100644 DIMS/AddOnFunctions/prepare_data.R
 delete mode 100644 DIMS/AddOnFunctions/prepare_data_perpage.R
 delete mode 100644 DIMS/AddOnFunctions/prepare_toplist.R
 delete mode 100644 DIMS/AddOnFunctions/remove.dupl.2.1.R
 delete mode 100644 DIMS/AddOnFunctions/remove.dupl.R
 delete mode 100644 DIMS/AddOnFunctions/removeFromRepl.pat.R
 delete mode 100644 DIMS/AddOnFunctions/replaceZeros.R
 delete mode 100644 DIMS/AddOnFunctions/replaceZeros_setseed.R
 delete mode 100644 DIMS/AddOnFunctions/run.vbs
 delete mode 100644 DIMS/AddOnFunctions/runVBAMacro.R
 delete mode 100644 DIMS/AddOnFunctions/searchMZRange.R
 delete mode 100644 DIMS/AddOnFunctions/sourceDir.R
 delete mode 100644 DIMS/AddOnFunctions/statistics_z.R
 delete mode 100644 DIMS/AddOnFunctions/sumCurves.R
 delete mode 100644 DIMS/AddOnFunctions/trimZeros.R

diff --git a/DIMS/AddOnFunctions/10-collectSamplesFilled_extraoutput.R b/DIMS/AddOnFunctions/10-collectSamplesFilled_extraoutput.R
deleted file mode 100644
index 21ccdce..0000000
--- a/DIMS/AddOnFunctions/10-collectSamplesFilled_extraoutput.R
+++ /dev/null
@@ -1,94 +0,0 @@
-#!/usr/bin/Rscript
-
-.libPaths(new = "/hpc/local/CentOS7/dbg_mz/R_libs/3.2.2")
-
-# load required packages 
-# none 
-
-# define parameters 
-cmd_args <- commandArgs(trailingOnly = TRUE)
-for (arg in cmd_args) cat("  ", arg, "\n")
-
-outdir <- cmd_args[1]
-scanmode <- cmd_args[2]
-normalization <- cmd_args[3]
-scripts <- cmd_args[4]
-z_score <- as.numeric(cmd_args[5])
-ppm <- as.numeric(cmd_args[6])
-
-#outdir <- "/Users/nunen/Documents/Metab/processed/test_old"
-#scanmode <- "negative"
-#normalization <- "disabled"
-#scripts <- "/Users/nunen/Documents/Metab/DIMS/scripts"
-#db <- "/Users/nunen/Documents/Metab/DIMS/db/HMDB_add_iso_corrNaCl_withIS_withC5OH.RData"
-#z_score <- 0
-
-object.files = list.files(paste(outdir, "9-samplePeaksFilled", sep="/"), full.names=TRUE, pattern=scanmode)
-outlist.tot=NULL
-for (i in 1:length(object.files)) {
-  load(object.files[i])
-  print(print(object.files[i]))
-  outlist.tot = rbind(outlist.tot, final.outlist.idpat3)
-}
-
-source(paste(scripts, "AddOnFunctions/sourceDir.R", sep="/"))
-sourceDir(paste(scripts, "AddOnFunctions", sep="/"))
-
-# remove duplicates
-outlist.tot = mergeDuplicatedRows(outlist.tot)
-
-# sort on mass
-outlist.tot = outlist.tot[order(outlist.tot[,"mzmed.pgrp"]),]
-
-# normalization
-load(paste0(outdir, "/repl.pattern.",scanmode,".RData"))
-
-if (normalization != "disabled") {
-  outlist.tot = normalization_2.1(outlist.tot, fileName, names(repl.pattern.filtered), on=normalization, assi_label="assi_HMDB")
-}
-
-if (z_score == 1) {
-  outlist.stats = statistics_z(outlist.tot, sortCol=NULL, adducts=FALSE)
-  nr.removed.samples=length(which(repl.pattern.filtered[]=="character(0)"))
-  order.index.int=order(colnames(outlist.stats)[8:(length(repl.pattern.filtered)-nr.removed.samples+7)])
-  outlist.stats.more = cbind(outlist.stats[,1:7],
-                             outlist.stats[,(length(repl.pattern.filtered)-nr.removed.samples+8):(length(repl.pattern.filtered)-nr.removed.samples+8+6)],
-                             outlist.stats[,8:(length(repl.pattern.filtered)-nr.removed.samples+7)][order.index.int],
-                             outlist.stats[,(length(repl.pattern.filtered)-nr.removed.samples+5+10):ncol(outlist.stats)])
-  
-  tmp.index=grep("_Zscore", colnames(outlist.stats.more), fixed = TRUE)
-  tmp.index.order=order(colnames(outlist.stats.more[,tmp.index]))
-  tmp = outlist.stats.more[,tmp.index[tmp.index.order]]
-  outlist.stats.more=outlist.stats.more[,-tmp.index]
-  outlist.stats.more=cbind(outlist.stats.more,tmp)
-  outlist.tot = outlist.stats.more
-}
-
-# filter identified compounds
-index.1=which((outlist.tot[,"assi_HMDB"]!="") & (!is.na(outlist.tot[,"assi_HMDB"])))
-index.2=which((outlist.tot[,"iso_HMDB"]!="") & (!is.na(outlist.tot[,"iso_HMDB"])))
-index=union(index.1,index.2)
-outlist.ident = outlist.tot[index,]
-outlist.not.ident = outlist.tot[-index,]
-
-if (z_score == 1) {
-  outlist.ident$ppmdev=as.numeric(outlist.ident$ppmdev)
-  outlist.ident <- outlist.ident[which(outlist.ident["ppmdev"] >= -ppm & outlist.ident["ppmdev"] <= ppm),]
-}
-# NAs in theormz_noise <======================================================================= uitzoeken!!!
-outlist.ident$theormz_noise[which(is.na(outlist.ident$theormz_noise))] = 0
-outlist.ident$theormz_noise=as.numeric(outlist.ident$theormz_noise)
-outlist.ident$theormz_noise[which(is.na(outlist.ident$theormz_noise))] = 0
-outlist.ident$theormz_noise=as.numeric(outlist.ident$theormz_noise)
-
-save(outlist.not.ident, outlist.ident, file=paste(outdir, "/outlist_identified_", scanmode, ".RData", sep=""))
-
-# Extra output in Excel-readable format:
-remove_columns <- c("fq.best", "fq.worst", "mzmin.pgrp", "mzmax.pgrp")
-remove_colindex <- which(colnames(outlist.ident) %in% remove_columns)
-outlist.ident <- outlist.ident[ , -remove_colindex]
-write.table(outlist.ident, file=paste0(outdir, "/outlist_identified_", scanmode, ".txt"), sep="\t", row.names = FALSE)
-remove_colindex <- which(colnames(outlist.not.ident) %in% remove_columns)
-outlist.not.ident <- outlist.not.ident[ , -remove_colindex]
-write.table(outlist.not.ident, file=paste0(outdir, "/outlist_not_identified_", scanmode, ".txt"), sep="\t", row.names = FALSE)
-
diff --git a/DIMS/AddOnFunctions/add_lab_id_and_onderzoeksnummer.R b/DIMS/AddOnFunctions/add_lab_id_and_onderzoeksnummer.R
deleted file mode 100644
index 9d91a55..0000000
--- a/DIMS/AddOnFunctions/add_lab_id_and_onderzoeksnummer.R
+++ /dev/null
@@ -1,10 +0,0 @@
-add_lab_id_and_onderzoeksnummer <- function(df_metabs_helix) {
-  # Split patient number into labnummer and Onderzoeksnummer
-  for (row in 1:nrow(df_metabs_helix)) {
-    df_metabs_helix[row,"labnummer"] <- gsub("^P|\\.[0-9]*", "", df_metabs_helix[row,"Patient"])
-    labnummer_split <- strsplit(as.character(df_metabs_helix[row, "labnummer"]), "M")[[1]]
-    df_metabs_helix[row, "Onderzoeksnummer"] <- paste0("MB", labnummer_split[1], "/", labnummer_split[2])
-  }
-  
-  return(df_metabs_helix)
-}
diff --git a/DIMS/AddOnFunctions/checkOverlap.R b/DIMS/AddOnFunctions/checkOverlap.R
deleted file mode 100644
index f7ef818..0000000
--- a/DIMS/AddOnFunctions/checkOverlap.R
+++ /dev/null
@@ -1,16 +0,0 @@
-checkOverlap <- function(range1,range2){
-  if (length(intersect(range1,range2))==2) {
-    # Overlap
-    # message("Overlap, smaller range is used")
-    if (length(range1) >= length(range2)){
-      range1=range1[-length(range1)]  
-    } else {
-      range2=range2[-1]
-    }
-  } else if (length(intersect(range1,range2))==3){
-    # message("Overlap, smaller range is used")
-    range1=range1[-length(range1)]  
-    range2=range2[-1]
-  }
-  return(list("range1"=range1,"range2"=range2))
-}
diff --git a/DIMS/AddOnFunctions/check_same_samplename.R b/DIMS/AddOnFunctions/check_same_samplename.R
deleted file mode 100644
index 16ae9cf..0000000
--- a/DIMS/AddOnFunctions/check_same_samplename.R
+++ /dev/null
@@ -1,4 +0,0 @@
-# function to test whether intensity and Z-score columns match
-check_same_samplename <- function(int_col_name, zscore_col_name) {
-  paste0(int_col_name, "_Zscore") == zscore_col_name
-}
diff --git a/DIMS/AddOnFunctions/create_violin_plots.R b/DIMS/AddOnFunctions/create_violin_plots.R
deleted file mode 100644
index df398be..0000000
--- a/DIMS/AddOnFunctions/create_violin_plots.R
+++ /dev/null
@@ -1,116 +0,0 @@
-create_violin_plots <- function(pdf_dir, pt_name, metab_perpage, top_metab_pt=NULL) {
-  
-  # set parameters for plots
-  plot_height <- 9.6 
-  plot_width <- 6
-  fontsize <- 1 
-  nr_plots_perpage <- 20
-  circlesize <- 0.8 
-  colors_4plot <- c("#22E4AC", "#00B0F0", "#504FFF","#A704FD","#F36265","#DA0641")
-  #                   green     blue      blue/purple purple    orange    red
-  
-  # patient plots, create the PDF device
-  pt_name_sub <- pt_name
-  suffix <- ""
-  if (grepl("Diagnostics", pdf_dir) & is_diagnostic_patient(pt_name)) {
-    prefix <- "MB"
-    suffix <- "_DIMS_PL_DIAG"
-    # substitute P and M in P2020M00001 into right format for Helix
-    pt_name_sub <- gsub("[PM]", "", pt_name)
-    pt_name_sub <- gsub("\\..*", "", pt_name_sub)
-  } else if (grepl("Diagnostics", pdf_dir)) {
-    prefix <- "Dx_"
-  } else if (grepl("IEM", pdf_dir)) {
-    prefix <- "IEM_"
-  } else {
-    prefix <- "R_"
-  }
-  
-  pdf(paste0(pdf_dir, "/", prefix, pt_name_sub, suffix, ".pdf"),
-      onefile = TRUE,
-      width = plot_width,
-      height = plot_height)
-  
-  # page headers:
-  page_headers <- names(metab_perpage)
-  
-  # put table into PDF file, if not empty
-  if (!is.null(dim(top_metab_pt))) {
-    plot.new()
-    # get the names and numbers in the table aligned
-    table_theme <- ttheme_default(core = list(fg_params = list(hjust=0, x=0.05, fontsize=6)),
-                                  colhead = list(fg_params = list(fontsize=8, fontface="bold")))
-    grid.table(top_metab_pt, theme = table_theme, rows = NULL)
-    # g <- tableGrob(top_metab_pt)
-    # grid.draw(g)
-    text(x=0.45, y=1.02, paste0("Top deviating metabolites for patient: ", pt_name), font=1, cex=1)
-  }
-  
-  # violin plots
-  for (page_index in 1:length(metab_perpage)) {
-    # extract list of metabolites to plot on a page
-    metab_list_2plot <- metab_perpage[[page_index]]
-    # extract original data for patient of interest (pt_name) before cut-offs
-    pt_list_2plot_orig <- metab_list_2plot[which(metab_list_2plot$variable == pt_name), ]
-    # cut off Z-scores higher than 20 or lower than -5 (for nicer plots)
-    metab_list_2plot$value[metab_list_2plot$value >  20] <-  20
-    metab_list_2plot$value[metab_list_2plot$value <  -5] <-  -5
-    # extract data for patient of interest (pt_name)
-    pt_list_2plot <- metab_list_2plot[which(metab_list_2plot$variable == pt_name), ]
-    # restore original Z-score before cut-off, for showing Z-scores in PDF
-    pt_list_2plot$value_orig <- pt_list_2plot_orig$value
-    # remove patient of interest (pt_name) from list; violins will be made up of controls and other patients
-    metab_list_2plot <- metab_list_2plot[-which(metab_list_2plot$variable == pt_name), ]
-    # subtitle per page
-    sub_perpage <- gsub("_", " ", page_headers[page_index])
-    # for IEM plots, put subtitle on two lines
-    sub_perpage <- gsub("probability", "\nprobability", sub_perpage)
-    # add size parameter for showing Z-score of patient per metabolite
-    Z_size <- rep(3, nrow(pt_list_2plot))
-    # set size to 0 if row is empty
-    Z_size[is.na(pt_list_2plot$value)] <- 0
-    
-    # draw violin plot. shape=22 gives square for patient of interest
-    ggplot_object <- ggplot(metab_list_2plot, aes(x=value, y=HMDB_name)) +
-      theme(axis.text.y=element_text(size=rel(fontsize)), plot.caption = element_text(size=rel(fontsize))) +
-      xlim(-5, 20) +
-      geom_violin(scale="width") +
-      geom_point(data = pt_list_2plot, aes(color=value), size = 3.5*circlesize, shape=22, fill="white") +
-      scale_fill_gradientn(colors = colors_4plot, values = NULL, space = "Lab", na.value = "grey50", guide = "colourbar", aesthetics = "colour") +
-      # add Z-score value for patient of interest at x=16
-      geom_text(data = pt_list_2plot, aes(16, label = paste0("Z=", round(value_orig, 2))), hjust = "left", vjust = +0.2, size = Z_size) +
-      # add labels. Use font Courier to get all the plots in the same location.
-      labs(x = "Z-scores", y = "Metabolites", subtitle = sub_perpage, color = "z-score") + 
-      theme(axis.text.y = element_text(family = "Courier", size=6)) +
-      # do not show legend
-      theme(legend.position="none") +
-      # add title 
-      ggtitle(label = paste0("Results for patient ", pt_name)) + 
-      # labs(x = "Z-scores", y = "Metabolites", title = paste0("Results for patient ", pt_name), subtitle = sub_perpage, color = "z-score") + 
-      # add vertical lines
-      geom_vline(xintercept = 2, col = "grey", lwd = 0.5, lty=2) +
-      geom_vline(xintercept = -2, col = "grey", lwd = 0.5, lty=2)
-    
-    suppressWarnings(print(ggplot_object))
-    
-  }
-  
-  # add explanation of violin plots, version number etc.
-  # plot.new()
-  plot(NA, xlim=c(0,5), ylim=c(0,5), bty='n', xaxt='n', yaxt='n', xlab='', ylab='')
-  if (length(explanation) > 0) {
-    text(0.2, 5, explanation[1], pos=4, cex=0.8)
-    for (line_index in 2:length(explanation)) {
-      text_y_position <- 5 - (line_index*0.2)
-      text(-0.2, text_y_position, explanation[line_index], pos=4, cex=0.5)
-    }
-    # full_explanation <- paste(explanation[2:length(explanation)], sep="  \n")
-    # text(0.2, 4, full_explanation, pos=4, cex=0.6)
-    #explanation_grob=textGrob(apply(full_explanation, 2, paste, collapse="\n"))
-    #grid.arrange(explanation_grob)
-  }
-  
-  # close the PDF file
-  dev.off()
-  
-}
diff --git a/DIMS/AddOnFunctions/elementInfo.R b/DIMS/AddOnFunctions/elementInfo.R
deleted file mode 100644
index 18eaeff..0000000
--- a/DIMS/AddOnFunctions/elementInfo.R
+++ /dev/null
@@ -1,6 +0,0 @@
-elementInfo <- function(name, elements = NULL) { # from Rdisop function .getElement  
-  if (!is.list(elements) || length(elements)==0 ) {
-    elements <- initializePSE() }
-  if (name=="CH3OH+H"){rex<-"^CH3OH\\+H$"}else{rex <- paste ("^",name,"$", sep="")}
-  elements [[grep (rex, sapply (elements, function(x) {x$name}))]]
-}
\ No newline at end of file
diff --git a/DIMS/AddOnFunctions/export.R b/DIMS/AddOnFunctions/export.R
deleted file mode 100644
index cab79a0..0000000
--- a/DIMS/AddOnFunctions/export.R
+++ /dev/null
@@ -1,13 +0,0 @@
-export <- function(peaklist, plotdir, adducts, control_label, case_label, patients, sub, fileName){
-  # peaklist = outlist.adducts
-  # adducts=TRUE
-  # control_label="C"
-  # case_label="P"
-  # patients = getPatients(outlist.adducts)
-  # sub=3000
-  
-  # peaklist = statistics_z_4export(as.data.frame(peaklist), plotdir, patients, adducts, control_label, case_label)
-  
-  # generateExcelFile(peaklist, file.path(plotdir), imageNum=2, fileName, subName=c("","_box"), sub, adducts)
-  
-}
diff --git a/DIMS/AddOnFunctions/findPeaks.Gauss.HPC.R b/DIMS/AddOnFunctions/findPeaks.Gauss.HPC.R
deleted file mode 100644
index c00aa92..0000000
--- a/DIMS/AddOnFunctions/findPeaks.Gauss.HPC.R
+++ /dev/null
@@ -1,24 +0,0 @@
-### fit Gaussian estimate mean and integrate to obtain intensity
-findPeaks.Gauss.HPC <- function(plist, breaks.fwhm, int.factor, scale, resol, outdir, scanmode, plot, thresh, width, height) {
-  sampname <- colnames(plist)[1]
-  
-  range <- as.vector(plist)
-  names(range) <- rownames(plist)
-  
-  values <- list("mean"=NULL, "area"=NULL, "nr"=NULL, "min"=NULL, "max"=NULL, "qual"=NULL, "spikes"=0)
-  
-  values <- searchMZRange(range, values, int.factor, scale, resol, outdir, sampname, scanmode, plot, width, height, thresh)
-  
-  outlist.persample <- NULL
-  outlist.persample <- cbind("samplenr"=values$nr, "mzmed.pkt"=values$mean, "fq"=values$qual, "mzmin.pkt"=values$min, "mzmax.pkt"=values$max, "height.pkt"=values$area)
-
-  index <- which(outlist.persample[ ,"height.pkt"]==0)
-  if (length(index) > 0) {
-    outlist.persample <- outlist.persample[-index,]
-  }
-  
-  # save(outlist.persample, file=paste(outdir, paste(sampname, "_", scanmode, ".RData", sep=""), sep="/"))
-  save(outlist.persample, file=paste("./", sampname, "_", scanmode, ".RData", sep=""))
-  
-  cat(paste("There were", values$spikes, "spikes!"))
-}
diff --git a/DIMS/AddOnFunctions/fit1Peak.R b/DIMS/AddOnFunctions/fit1Peak.R
deleted file mode 100644
index b09ce20..0000000
--- a/DIMS/AddOnFunctions/fit1Peak.R
+++ /dev/null
@@ -1,120 +0,0 @@
-fit1Peak <- function(x2,x,y,index,scale,resol,plot,FQ,useBounds) {
-  #FQ=FQ1
-  
-  if (length(y)<3){
-    message("Range to small, no fit possible!")
-  } else {
-    
-    if ((length(y)==4)) {
-      mu = weighted.mean(x,y)
-      sigma = getSD(x,y)
-      fitP = fitG_2(x,y,sigma,mu,scale,useBounds)
-    } else {
-      
-      if ((length(x) - length(index)) < 2) {
-        range1=c((length(x)-4):length(x))
-      } else if (length(index) < 2) {
-        range1=c(1:5)
-      } else {
-        range1=c(index[1]-2,index[1]-1,index[1],index[1]+1,index[1]+2)
-      }
-      
-      if (range1[1]==0) range1=range1[-1]
-      
-      # remove NA
-      if (length(which(is.na(y[range1])))!=0) range1=range1[-which(is.na(y[range1]))]
-      
-      mu = weighted.mean(x[range1],y[range1])
-      sigma = getSD(x[range1],y[range1])
-      fitP = fitG_2(x,y,sigma,mu,scale,useBounds)
-    }
-    
-    p2 = fitP$par
-    
-    #fq_new = abs(sum(y) - sum(p2[2]*dnorm(x,p2[1],sigma)))/sum(y)
-    fq_new = getFitQuality(x,y,p2[1],p2[1],resol,p2[2],sigma)$fq_new
-    
-    if (plot & (fq_new < FQ)) lines(x2,p2[2]*dnorm(x2,p2[1],sigma), col="green")
-    
-    scale_new = 1.2*scale
-    # cat(fq_new)
-    
-    if (fq_new > FQ) { # <=== bad fit?
-      # optimize scaling factor
-      fq = 0
-      scale = 0
-      
-      if (sum(y)>sum(p2[2]*dnorm(x,p2[1],sigma))){
-        while ((round(fq, digits = 3) != round(fq_new, digits = 3)) & (scale_new<10000))  {
-          fq = fq_new
-          scale = scale_new
-          
-          #cat(scale)
-          fitP = fitG_2(x,y,sigma,mu,scale,useBounds)
-          p2 = fitP$par
-          
-          #fq_new = abs(sum(y) - sum(p2[2]*dnorm(x,p2[1],sigma)))/sum(y)
-          fq_new = getFitQuality(x,y,p2[1],p2[1],resol,p2[2],sigma)$fq_new
-          scale_new=1.2*scale
-          
-          if (plot & (fq_new < FQ)) lines(x2,p2[2]*dnorm(x2,p2[1],sigma), col="green")
-          #           cat(paste("fq_new: ", fq_new))
-          #           cat(paste("scale_new: ", scale_new))
-          #           (round(fq, digits = 4) != round(fq_new, digits = 4))
-        }
-      } else {
-        while ((round(fq, digits = 3) != round(fq_new, digits = 3)) & (scale_new<10000)) {
-          fq = fq_new
-          scale = scale_new
-          
-          # cat(scale)
-          fitP = fitG_2(x,y,sigma,mu,scale,useBounds)
-          p2 = fitP$par
-          
-          #fq_new = abs(sum(y) - sum(p2[2]*dnorm(x,p2[1],sigma)))/sum(y)
-          fq_new = getFitQuality(x,y,p2[1],p2[1],resol,p2[2],sigma)$fq_new
-          scale_new=0.8*scale
-          
-          if (plot & (fq_new < FQ)) lines(x2,p2[2]*dnorm(x2,p2[1],sigma), col="green")
-          # cat(paste("fq_new: ", fq_new))
-        }
-      }
-      
-      if (fq < fq_new) {
-        #         cat(paste("fq_new: ", fq_new))
-        #         cat(paste("fq: ", fq))
-        #         cat(paste("scale_new: ", scale_new))
-        #         cat(paste("scale: ", scale))
-        
-        fitP = fitG_2(x,y,sigma,mu,scale,useBounds)
-        p2 = fitP$par
-        fq_new = fq
-        # cat(paste("==> fq_new: ", fq_new))
-        if (plot & (fq_new < FQ)) lines(x2,p2[2]*dnorm(x2,p2[1],sigma), col="dark green")
-        
-      }
-    }
-    
-    if (plot & (fq_new < FQ)) {
-      # plot ###################
-      #lines(x2,p2[2]*dnorm(x2,p2[1],sigma), col="green")
-      fwhm = getFwhm(p2[1],resol)
-      half_max = max(p2[2]*dnorm(x2,p2[1],sigma))*0.5
-      lines(c(p2[1] - 0.5*fwhm, p2[1] + 0.5*fwhm),c(half_max,half_max),col="orange")
-      abline(v = p2[1], col="green")
-      h=c(paste("mean =", p2[1], sep=" "),
-          paste("fq =", fq_new, sep=" "))
-      legend("topright", legend=h)
-      ##########################
-      
-      
-      #       abline(v = x[6], col="red")
-      #       fwhm = getFwhm(x[6])
-      #       abline(v =x[6] + 0.6*fwhm, col="red")
-      #       abline(v =x[6] - 0.6*fwhm, col="red")
-      
-    }
-  }
-  
-  return(list("mean"=p2[1], "scale"=p2[2], "sigma"=sigma, "qual"=fq_new))
-}
diff --git a/DIMS/AddOnFunctions/fit2G.R b/DIMS/AddOnFunctions/fit2G.R
deleted file mode 100644
index 9dc76dd..0000000
--- a/DIMS/AddOnFunctions/fit2G.R
+++ /dev/null
@@ -1,43 +0,0 @@
-fit2G_2 <- function(x,y,sig1,sig2,mu1,scale1,mu2,scale2,useBounds){
-  
-  f = function(p){
-    d = p[2]*dnorm(x,mean=p[1],sd=sig1) + p[4]*dnorm(x,mean=p[3],sd=sig2)
-    sum((d-y)^2)
-  }
-  
-  if (useBounds){
-    lower = c(x[1],0,x[1],0)
-    upper = c(x[length(x)],Inf,x[length(x)],Inf)
-    
-    if (is.null(mu2) && is.null(scale2) && is.null(sig2)){
-      sig2=sig1
-      optim(c(as.numeric(mu1),
-              as.numeric(scale1),
-              as.numeric(mu1),
-              as.numeric(scale1)),
-            f,control=list(maxit=10000),method="L-BFGS-B",lower=lower,upper=upper)
-    } else {
-      optim(c(as.numeric(mu1),
-              as.numeric(scale1),
-              as.numeric(mu2),
-              as.numeric(scale2)),
-            f,control=list(maxit=10000),method="L-BFGS-B",lower=lower,upper=upper)
-    }
-    
-  } else {
-    if (is.null(mu2) && is.null(scale2) && is.null(sig2)){
-      sig2=sig1
-      optim(c(as.numeric(mu1),
-              as.numeric(scale1),
-              as.numeric(mu1),
-              as.numeric(scale1)),
-            f,control=list(maxit=10000))
-    } else{
-      optim(c(as.numeric(mu1),
-              as.numeric(scale1),
-              as.numeric(mu2),
-              as.numeric(scale2)),
-            f,control=list(maxit=10000))
-    }  
-  }
-}
diff --git a/DIMS/AddOnFunctions/fit2peaks.R b/DIMS/AddOnFunctions/fit2peaks.R
deleted file mode 100644
index 4d9c64d..0000000
--- a/DIMS/AddOnFunctions/fit2peaks.R
+++ /dev/null
@@ -1,100 +0,0 @@
-fit2peaks <- function(x2,x,y,index,scale,resol,useBounds=FALSE,plot=FALSE,FQ,int.factor){
-  
-  peak.mean = NULL
-  peak.area = NULL
-  peak.scale = NULL
-  peak.sigma = NULL
-  
-  range1=c(index[1]-2,index[1]-1,index[1],index[1]+1,index[1]+2)
-  if (range1[1]==0) range1=range1[-1]
-  
-  range2=c(index[2]-2,index[2]-1,index[2],index[2]+1,index[2]+2)
-  
-  if (length(x)<range2[length(range2)]) range2=range2[-length(range2)]
-  
-  range1=checkOverlap(range1,range2)[[1]]
-  range2=checkOverlap(range1,range2)[[2]]
-  
-  # check for negative or 0
-  remove=which(range1<1)
-  if (length(remove)>0) range1=range1[-remove]
-  remove=which(range2<1)
-  if (length(remove)>0) range2=range2[-remove]
-  
-  # remove NA
-  if (length(which(is.na(y[range1])))!=0) range1=range1[-which(is.na(y[range1]))]
-  if (length(which(is.na(y[range2])))!=0) range2=range2[-which(is.na(y[range2]))]
-  
-  mu1 = weighted.mean(x[range1],y[range1])
-  sigma1 = getSD(x[range1],y[range1])
-  
-  #   message(paste("fit2peaks mu =>", mu1))
-  #   message(paste("fit2peaks sigma =>", sigma1))
-  #   message(paste("fit2peaks scale =>", scale))
-  
-  fitP = fitG_2(x[range1],y[range1],sigma1,mu1,scale,useBounds)
-  p = fitP$par
-  
-  mu2 = weighted.mean(x[range2],y[range2])
-  sigma2 = getSD(x[range2],y[range2])
-  fitP = fitG_2(x[range2],y[range2],sigma2,mu2,scale,useBounds)
-  p2 = fitP$par
-  
-  fit2P = fit2G_2(x, y, sigma1, sigma2, p[1], p[2], p2[1], p2[2],useBounds)
-  p3 = fit2P$par
-  
-  if (is.null(sigma2)) sigma2=sigma1
-  
-  
-  # plot ###################
-  sumFit2 = (p3[2]*dnorm(x2,p3[1],sigma1))+(p3[4]*dnorm(x2,p3[3],sigma2))
-  sumFit = (p3[2]*dnorm(x,p3[1],sigma1))+(p3[4]*dnorm(x,p3[3],sigma2))
-  fq=getFitQuality(x,y,sort(c(p3[1],p3[3]))[1],sort(c(p3[1],p3[3]))[2],resol,sumFit=sumFit)$fq_new
-  
-  fwhm = getFwhm(p3[1],resol)
-  half_max = max(p3[2]*dnorm(x2,p3[1],sigma1))*0.5
-  if (plot & (fq < FQ)) lines(c(p3[1] - 0.5*fwhm, p3[1] + 0.5*fwhm),c(half_max,half_max),col="orange")
-  if (plot & (fq < FQ)) lines(x2,p3[4]*dnorm(x2,p3[3],sigma2),col="grey")
-  if (plot & (fq < FQ)) abline(v = p3[3], col="grey")
-  
-  fwhm = getFwhm(p3[3],resol)
-  half_max = max(p3[4]*dnorm(x2,p3[3],sigma2))*0.5
-  if (plot & (fq < FQ)) lines(c(p3[3] - 0.5*fwhm, p3[3] + 0.5*fwhm),c(half_max,half_max),col="orange")
-  
-  if (plot & (fq < FQ)) lines(x2,sumFit2,col="black")
-  
-  if (plot & (fq < FQ)) lines(x2,p3[2]*dnorm(x2,p3[1],sigma1),col="grey")
-  if (plot & (fq < FQ)) abline(v = p3[1], col="grey")
-  
-  
-  h2=c(paste("mean =", p3[1], sep=" "),
-       paste("mean =", p3[3], sep=" "),
-       paste("fq =", fq, sep=" "))
-  
-  if (plot & (fq < FQ)) legend("topright", legend=h2)
-  ##########################
-  
-  #  lines(x2,p3[4]*dnorm(x2,p3[3],sigma2),col="red")
-  #   area1 = sum(p3[2]*dnorm(x2,p3[1],sigma1))
-  #   area2 = sum(p3[4]*dnorm(x2,p3[3],sigma2))
-  
-  #   area1 = max(p3[2]*dnorm(x2,p3[1],sigma1))
-  #   area2 = max(p3[4]*dnorm(x2,p3[3],sigma2))
-  
-  area1 = getArea(p3[1],resol,p3[2],sigma1,int.factor)
-  area2 = getArea(p3[3],resol,p3[4],sigma2,int.factor)
-  
-  peak.area = c(peak.area, area1)
-  peak.area = c(peak.area, area2)
-  
-  peak.mean = c(peak.mean, p3[1])
-  peak.mean = c(peak.mean, p3[3])
-  
-  peak.scale = c(peak.scale, p3[2])
-  peak.scale = c(peak.scale, p3[4])
-  
-  peak.sigma = c(peak.sigma, sigma1)
-  peak.sigma = c(peak.sigma, sigma2)
-  
-  return(list("mean"=peak.mean, "scale"=peak.scale, "sigma"=peak.sigma, "area"=peak.area, "qual"=fq))
-}
\ No newline at end of file
diff --git a/DIMS/AddOnFunctions/fit3G.R b/DIMS/AddOnFunctions/fit3G.R
deleted file mode 100644
index 05543c5..0000000
--- a/DIMS/AddOnFunctions/fit3G.R
+++ /dev/null
@@ -1,18 +0,0 @@
-fit3G_2 <- function(x,y,sig1,sig2,sig3,mu1,scale1,mu2,scale2,mu3,scale3,useBounds){
-  
-  f = function(p){
-    d = p[2]*dnorm(x,mean=p[1],sd=sig1) + p[4]*dnorm(x,mean=p[3],sd=sig2) + p[6]*dnorm(x,mean=p[5],sd=sig3)
-    sum((d-y)^2)
-  }
-  
-  if (useBounds){
-    lower = c(x[1],0,x[1],0,x[1],0)
-    upper = c(x[length(x)],Inf,x[length(x)],Inf,x[length(x)],Inf)
-    
-    optim(c(mu1,scale1,mu2,scale2,mu3,scale3),f,control=list(maxit=10000),method="L-BFGS-B",lower=lower,upper=upper)
-    
-  } else {
-    optim(c(mu1,scale1,mu2,scale2,mu3,scale3),f,control=list(maxit=10000))
-  }
-  
-}
diff --git a/DIMS/AddOnFunctions/fit3peaks.R b/DIMS/AddOnFunctions/fit3peaks.R
deleted file mode 100644
index e373f14..0000000
--- a/DIMS/AddOnFunctions/fit3peaks.R
+++ /dev/null
@@ -1,122 +0,0 @@
-fit3peaks <- function(x2,x,y,index,scale,resol,useBounds=FALSE,plot=FALSE,FQ,int.factor){
-  
-  peak.mean = NULL
-  peak.area = NULL
-  peak.scale = NULL
-  peak.sigma = NULL
-  
-  range1=c(index[1]-2,index[1]-1,index[1],index[1]+1,index[1]+2)
-  range2=c(index[2]-2,index[2]-1,index[2],index[2]+1,index[2]+2)
-  range3=c(index[3]-2,index[3]-1,index[3],index[3]+1,index[3]+2)
-  
-  remove=which(range1<1)
-  if (length(remove)>0) {
-    range1=range1[-remove]
-  }
-  remove=which(range2<1)
-  if (length(remove)>0) {
-    range2=range2[-remove]
-  }
-  
-  if (length(x)<range3[length(range3)]) range3=range3[-length(range3)]
-  
-  range1=checkOverlap(range1,range2)[[1]]
-  range2=checkOverlap(range1,range2)[[2]]
-  range2=checkOverlap(range2,range3)[[1]]
-  range3=checkOverlap(range2,range3)[[2]]
-  
-  # check for negative or 0
-  remove=which(range1<1)
-  if (length(remove)>0) range1=range1[-remove]
-  remove=which(range2<1)
-  if (length(remove)>0) range2=range2[-remove]
-  remove=which(range3<1)
-  if (length(remove)>0) range3=range3[-remove]
-  
-  # remove NA
-  if (length(which(is.na(y[range1])))!=0) range1=range1[-which(is.na(y[range1]))]
-  if (length(which(is.na(y[range2])))!=0) range2=range2[-which(is.na(y[range2]))]
-  if (length(which(is.na(y[range3])))!=0) range3=range3[-which(is.na(y[range3]))]
-  
-  mu1 = weighted.mean(x[range1],y[range1])
-  sigma1 = getSD(x[range1],y[range1])
-  fitP = fitG_2(x[range1],y[range1],sigma1,mu1,scale,useBounds)
-  p = fitP$par
-  
-  mu2 = weighted.mean(x[range2],y[range2])
-  sigma2 = getSD(x[range2],y[range2])
-  fitP = fitG_2(x[range2],y[range2],sigma2,mu2,scale,useBounds)
-  p2 = fitP$par
-  
-  mu3 = weighted.mean(x[range3],y[range3])
-  sigma3 = getSD(x[range3],y[range3])
-  fitP = fitG_2(x[range3],y[range3],sigma3,mu3,scale,useBounds)
-  p3 = fitP$par
-  
-  fit3P = fit3G_2(x, y, sigma1, sigma2, sigma3, p[1], p[2], p2[1], p2[2], p3[1], p3[2], useBounds)
-  p4 = fit3P$par
-  
-  # plot ##############################
-  sumFit2 = (p4[2]*dnorm(x2,p4[1],sigma1))+(p4[4]*dnorm(x2,p4[3],sigma2))+(p4[6]*dnorm(x2,p4[5],sigma3))
-  sumFit = (p4[2]*dnorm(x,p4[1],sigma1))+(p4[4]*dnorm(x,p4[3],sigma2))+(p4[6]*dnorm(x,p4[5],sigma3))
-  fq=getFitQuality(x,y,sort(c(p4[1],p4[3],p4[5]))[1],sort(c(p4[1],p4[3],p4[5]))[3],resol,sumFit=sumFit)$fq_new
-  
-  if (plot & (fq < FQ)) lines(x2,p4[2]*dnorm(x2,p4[1],sigma1),col="yellow")
-  if (plot & (fq < FQ)) abline(v = p4[1], col="yellow")
-  fwhm = getFwhm(p4[1],resol)
-  half_max = max(p4[2]*dnorm(x2,p4[1],sigma1))*0.5
-  if (plot & (fq < FQ)) lines(c(p4[1] - 0.5*fwhm, p4[1] + 0.5*fwhm),c(half_max,half_max),col="orange")
-  
-  if (plot & (fq < FQ)) lines(x2,p4[4]*dnorm(x2,p4[3],sigma2),col="yellow")
-  if (plot & (fq < FQ)) abline(v = p4[3], col="yellow")
-  fwhm = getFwhm(p4[3],resol)
-  half_max = max(p4[4]*dnorm(x2,p4[3],sigma2))*0.5
-  if (plot & (fq < FQ)) lines(c(p4[3] - 0.5*fwhm, p4[3] + 0.5*fwhm),c(half_max,half_max),col="orange")
-  
-  if (plot & (fq < FQ)) lines(x2,p4[6]*dnorm(x2,p4[5],sigma3),col="yellow")
-  if (plot & (fq < FQ)) abline(v = p4[5], col="yellow")
-  fwhm = getFwhm(p4[5],resol)
-  half_max = max(p4[6]*dnorm(x2,p4[5],sigma3))*0.5
-  if (plot & (fq < FQ)) lines(c(p4[5] - 0.5*fwhm, p4[5] + 0.5*fwhm),c(half_max,half_max),col="orange")
-  
-  if (plot & (fq < FQ)) lines(x2,sumFit2,col="red")
-  
-  h2=c(paste("mean =", p4[1], sep=" "),
-       paste("mean =", p4[3], sep=" "),
-       paste("mean =", p4[5], sep=" "),
-       paste("fq =", fq, sep=" "))
-  
-  if (plot & (fq < FQ)) legend("topright", legend=h2)
-  #########################################
-  
-  #   area1 = sum(p4[2]*dnorm(x2,p4[1],sigma1))
-  #   area2 = sum(p4[4]*dnorm(x2,p4[3],sigma2))
-  #   area3 = sum(p4[6]*dnorm(x2,p4[5],sigma3))
-  
-  #   area1 = max(p4[2]*dnorm(x2,p4[1],sigma1))
-  #   area2 = max(p4[4]*dnorm(x2,p4[3],sigma2))
-  #   area3 = max(p4[6]*dnorm(x2,p4[5],sigma3))
-  
-  area1 = getArea(p4[1],resol,p4[2],sigma1,int.factor)
-  area2 = getArea(p4[3],resol,p4[4],sigma2,int.factor)
-  area3 = getArea(p4[5],resol,p4[6],sigma3,int.factor)
-  
-  peak.area = c(peak.area, area1)
-  peak.area = c(peak.area, area2)
-  peak.area = c(peak.area, area3)
-  
-  peak.mean = c(peak.mean, p4[1])
-  peak.mean = c(peak.mean, p4[3])
-  peak.mean = c(peak.mean, p4[5])
-  
-  peak.scale = c(peak.scale, p4[2])
-  peak.scale = c(peak.scale, p4[4])
-  peak.scale = c(peak.scale, p4[6])
-  
-  peak.sigma = c(peak.sigma, sigma1)
-  peak.sigma = c(peak.sigma, sigma2)
-  peak.sigma = c(peak.sigma, sigma3)
-  
-  return(list("mean"=peak.mean, "scale"=peak.scale, "sigma"=peak.sigma, "area"=peak.area, "qual"=fq))
-  
-}
\ No newline at end of file
diff --git a/DIMS/AddOnFunctions/fit4G.R b/DIMS/AddOnFunctions/fit4G.R
deleted file mode 100644
index 6cad63d..0000000
--- a/DIMS/AddOnFunctions/fit4G.R
+++ /dev/null
@@ -1,18 +0,0 @@
-fit4G_2 <- function(x,y,sig1,sig2,sig3,sig4,mu1,scale1,mu2,scale2,mu3,scale3,mu4,scale4,useBounds){
-  
-  f = function(p){
-    d = p[2]*dnorm(x,mean=p[1],sd=sig1) + p[4]*dnorm(x,mean=p[3],sd=sig2) + p[6]*dnorm(x,mean=p[5],sd=sig3) + p[8]*dnorm(x,mean=p[7],sd=sig4)
-    sum((d-y)^2)
-  }
-  
-  if (useBounds){
-    lower = c(x[1],0,x[1],0,x[1],0,x[1],0)
-    upper = c(x[length(x)],Inf,x[length(x)],Inf,x[length(x)],Inf,x[length(x)],Inf)
-    
-    optim(c(mu1,scale1,mu2,scale2,mu3,scale3,mu4,scale4),f,control=list(maxit=10000),method="L-BFGS-B",lower=lower,upper=upper)
-    
-  } else {
-    optim(c(mu1,scale1,mu2,scale2,mu3,scale3,mu4,scale4),f,control=list(maxit=10000))
-  }
-  
-}
diff --git a/DIMS/AddOnFunctions/fit4peaks.R b/DIMS/AddOnFunctions/fit4peaks.R
deleted file mode 100644
index a506541..0000000
--- a/DIMS/AddOnFunctions/fit4peaks.R
+++ /dev/null
@@ -1,149 +0,0 @@
-fit4peaks <- function(x2,x,y,index,scale,resol,useBounds=FALSE,plot=FALSE,FQ,int.factor) {
-  
-  peak.mean = NULL
-  peak.area = NULL
-  peak.scale = NULL
-  peak.sigma = NULL
-  
-  range1=c(index[1]-2,index[1]-1,index[1],index[1]+1,index[1]+2)
-  range2=c(index[2]-2,index[2]-1,index[2],index[2]+1,index[2]+2)
-  range3=c(index[3]-2,index[3]-1,index[3],index[3]+1,index[3]+2)
-  range4=c(index[4]-2,index[4]-1,index[4],index[4]+1,index[4]+2)
-  if (range1[1]==0) range1=range1[-1]
-  if (length(x)<range4[length(range4)]) range4=range4[-length(range4)]
-  
-  range1=checkOverlap(range1,range2)[[1]]
-  range2=checkOverlap(range1,range2)[[2]]
-  range2=checkOverlap(range2,range3)[[1]]
-  range3=checkOverlap(range2,range3)[[2]]
-  range3=checkOverlap(range3,range4)[[1]]
-  range4=checkOverlap(range3,range4)[[2]]
-  
-  remove=which(range4>length(x))
-  if (length(remove)>0) {
-    range4=range4[-remove]
-    # message(length(range4))
-  }
-  
-  # check for negative or 0
-  remove=which(range1<1)
-  if (length(remove)>0) range1=range1[-remove]
-  remove=which(range2<1)
-  if (length(remove)>0) range2=range2[-remove]
-  remove=which(range3<1)
-  if (length(remove)>0) range3=range3[-remove]
-  remove=which(range4<1)
-  if (length(remove)>0) range4=range4[-remove]
-  
-  # remove NA
-  if (length(which(is.na(y[range1])))!=0) range1=range1[-which(is.na(y[range1]))]
-  if (length(which(is.na(y[range2])))!=0) range2=range2[-which(is.na(y[range2]))]
-  if (length(which(is.na(y[range3])))!=0) range3=range3[-which(is.na(y[range3]))]
-  if (length(which(is.na(y[range4])))!=0) range4=range4[-which(is.na(y[range4]))]
-  
-  mu1 = weighted.mean(x[range1],y[range1])
-  sigma1 = getSD(x[range1],y[range1])
-  fitP = fitG_2(x[range1],y[range1],sigma1,mu1,scale,useBounds)
-  p = fitP$par
-  
-  mu2 = weighted.mean(x[range2],y[range2])
-  sigma2 = getSD(x[range2],y[range2])
-  fitP = fitG_2(x[range2],y[range2],sigma2,mu2,scale,useBounds)
-  p2 = fitP$par
-  
-  mu3 = weighted.mean(x[range3],y[range3])
-  sigma3 = getSD(x[range3],y[range3])
-  fitP = fitG_2(x[range3],y[range3],sigma3,mu3,scale,useBounds)
-  p3 = fitP$par
-  
-  mu4 = weighted.mean(x[range4],y[range4])
-  sigma4 = getSD(x[range4],y[range4])
-  fitP = fitG_2(x[range4],y[range4],sigma4,mu4,scale,useBounds)
-  p4 = fitP$par
-  
-  fit4P = fit4G_2(x, y, sigma1, sigma2, sigma3, sigma3, p[1], p[2], p2[1], p2[2], p3[1], p3[2],  p4[1], p4[2], useBounds)
-  p5 = fit4P$par
-  
-  # plot #####################################
-  sumFit2 = (p5[2]*dnorm(x2,p5[1],sigma1))+(p5[4]*dnorm(x2,p5[3],sigma2))+(p5[6]*dnorm(x2,p5[5],sigma3))+(p5[8]*dnorm(x2,p5[7],sigma3))
-  sumFit = (p5[2]*dnorm(x,p5[1],sigma1))+(p5[4]*dnorm(x,p5[3],sigma2))+(p5[6]*dnorm(x,p5[5],sigma3))+(p5[8]*dnorm(x,p5[7],sigma3))
-  fq=getFitQuality(x,y,sort(c(p5[1],p5[3],p5[5],p5[7]))[1],sort(c(p5[1],p5[3],p5[5],p5[7]))[4],resol,sumFit=sumFit)$fq_new
-  
-  if (plot & (fq < FQ)) lines(x2,p5[2]*dnorm(x2,p5[1],sigma1),col="purple")
-  if (plot & (fq < FQ)) abline(v = p5[1], col="purple")
-  fwhm = getFwhm(p5[1],resol)
-  half_max = max(p5[2]*dnorm(x2,p5[1],sigma1))*0.5
-  if (plot & (fq < FQ)) lines(c(p5[1] - 0.5*fwhm, p5[1] + 0.5*fwhm),c(half_max,half_max),col="orange")
-  
-  if (plot & (fq < FQ)) lines(x2,p5[4]*dnorm(x2,p5[3],sigma2),col="purple")
-  if (plot & (fq < FQ)) abline(v = p5[3], col="purple")
-  fwhm = getFwhm(p5[3],resol)
-  half_max = max(p5[4]*dnorm(x2,p5[3],sigma2))*0.5
-  if (plot & (fq < FQ)) lines(c(p5[3] - 0.5*fwhm, p5[3] + 0.5*fwhm),c(half_max,half_max),col="orange")
-  
-  if (plot & (fq < FQ)) lines(x2,p5[6]*dnorm(x2,p5[5],sigma3),col="purple")
-  if (plot & (fq < FQ)) abline(v = p5[5], col="purple")
-  fwhm = getFwhm(p5[5],resol)
-  half_max = max(p5[6]*dnorm(x2,p5[5],sigma3))*0.5
-  if (plot & (fq < FQ)) lines(c(p5[5] - 0.5*fwhm, p5[5] + 0.5*fwhm),c(half_max,half_max),col="orange")
-  
-  if (plot & (fq < FQ)) lines(x2,p5[8]*dnorm(x2,p5[7],sigma3),col="purple")
-  if (plot & (fq < FQ)) abline(v = p5[7], col="purple")
-  fwhm = getFwhm(p5[7],resol)
-  half_max = max(p5[8]*dnorm(x2,p5[7],sigma3))*0.5
-  if (plot & (fq < FQ)) lines(c(p5[7] - 0.5*fwhm, p5[7] + 0.5*fwhm),c(half_max,half_max),col="orange")
-  
-  if (plot & (fq < FQ)) lines(x2,sumFit2,col="blue")
-  
-  #fq = abs(sum(y) - sum(sumFit))/sum(y)
-  #fq=abs(sum(y) - sum(sumFit))/sum(y)
-  #fq=mean(abs(sumFit - y)/sumFit)
-  
-  
-  h2=c(paste("mean =", p5[1], sep=" "),
-       paste("mean =", p5[3], sep=" "),
-       paste("mean =", p5[5], sep=" "),
-       paste("mean =", p5[7], sep=" "),
-       paste("fq =", fq, sep=" "))
-  
-  if (plot & (fq < FQ)) legend("topright", legend=h2)
-  #############################################
-  
-  #   area1 = sum(p5[2]*dnorm(x2,p5[1],sigma1))
-  #   area2 = sum(p5[4]*dnorm(x2,p5[3],sigma2))
-  #   area3 = sum(p5[6]*dnorm(x2,p5[5],sigma3))
-  #   area4 = sum(p5[8]*dnorm(x2,p5[7],sigma4))
-  
-  #   area1 = max(p5[2]*dnorm(x2,p5[1],sigma1))
-  #   area2 = max(p5[4]*dnorm(x2,p5[3],sigma2))
-  #   area3 = max(p5[6]*dnorm(x2,p5[5],sigma3))
-  #   area4 = max(p5[8]*dnorm(x2,p5[7],sigma4))
-  
-  area1 = getArea(p5[1],resol,p5[2],sigma1,int.factor)
-  area2 = getArea(p5[3],resol,p5[4],sigma2,int.factor)
-  area3 = getArea(p5[5],resol,p5[6],sigma3,int.factor)
-  area4 = getArea(p5[7],resol,p5[8],sigma4,int.factor)
-  
-  peak.area = c(peak.area, area1)
-  peak.area = c(peak.area, area2)
-  peak.area = c(peak.area, area3)
-  peak.area = c(peak.area, area4)
-  
-  peak.mean = c(peak.mean, p5[1])
-  peak.mean = c(peak.mean, p5[3])
-  peak.mean = c(peak.mean, p5[5])
-  peak.mean = c(peak.mean, p5[7])
-  
-  peak.scale = c(peak.scale, p5[2])
-  peak.scale = c(peak.scale, p5[4])
-  peak.scale = c(peak.scale, p5[6])
-  peak.scale = c(peak.scale, p5[8])
-  
-  peak.sigma = c(peak.sigma, sigma1)
-  peak.sigma = c(peak.sigma, sigma2)
-  peak.sigma = c(peak.sigma, sigma3)
-  peak.sigma = c(peak.sigma, sigma4)
-  
-  return(list("mean"=peak.mean, "scale"=peak.scale, "sigma"=peak.sigma, "area"=peak.area, "qual"=fq))
-  
-}
\ No newline at end of file
diff --git a/DIMS/AddOnFunctions/fitG.R b/DIMS/AddOnFunctions/fitG.R
deleted file mode 100644
index b1b595d..0000000
--- a/DIMS/AddOnFunctions/fitG.R
+++ /dev/null
@@ -1,18 +0,0 @@
-fitG_2 <- function(x,y,sig,mu,scale,useBounds) {
-  
-  f = function(p) {
-    d = p[2]*dnorm(x,mean=p[1],sd=sig)
-    sum((d-y)^2)
-  }
-  
-  if (useBounds){
-    lower = c(x[1],0,x[1],0)
-    upper = c(x[length(x)],Inf,x[length(x)],Inf)
-    
-    optim(c(as.numeric(mu), as.numeric(scale)),
-          f,control=list(maxit=10000),method="L-BFGS-B",lower=lower,upper=upper)
-  } else {  
-    #optim(c(mu,scale),f)
-    optim(c(as.numeric(mu),as.numeric(scale)),f,control=list(maxit=10000))
-  }  
-}
diff --git a/DIMS/AddOnFunctions/fitGaussian.R b/DIMS/AddOnFunctions/fitGaussian.R
deleted file mode 100644
index 3c5027d..0000000
--- a/DIMS/AddOnFunctions/fitGaussian.R
+++ /dev/null
@@ -1,327 +0,0 @@
-fitGaussian <- function(x2,x,y,index,scale,resol,outdir,force,useBounds,plot,scanmode,int.factor,width,height) {
-  # force=length(index)
-  # useBounds=FALSE
-  
-  peak.mean = NULL
-  peak.area = NULL
-  peak.qual = NULL
-  peak.min = NULL
-  peak.max = NULL
-  
-  FQ1 = 0.15
-  FQ = 0.2
-  
-  # One local max
-  if (force==1){
-    
-    retVal = fit1Peak(x2,x,y,index,scale,resol,plot,FQ1,useBounds)
-    
-    scale = 2
-    
-    if (retVal$mean[1]<x[1] | retVal$mean[1]>x[length(x)]) {   # <=== mean outside range
-      
-      # do it again
-      return(fitGaussian(x2,x,y,index,scale,resol,outdir,force=1,useBounds=TRUE,plot,scanmode,int.factor,width,height))
-      
-    } else { # <=== mean within range
-      
-      if (retVal$qual > FQ1){ # <=== bad fit
-        
-        # Try to fit two curves
-        
-        # diff(diff(x)) essentially computes the discrete analogue of the second derivative, so should be negative at local maxima.
-        # The +1 below takes care of the fact that the result of diff is shorter than the input vector.
-        new_index=which(diff(sign(diff(y)))==-2)+1
-        
-        if (length(new_index)!=2) {
-          new_index = round(length(x)/3)
-          new_index = c(new_index, 2*new_index)
-        }
-        
-        #length(new_index)  
-        return(fitGaussian(x2,x,y,new_index,scale,resol,outdir,force=2,useBounds=FALSE,plot,scanmode,int.factor,width,height))
-        
-      } else { # <=== good fit
-        
-        peak.mean = c(peak.mean, retVal$mean)
-        #peak.area = c(peak.area, sum(retVal$scale*dnorm(x2,retVal$mean,retVal$sigma)))
-        # "Centroid mode"
-        #         peak.area = c(peak.area, max(retVal$scale*dnorm(x2,retVal$mean,retVal$sigma)))
-        peak.area = c(peak.area, getArea(retVal$mean,resol,retVal$scale,retVal$sigma,int.factor))
-        peak.qual = retVal$qual
-        peak.min = x[1]
-        peak.max = x[length(x)]
-      }
-    }
-    
-    # Two local max  
-  } else if (force==2 & (length(x)>6)) {
-    
-    # fit two curves
-    retVal = fit2peaks(x2,x,y,index,scale,resol,useBounds,plot,FQ,int.factor) # <=== fit 2 curves
-    
-    if (retVal$mean[1]<x[1] | retVal$mean[1]>x[length(x)] |   # <=== one of means outside range
-        retVal$mean[2]<x[1] | retVal$mean[2]>x[length(x)]) {
-      
-      # check quality
-      if (retVal$qual > FQ) { # <=== bad fit 
-        
-        # do it again
-        return(fitGaussian(x2,x,y,index,scale,resol,outdir,force=2,useBounds=TRUE,plot,scanmode,int.factor,width,height))
-        
-        # good fit
-      } else {
-        
-        # check which mean is outside range
-        # Todo ========> check this!!!!!!!!!!!!!!!
-        for (i in 1:length(retVal$mean)){
-          if (retVal$mean[i]<x[1] | retVal$mean[i]>x[length(x)] ) {
-            peak.mean = c(peak.mean, -i)
-            peak.area = c(peak.area, -i)
-          } else {
-            peak.mean = c(peak.mean, retVal$mean[i])
-            peak.area = c(peak.area, retVal$area[i])
-          }
-        }
-        peak.qual = retVal$qual
-        peak.min = x[1]
-        peak.max = x[length(x)]
-      }
-    } else { # <=== all means within range
-      
-      if (retVal$qual > FQ) { # <=== bad fit 
-        
-        # Try to fit three curves
-        new_index=which(diff(sign(diff(y)))==-2)+1
-        
-        if (length(new_index)!=3) {
-          new_index = round(length(x)/4)
-          new_index = c(new_index, 2*new_index, 3*new_index)
-        }
-        
-        #length(new_index)      
-        return(fitGaussian(x2,x,y,new_index,scale,resol,outdir,force=3,useBounds=FALSE,plot,scanmode,int.factor,width,height))
-        
-      } else { # <======== good fit
-        
-        # check if means are within 3 ppm and sum if so  
-        tmp = retVal$qual 
-        
-        nMeanNew = -1
-        nMean = length(retVal$mean)
-        while (nMean!=nMeanNew){
-          nMean = length(retVal$mean)
-          retVal = isWithinXppm(retVal$mean, retVal$scale, retVal$sigma, retVal$area, x2, x, ppm=4, resol, plot)
-          nMeanNew = length(retVal$mean)
-        }
-        
-        retVal$qual = tmp
-        
-        h2=NULL
-        
-        for (i in 1:length(retVal$mean)){
-          h2 = c(h2, paste("mean =", retVal$mean[i], sep=" "))
-          
-          peak.mean = c(peak.mean, retVal$mean[i])
-          peak.area = c(peak.area, retVal$area[i])
-        }
-        peak.qual = retVal$qual
-        peak.min = x[1]
-        peak.max = x[length(x)]
-        
-        h2 = c(h2, paste("fq =", retVal$qual, sep=" "))
-        if (plot) legend("topright", legend=h2)
-      }  
-    }
-    
-    # Three local max  
-  } else if (force==3 & (length(x)>6)){
-    
-    retVal = fit3peaks(x2,x,y,index,scale,resol,useBounds,plot,FQ,int.factor)
-    
-    # outside range
-    if (retVal$mean[1]<x[1] | retVal$mean[1]>x[length(x)] | # <=== one of means outside range
-        retVal$mean[2]<x[1] | retVal$mean[2]>x[length(x)] |
-        retVal$mean[3]<x[1] | retVal$mean[3]>x[length(x)]) {
-      
-      # check quality
-      if (retVal$qual > FQ) { # <=== bad fit 
-        
-        # do it again
-        return(fitGaussian(x2,x,y,index,scale,resol,outdir,force,useBounds=TRUE,plot,scanmode,int.factor,width,height))
-        
-        # good fit
-      } else {
-        
-        # check which mean is outside range
-        # Todo ========> check this!!!!!!!!!!!!!!!
-        for (i in 1:length(retVal$mean)){
-          if (retVal$mean[i]<x[1] | retVal$mean[i]>x[length(x)] ) {
-            peak.mean = c(peak.mean, -i)
-            peak.area = c(peak.area, -i)
-          } else {
-            peak.mean = c(peak.mean, retVal$mean[i])
-            peak.area = c(peak.area, retVal$area[i])
-          }
-        }
-        peak.qual = retVal$qual
-        peak.min = x[1]
-        peak.max = x[length(x)]
-      }
-      
-    } else { # <=== all means within range
-      
-      if (retVal$qual > FQ) { # <=== bad fit 
-        
-        # Try to fit four curves
-        new_index=which(diff(sign(diff(y)))==-2)+1
-        
-        if (length(new_index)!=4) {
-          new_index = round(length(x)/5)
-          new_index = c(new_index, 2*new_index, 3*new_index, 4*new_index)
-        }
-        
-        #length(new_index)
-        return(fitGaussian(x2,x,y,new_index,scale,resol,outdir,force=4,useBounds=FALSE,plot,scanmode,int.factor,width,height))
-        
-      } else { # <======== good fit
-        
-        # check if means are within 3 ppm and sum if so  
-        tmp = retVal$qual 
-        
-        nMeanNew = -1
-        nMean = length(retVal$mean)
-        while (nMean!=nMeanNew){
-          nMean = length(retVal$mean)
-          retVal = isWithinXppm(retVal$mean, retVal$scale, retVal$sigma, retVal$area, x2, x, ppm=4, resol, plot)
-          nMeanNew = length(retVal$mean)
-        }
-        
-        retVal$qual = tmp
-        
-        h2=NULL
-        #         peak.mean=NULL
-        #         peak.area=NULL
-        
-        for (i in 1:length(retVal$mean)){
-          h2 = c(h2, paste("mean =", retVal$mean[i], sep=" "))
-          
-          peak.mean = c(peak.mean, retVal$mean[i])
-          peak.area = c(peak.area, retVal$area[i])
-        }
-        peak.qual = retVal$qual
-        peak.min = x[1]
-        peak.max = x[length(x)]
-        
-        h2 = c(h2, paste("fq =", retVal$qual, sep=" "))
-        if (plot) legend("topright", legend=h2)
-        
-      }  
-    }
-    
-    # Four local max  
-  } else if (force==4 & (length(x)>6)){
-    
-    retVal = fit4peaks(x2,x,y,index,scale,resol,useBounds,plot,FQ,int.factor)
-    
-    if (retVal$mean[1]<x[1] | retVal$mean[1]>x[length(x)] |
-        retVal$mean[2]<x[1] | retVal$mean[2]>x[length(x)] |
-        retVal$mean[3]<x[1] | retVal$mean[3]>x[length(x)] |
-        retVal$mean[4]<x[1] | retVal$mean[4]>x[length(x)]) {
-      
-      # check quality
-      if (retVal$qual > FQ) { # <=== bad fit 
-        
-        # do it again
-        return(fitGaussian(x2,x,y,index,scale,resol,outdir,force,useBounds=TRUE,plot,scanmode,int.factor,width,height))
-        
-        # good fit
-      } else {
-        
-        # check which mean is outside range
-        # Todo ========> check this!!!!!!!!!!!!!!!
-        for (i in 1:length(retVal$mean)){
-          if (retVal$mean[i]<x[1] | retVal$mean[i]>x[length(x)] ) {
-            peak.mean = c(peak.mean, -i)
-            peak.area = c(peak.area, -i)
-          } else {
-            peak.mean = c(peak.mean, retVal$mean[i])
-            peak.area = c(peak.area, retVal$area[i])
-          }
-        }
-        peak.qual = retVal$qual
-        peak.min = x[1]
-        peak.max = x[length(x)]
-      }
-      
-    } else {
-      
-      if (retVal$qual > FQ) { # <=== bad fit 
-        
-        # Generate 1 curve
-        return(fitGaussian(x2,x,y,index,scale,resol,outdir,force=5,useBounds=FALSE,plot,scanmode,int.factor,width,height))
-        
-      } else { # <======== good fit
-        
-        # check if means are within 3 ppm and sum if so  
-        tmp = retVal$qual 
-        
-        nMeanNew = -1
-        nMean = length(retVal$mean)
-        while (nMean!=nMeanNew){
-          nMean = length(retVal$mean)
-          retVal = isWithinXppm(retVal$mean, retVal$scale, retVal$sigma, retVal$area, x2, x, ppm=4, resol, plot)
-          nMeanNew = length(retVal$mean)
-        }
-        
-        retVal$qual = tmp
-        
-        h2=NULL
-        
-        for (i in 1:length(retVal$mean)){
-          h2 = c(h2, paste("mean =", retVal$mean[i], sep=" "))
-          
-          peak.mean = c(peak.mean, retVal$mean[i])
-          peak.area = c(peak.area, retVal$area[i])
-        }
-        peak.qual = retVal$qual
-        peak.min = x[1]
-        peak.max = x[length(x)]
-        
-        h2 = c(h2, paste("fq =", retVal$qual, sep=" "))
-        if (plot) legend("topright", legend=h2)
-      }  
-    }
-    
-    # More then four local max  
-  } else {
-    
-    scale=2
-    FQ1=0.40
-    useBounds=TRUE
-    index=which(y==max(y))
-    retVal = fit1Peak(x2,x,y,index,scale,resol,plot,FQ1,useBounds)
-    
-    if (retVal$qual > FQ1){ # <=== bad fit
-      
-      if (plot) dev.off()    
-      
-      rval = generateGaussian(x,y,resol,plot,scanmode,int.factor, width, height)
-      peak.mean = c(peak.mean, rval$mean)
-      peak.area = c(peak.area, rval$area)
-      peak.min = rval$min
-      peak.max = rval$max
-      peak.qual = 0
-      
-    } else { # <=== good fit
-      
-      peak.mean = c(peak.mean, retVal$mean)
-      peak.area = c(peak.area, getArea(retVal$mean,resol,retVal$scale,retVal$sigma,int.factor))
-      peak.qual = retVal$qual
-      peak.min = x[1]
-      peak.max = x[length(x)]
-    }
-  }
-  
-  return(list("mean"=peak.mean, "area"=peak.area, "qual" = peak.qual, "min"=peak.min, "max"=peak.max))
-}
diff --git a/DIMS/AddOnFunctions/fitGaussianInit.R b/DIMS/AddOnFunctions/fitGaussianInit.R
deleted file mode 100644
index 1c92bce..0000000
--- a/DIMS/AddOnFunctions/fitGaussianInit.R
+++ /dev/null
@@ -1,60 +0,0 @@
-fitGaussianInit <- function(x,y,int.factor,scale,resol,outdir,sampname,scanmode,plot,width,height,i,start,end) {
-  # scanmode="negative"
-  
-  mz.range = x[length(x)] - x[1]
-  x2 = seq(x[1],x[length(x)],length=mz.range*int.factor)
-  
-  #   # diff(diff(x)) essentially computes the discrete analogue of the second derivative, so should be negative at local maxima.
-  #   # The +1 below takes care of the fact that the result of diff is shorter than the input vector.
-  #   index=which(diff(sign(diff(y)))==-2)+1
-  
-  # Alway try to fit one curve first
-  index = which(y==max(y))
-  
-  if (scanmode=="positive"){
-    plot_label="pos_fit.png"
-  } else {
-    plot_label="neg_fit.png"
-  }
-  
-  if (plot) {
-    CairoPNG(filename=paste(outdir,"Gaussian_fit",paste(sampname, x[1], plot_label, sep="_"), sep="/"), width, height)
-    plot(x,y,xlab="m/z",ylab="I", ylim=c(0,1.5*max(y)),main=paste(i,start,end, sep=" ")) #, ylim=c(0,1e+05)
-  }
-  
-  retVal = fitGaussian(x2,x,y,index,scale,resol,outdir,force=length(index),useBounds=FALSE,plot,scanmode,int.factor,width,height)
-  
-  if (plot) {
-    if (length(retVal$mean)==1) {
-      
-      result = tryCatch(dev.off(), warning = function(w){},
-                        error=function(e){},
-                        finally = {})
-      
-      # file.rename(paste(outdir,"Gaussian_fit", paste(sampname, x[1], plot_label, sep="_"), sep="/"),
-      #             paste(outdir,"Gaussian_fit", paste(sampname, round(retVal$mean,digits = 6), plot_label, sep="_"), sep="/"))
-    } else {
-      
-      
-      h2=NULL
-      for (i in 1:length(retVal$mean)){
-        h2=c(h2, paste("mean =", retVal$mean[i], sep=" "))
-      }
-      h2=c(h2, paste("fq =", retVal$qual, sep=" "))
-      legend("topright", legend=h2)
-      
-      dev.off()
-      
-      for (i in 1:length(retVal$mean)){
-        if (retVal$mean[i]!=-1){
-          file.copy(paste(outdir,"Gaussian_fit", paste(sampname, x[1], plot_label, sep="_"), sep="/"),
-                    paste(outdir,"Gaussian_fit", paste(sampname, round(retVal$mean[i],digits = 6), plot_label, sep="_"), sep="/"))
-        }
-      }
-      file.remove(paste(outdir,"Gaussian_fit", paste(sampname, x[1], plot_label, sep="_"), sep="/"))
-    }
-  }
-  
-  return(list("mean"=retVal$mean, "area"=retVal$area, "qual"=retVal$qual, "min"=retVal$min, "max"=retVal$max))
-  
-}
diff --git a/DIMS/AddOnFunctions/generateBreaksFwhm.R b/DIMS/AddOnFunctions/generateBreaksFwhm.R
deleted file mode 100644
index 1a42631..0000000
--- a/DIMS/AddOnFunctions/generateBreaksFwhm.R
+++ /dev/null
@@ -1,26 +0,0 @@
-# options(digits=16)
-# resol = 140000
-# breaks.fwhm = 1
-# i = 1
-# breaks.fwhm.avg = NULL
-#
-# Sys.time()
-#
-# while (breaks.fwhm[length(breaks.fwhm)]<1000){
-#
-#   resol.mz = resol*(1/sqrt(2)^(log2(breaks.fwhm[i]/200)))
-#   fwhm.0.1 = (breaks.fwhm[i]/resol.mz)/10
-#   breaks.fwhm = c(breaks.fwhm, breaks.fwhm[i] + fwhm.0.1)
-#   breaks.fwhm.avg = c(breaks.fwhm.avg,(breaks.fwhm[i] +breaks.fwhm[i+1])/2)
-#
-#   if (i %% 10000 == 0){
-#     cat(paste("i =", i))
-#     cat(paste("breaks.fwhm =", breaks.fwhm[length(breaks.fwhm)]))
-#   }
-#
-#   i = i + 1
-# }
-#
-# Sys.time()
-# save(list=c("breaks.fwhm", "breaks.fwhm.avg"), file="breaks.RData")
-# cat("Breaks saved!")
diff --git a/DIMS/AddOnFunctions/generateExcelFile.R b/DIMS/AddOnFunctions/generateExcelFile.R
deleted file mode 100644
index 31474d9..0000000
--- a/DIMS/AddOnFunctions/generateExcelFile.R
+++ /dev/null
@@ -1,24 +0,0 @@
-generateExcelFile <- function(peaklist,  fileName, sub, plot = TRUE) {
-  # plotdir=file.path(plotdir)
-  # imageNum=2
-  # fileName="./results/xls/Pos_allpgrps_identified"
-  # subName=c("","_box")
-  
-  end <- 0
-  i <- 0
-  
-  if (dim(peaklist)[1]>=sub & (sub>0)){
-    for (i in 1:floor(dim(peaklist)[1]/sub)){
-      start=-(sub-1)+i*sub
-      end=i*sub
-      message(paste0(start, ":", end))
-      
-      genExcelFileV3(peaklist[c(start:end),], paste(fileName, sep="_"), plot)
-    }
-  }  
-  start = end + 1
-  end = dim(peaklist)[1]
-  message(start)
-  message(end)
-  genExcelFileV3(peaklist[c(start:end),], paste(fileName, i+1, sep="_"), plot)
-}
diff --git a/DIMS/AddOnFunctions/generateGaussian.R b/DIMS/AddOnFunctions/generateGaussian.R
deleted file mode 100644
index da6dec2..0000000
--- a/DIMS/AddOnFunctions/generateGaussian.R
+++ /dev/null
@@ -1,48 +0,0 @@
-generateGaussian <- function(x,y,resol,plot,scanmode,int.factor,width,height) {
-  
-  factor=1.5
-  index = which(y==max(y))[1]
-  x=x[index]
-  y=y[index]
-  mu = x
-  fwhm = getFwhm(mu,resol)
-  x.p = c(mu-factor*fwhm, x, mu+factor*fwhm)
-  y.p = c(0, y, 0)
-  
-  #   if (plot) dir.create("./results/plots",showWarnings = FALSE)
-  #   if (plot) dir.create("./results/plots/Gaussian_fit",showWarnings = FALSE)
-  
-  if (plot) {
-    if (scanmode=="positive"){
-      plot_label="pos_fit.png"
-    } else {
-      plot_label="neg_fit.png"
-    }
-  }
-  
-  mz.range = x.p[length(x.p)] - x.p[1]
-  x2 = seq(x.p[1],x.p[length(x.p)],length=mz.range*int.factor)
-  sigma = getSD(x.p,y.p)
-  scale = optimizeGauss(x.p,y.p,sigma,mu)
-  
-  if (plot) {
-    CairoPNG(filename=paste("./results/Gaussian_fit",paste(sampname, mu, plot_label, sep="_"), sep="/"), width, height)
-    
-    plot(x.p,y.p,xlab="m/z",ylab="I", ylim=c(0,1.5*max(y)))
-    lines(x2,scale*dnorm(x2,mu,sigma), col="green")
-    half_max = max(scale*dnorm(x2,mu,sigma))*0.5
-    lines(c(mu - 0.5*fwhm, mu + 0.5*fwhm),c(half_max,half_max),col="orange")
-    abline(v = mu, col="green")
-    h=c(paste("mean =", mu, sep=" "))
-    legend("topright", legend=h)
-    
-    dev.off()
-  }
-  
-  #  area = sum(scale*dnorm(x2,mu,sigma))
-  #  area = max(scale*dnorm(x2,mu,sigma))
-  area = getArea(mu,resol,scale,sigma,int.factor)
-  
-  return(list("mean"=mu, "area"=area, "min"=x2[1] , "max"=x2[length(x2)]))
-  
-}
\ No newline at end of file
diff --git a/DIMS/AddOnFunctions/getArea.R b/DIMS/AddOnFunctions/getArea.R
deleted file mode 100644
index 415b050..0000000
--- a/DIMS/AddOnFunctions/getArea.R
+++ /dev/null
@@ -1,23 +0,0 @@
-getArea <- function(mu,resol,scale,sigma,int.factor){
-  
-  #   mu=p3[1]
-  #   scale=p3[2]
-  #   sigma=sigma1
-  
-  #   mu=p3[1]
-  #   scale=p3[2]
-  #   sigma=sigma1
-  
-  # avoid too big vector (cannot allocate vector of size ...)
-  if (mu>1200) return(0)
-  
-  fwhm = getFwhm(mu,resol)
-  mzMin = mu - 2*fwhm
-  mzMax = mu + 2*fwhm
-  mz.range = mzMax - mzMin 
-  x_int = seq(mzMin,mzMax,length=mz.range*int.factor)
-  
-  #plot(x_int,scale*dnorm(x_int,mu,sigma),col="red",type="l")
-  
-  return(sum(scale*dnorm(x_int,mu,sigma))/100)  
-}
diff --git a/DIMS/AddOnFunctions/getDeltaMZ.R b/DIMS/AddOnFunctions/getDeltaMZ.R
deleted file mode 100644
index f09a0a4..0000000
--- a/DIMS/AddOnFunctions/getDeltaMZ.R
+++ /dev/null
@@ -1,5 +0,0 @@
-getDeltaMZ <- function(mass, breaks.fwhm){
-  index = which(breaks.fwhm<mass)
-  tmp = index[length(index)]
-  return(breaks.fwhm[tmp+1] - breaks.fwhm[tmp]) 
-}
diff --git a/DIMS/AddOnFunctions/getFitQuality.R b/DIMS/AddOnFunctions/getFitQuality.R
deleted file mode 100644
index f9197c8..0000000
--- a/DIMS/AddOnFunctions/getFitQuality.R
+++ /dev/null
@@ -1,65 +0,0 @@
-getFitQuality <- function(x,y,muFirst,muLast,resol,scale=NULL,sigma=NULL,sumFit=NULL){
-  # muFirst=sort(c(p5[1],p5[3],p5[5],p5[7]))[1]
-  # muLast=sort(c(p5[1],p5[3],p5[5],p5[7]))[4]
-  
-  # muFirst=p2[1]
-  # muLast=p2[1]
-  # scale=p2[2]
-  
-  # x,y,sort(c(p5[1],p5[3],p5[5],p5[7]))[1],sort(c(p5[1],p5[3],p5[5],p5[7]))[4],resol,sumFit=sumFit
-  
-  if (is.null(sumFit)){
-    # fwhm = getFwhm(muFirst,resol)
-    # intRangeMin = muFirst - 1.2*fwhm
-    # intRangeMax = muFirst + 1.2*fwhm
-    # tmp = which((x>intRangeMin) & (x<intRangeMax))
-    
-    # # mu outside range!!!
-    # if (length(tmp)==0) return(list("fq_new"=1, "x_int"=x, "y_int"=y))
-    # 
-    # x_int = x[tmp]
-    # y_int = y[tmp] 
-    
-    x_int = x
-    y_int = y
-    
-    # fq_new = mean(abs((scale*dnorm(x_int,muFirst,sigma)) - y_int)/(scale*dnorm(x_int,muFirst,sigma)))
-    fq_new = mean(abs((scale*dnorm(x_int,muFirst,sigma)) - y_int)/rep((max(scale*dnorm(x_int,muFirst,sigma))/2),length(x_int)))
-    
-    # lines(x_int,scale*dnorm(x_int,muFirst,sigma), col="blue")
-    # fq_new = abs(sum(y) - sum(p2[2]*dnorm(x,p2[1],sigma)))/sum(y)
-    
-  } else {
-    # fwhm = getFwhm(muFirst,resol)
-    # intRangeMin = muFirst - 1.2*fwhm
-    # fwhm = getFwhm(muLast,resol)
-    # intRangeMax = muLast + 1.2*fwhm
-    # 
-    # tmp = which((x>intRangeMin) & (x<intRangeMax))
-    # 
-    # if (length(tmp)==0){
-    #   # outside range
-    #   fq_new = 1
-    #   x_int = x
-    #   y_int = y
-    #   
-    # } else {
-    
-    # x_int = x[tmp]
-    # y_int = y[tmp]
-    # sumFit_int = sumFit[tmp] 
-    
-    sumFit_int = sumFit
-    y_int = y
-    x_int = x
-    
-    # fq_new = mean(abs(sumFit_int - y_int)/sumFit_int)
-    fq_new = mean(abs(sumFit_int - y_int)/rep(max(sumFit_int)/2,length(sumFit_int)))
-    # }    
-  }
-  
-  # Caused by dividing by 0
-  if (is.nan(fq_new)) fq_new=1 
-  
-  return(list("fq_new"=fq_new, "x_int"=x_int, "y_int"=y_int))  
-}
diff --git a/DIMS/AddOnFunctions/getFwhm.R b/DIMS/AddOnFunctions/getFwhm.R
deleted file mode 100644
index 890a865..0000000
--- a/DIMS/AddOnFunctions/getFwhm.R
+++ /dev/null
@@ -1,24 +0,0 @@
-getFwhm <- function(mu,resol) {   
-  
-  if (is.nan(mu)) mu=0
-  if (is.na(mu)) mu=0
-  if (is.null(mu)) mu=0
-  if (mu<0) mu=0
-  
-  # tryCatch({
-  resol.mz = resol*(1/sqrt(2)^(log2(mu/200)))
-  fwhm = mu/resol.mz
-  #   }, warning = function(resol.mz, fwhm) {
-  #     message(warnings())
-  #     message(paste("====================> mu: ",mu))
-  #     message(paste("====================> resol.mz: ",resol.mz))
-  #   })
-  
-  #   , error = function(e) {
-  #     error-handler-code
-  #   }, finally = {
-  #     cleanup-code
-  #   })
-  
-  return(fwhm)
-}
diff --git a/DIMS/AddOnFunctions/getPatients.R b/DIMS/AddOnFunctions/getPatients.R
deleted file mode 100644
index 3b03f0a..0000000
--- a/DIMS/AddOnFunctions/getPatients.R
+++ /dev/null
@@ -1,9 +0,0 @@
-getPatients <- function(peaklist){
-  patients=colnames(peaklist)[grep("P", colnames(peaklist), fixed = TRUE)]
-  patients=unique(as.vector(unlist(lapply(strsplit(patients, ".", fixed = TRUE), function(x) x[1]))))
-  # ToDo: If 2 P's in sample names!!!!!!!!!!!!!
-  # patients=sort(as.numeric(unique(as.vector(unlist(lapply(strsplit(patients, "_P", fixed = TRUE), function(x) x[2]))))))
-  patients=sort(as.numeric(unique(as.vector(unlist(lapply(strsplit(patients, "P", fixed = TRUE), function(x) x[2]))))))
-  
-  return(patients)
-}
diff --git a/DIMS/AddOnFunctions/getSD.R b/DIMS/AddOnFunctions/getSD.R
deleted file mode 100644
index 197a002..0000000
--- a/DIMS/AddOnFunctions/getSD.R
+++ /dev/null
@@ -1,9 +0,0 @@
-getSD <- function(x,y,resol=140000) {   
-  
-  index = which(y==max(y))
-  mean = x[index]
-  resol.mz = resol*(1/sqrt(2)^(log2(mean/200)))
-  fwhm = mean/resol.mz
-  sd = (fwhm/2)*0.85 
-  return(sd)
-}
diff --git a/DIMS/AddOnFunctions/get_patient_data_to_helix.R b/DIMS/AddOnFunctions/get_patient_data_to_helix.R
deleted file mode 100644
index c582177..0000000
--- a/DIMS/AddOnFunctions/get_patient_data_to_helix.R
+++ /dev/null
@@ -1,31 +0,0 @@
-get_patient_data_to_helix <- function(metab_interest_sorted, metab_list_all){
-  # Combine Z-scores of metab groups together
-  df_all_metabs_zscores <- bind_rows(metab_interest_sorted)
-  # Change columnnames 
-  colnames(df_all_metabs_zscores) <- c("HMDB_name", "Patient", "Z_score")
-  # Change Patient column to character instead of factor
-  df_all_metabs_zscores$Patient <- as.character(df_all_metabs_zscores$Patient)
-  
-  # Delete whitespaces HMDB_name 
-  df_all_metabs_zscores$HMDB_name <- str_trim(df_all_metabs_zscores$HMDB_name, "right")
-  
-  # Split HMDB_name column on "nitine;" for match dims_helix_table
-  df_all_metabs_zscores$HMDB_name_split <- str_split_fixed(df_all_metabs_zscores$HMDB_name, "nitine;", 2)[,1]
-  
-  # Combine stofgroepen
-  dims_helix_table <- bind_rows(metab_list_all)
-  # Filter table for metabolites for Helix
-  dims_helix_table <- dims_helix_table %>% filter(Helix == "ja")
-  # Split HMDB_name column on "nitine;" for match df_all_metabs_zscores
-  dims_helix_table$HMDB_name_split <- str_split_fixed(dims_helix_table$HMDB_name, "nitine;", 2)[,1]
-  dims_helix_table <- dims_helix_table %>% select(HMDB_name_split, Helix_naam, high_zscore, low_zscore)
-  
-  # Filter DIMS results for metabolites for Helix
-  df_metabs_helix <- df_all_metabs_zscores %>% filter(HMDB_name_split %in% dims_helix_table$HMDB_name_split)
-  # Combine dims_helix_table and df_metabs_helix, adding Helix codes etc.
-  df_metabs_helix <- df_metabs_helix %>% left_join(dims_helix_table, by = join_by(HMDB_name_split))
-  
-  df_metabs_helix <- df_metabs_helix %>% select(HMDB_name, Patient, Z_score, Helix_naam, high_zscore, low_zscore)
-  
-  return(df_metabs_helix)
-}
\ No newline at end of file
diff --git a/DIMS/AddOnFunctions/globalAssignments.HPC.R b/DIMS/AddOnFunctions/globalAssignments.HPC.R
deleted file mode 100644
index 9207661..0000000
--- a/DIMS/AddOnFunctions/globalAssignments.HPC.R
+++ /dev/null
@@ -1,114 +0,0 @@
-#.libPaths(new="/hpc/local/CentOS7/dbg_mz/R_libs/3.2.2")
-## calculate relative abundancies from theoretical mass and composition
-
-#library(Rdisop)
-
-#theor.table <- read.table(file="C:/Users/mraves/Metabolomics/TheoreticalMZ_Negative_composition.txt", sep="\t", header=TRUE)
-options(digits=16)
-
-#library(OrgMassSpecR)
-
-suppressPackageStartupMessages(library(lattice))
-# The following list was copied from Rdisop elements.R and corrected for C, H, O, Cl, S according to NIST
-Ba <- list(name= 'Ba', mass=130, isotope=list(mass=c(-0.093718, 0, -0.094958, 0, -0.095514, -0.094335, -0.095447, -0.094188, -0.094768),                                      abundance=c(0.00106, 0, 0.00101, 0, 0.02417, 0.06592, 0.07854, 0.1123, 0.717)))
-Br <- list(name= 'Br', mass=79,  isotope=list(mass=c(-0.0816639, 0, -0.083711),                                                                                               abundance=c(0.5069, 0, 0.4931)))
-C <-  list(name= 'C',  mass=12,  isotope=list(mass=c(0, 0.003354838, 0.003241989),                                                                                            abundance=c(0.9893, 0.0107, 0)))
-Ca <- list(name= 'Ca', mass=40,  isotope=list(mass=c(-0.0374094, 0, -0.0413824, -0.0412338, -0.0445194, 0, -0.046311, 0, -0.047467),                                          abundance=c(0.96941, 0, 0.00647, 0.00135, 0.02086, 0, 4e-05, 0, 0.00187)))
-Cl <- list(name= 'Cl', mass=35,  isotope=list(mass=c(-0.03114732, 0, -0.03409741),                                                                                            abundance=c(0.7576, 0, 0.2424)))
-Cr <- list(name= 'Cr', mass=50,  isotope=list(mass=c(-0.0539536, 0, -0.0594902, -0.0593487, -0.0611175),                                                                      abundance=c(0.04345, 0, 0.83789, 0.09501, 0.02365)))
-Cu <- list(name= 'Cu', mass=63,  isotope=list(mass=c(-0.0704011, 0, -0.0722071),                                                                                              abundance=c(0.6917, 0, 0.3083)))
-F <-  list(name= 'F',  mass=19,  isotope=list(mass=c(-0.00159678),                                                                                                            abundance=c(1)))
-Fe <- list(name= 'Fe', mass=54,  isotope=list(mass=c(-0.0603873, 0, -0.0650607, -0.0646042, -0.0667227),                                                                      abundance=c(0.058, 0, 0.9172, 0.022, 0.0028)))
-H <-  list(name= 'H',  mass=1,   isotope=list(mass=c(0.00782503207, 0.014101778, 0.01604928),                                                                                 abundance=c(0.999885, 0.000115, 0)))
-Hg <- list(name= 'Hg', mass=196, isotope=list(mass=c(-0.034193, 0, -0.033257, -0.031746, -0.0317, -0.029723, -0.029383, 0, -0.026533),                                        abundance=c(0.0015, 0, 0.0997, 0.1687, 0.231, 0.1318, 0.2986, 0, 0.0687)))
-I <-  list(name= 'I',  mass=127, isotope=list(mass=c(-0.095527),                                                                                                              abundance=c(1)))
-K <-  list(name= 'K',  mass=39,  isotope=list(mass=c(-0.0362926, -0.0360008, -0.0381746),                                                                                     abundance=c(0.932581, 0.000117, 0.067302)))
-Li <- list(name= 'Li', mass=6,   isotope=list(mass=c(0.0151214, 0.016003),                                                                                                    abundance=c(0.075, 0.925)))
-Mg <- list(name= 'Mg', mass=24,  isotope=list(mass=c(-0.0149577, -0.0141626, -0.0174063),                                                                                     abundance=c(0.7899, 0.1, 0.1101)))
-Mn <- list(name= 'Mn', mass=55,  isotope=list(mass=c(-0.0619529),                                                                                                             abundance=c(1)))
-N <-  list(name= 'N',  mass=14,  isotope=list(mass=c(0.003074002, 0.00010897),                                                                                                abundance=c(0.99634, 0.00366)))
-Na <- list(name= 'Na', mass=23,  isotope=list(mass=c(-0.0102323),                                                                                                             abundance=c(1)))
-Ni <- list(name= 'Ni', mass=58,  isotope=list(mass=c(-0.0646538, 0, -0.0692116, -0.0689421, -0.0716539, 0, -0.0720321),                                                       abundance=c(0.68077, 0, 0.26223, 0.0114, 0.03634, 0, 0.00926)))
-O <-  list(name= 'O',  mass=16,  isotope=list(mass=c(-0.00508538044, -0.0008683, -0.0008397),                                                                                 abundance=c(0.99757, 0.000381, 0.00205)))
-P <-  list(name= 'P',  mass=31,  isotope=list(mass=c(-0.026238),                                                                                                              abundance=c(1)))
-S <-  list(name= 'S',  mass=32,  isotope=list(mass=c(-0.027929, -0.02854124, -0.0321331, 0, -0.03291924),                                                                     abundance=c(0.9499, 0.0075, 0.0425, 0, 1e-04)))
-Se <- list(name= 'Se', mass=74,  isotope=list(mass=c(-0.0775254, 0, -0.080788, -0.0800875, -0.0826924, 0, -0.0834804, 0, -0.0833022),                                         abundance=c(0.0089, 0, 0.0936, 0.0763, 0.2378, 0, 0.4961, 0, 0.0873)))
-Si <- list(name= 'Si', mass=28,  isotope=list(mass=c(-0.0230729, -0.0235051, -0.0262293),                                                                                     abundance=c(0.9223, 0.0467, 0.031)))
-Sn <- list(name= 'Sn', mass=112, isotope=list(mass=c(-0.095174, 0, -0.097216, -0.096652, -0.098253, -0.097044, -0.098391, -0.09669, -0.0978009, 0, -0.0965596, 0, -0.0947257),abundance=c(0.0097, 0, 0.0065, 0.0034, 0.1453, 0.0768, 0.2423, 0.0859, 0.3259, 0, 0.0463, 0, 0.0579)))
-Zn <- list(name= 'Zn', mass=64,  isotope=list(mass=c(-0.0708552, 0, -0.0739653, -0.0728709, -0.0751541, 0, -0.074675),                                                        abundance=c(0.486, 0, 0.279, 0.041, 0.188, 0, 0.006)))
-
-NH4 <- list(name= "NH4", mass=18, isotope=list(mass=c(0.03437, 0.03141, -0.95935)), abundance=c(0.995, 0.004, 0.001)) # SISweb: 18.03437 100 19.03141 0.4 19.04065 0.1
-Ac <- list(name= "Ac", mass=60, isotope=list(mass=c(0.02114, 0.02450, 0.02538)), abundance=c(0.975, 0.021, 0.004))   # SISweb: 60.02114 100 61.02450 2.2 62.02538 0.4
-NaCl <- list(name= "NaCl", mass=58, isotope=list(mass=c(-0.04137, 0, -0.04433)), abundance=c(0.755, 0, 0.245))   # SISweb: 57.95862 100 59.95567 32.4
-NaCl2 <- list(name= "NaCl2", mass=116, isotope=list(mass=c(-0.08274, 0, -0.08866)), abundance=c(0.755, 0, 0.245))   # SISweb: 57.95862 100 59.95567 32.4
-NaCl3 <- list(name= "NaCl3", mass=174, isotope=list(mass=c(-0.12411, 0, -0.13299)), abundance=c(0.755, 0, 0.245))   # SISweb: 57.95862 100 59.95567 32.4
-NaCl4 <- list(name= "NaCl4", mass=232, isotope=list(mass=c(-0.16548, 0, -0.17732)), abundance=c(0.755, 0, 0.245))   # SISweb: 57.95862 100 59.95567 32.4
-NaCl5 <- list(name= "NaCl5", mass=290, isotope=list(mass=c(-0.20685, 0, -0.22165)), abundance=c(0.755, 0, 0.245))   # SISweb: 57.95862 100 59.95567 32.4
-For <- list(name= "For", mass=45, isotope=list(mass=c(-0.00233, 0.00103)), abundance=c(0.989, 0.011))   # SISweb: 46.00549 100 47.00885 1.1 (47.0097 0.1) 48.00973 0.4
-Na2 <- list(name= "2Na-H", mass=46, isotope=list(mass=c(-1.0282896)), abundance=c(1))  # SISweb for Na2: 45.97954 100 # minus 1 H !
-Met <- list(name= "CH3OH", mass=32, isotope=list(mass=c(1.034045,1.037405)), abundance=c(0.989,0.011)) # SISweb: 32.02622 100 33.02958 1.1 33.0325 0.1 34.03046 0.2
-CH3OH <- list(name= "CH3OH", mass=32, isotope=list(mass=c(1.034045,1.037405)), abundance=c(0.989,0.011)) # SISweb: 32.02622 100 33.02958 1.1 33.0325 0.1 34.03046 0.2
-Na3 <- list(name= "3Na-2H", mass=69, isotope=list(mass=c(-2.0463469)), abundance=c(1))  # SISweb for Na2: 45.97954 100 # minus 1 H !
-KCl <- list(name= "KCl", mass=74, isotope=list(mass=c(-0.06744,0.92961,-0.06744,0.92772)), abundance=c(0.7047,0.2283,0.0507,0.0162))   # SISweb: 73.93256 100 75.92961 32.4 75.93067 7.2 77.92772 2.3
-H2PO4 <- list(name= "H2PO4", mass=97, isotope=list(mass=c(-0.03091)), abundance=c(1))
-HSO4 <- list(name= "HSO4", mass=97, isotope=list(mass=c(-0.04042,0,-0.04462)), abundance=c(0.96,0,0.04))
-Met2 <- list(name= "Met2", mass=64, isotope=list(mass=c(1.060265,1.013405)), abundance=c(0.978,0.022))
-Met3 <- list(name= "Met3", mass=96, isotope=list(mass=c(1.086485,1.089845)), abundance=c(0.969,0.031))
-Met4 <- list(name= "Met4", mass=128, isotope=list(mass=c(1.112705,1.116065)), abundance=c(0.959,0.041))
-Met5 <- list(name= "Met5", mass=160, isotope=list(mass=c(1.20935,1.142285)), abundance=c(0.949,0.051))
-NaminH <- list(name= "Na-H", mass=21, isotope=list(mass=c(-0.02571416)), abundance=c(1))
-KminH <- list(name= "K-H", mass=37, isotope=list(mass=c(-0.05194,0.94617)), abundance=c(0.9328,0.0672))
-H2O <- list(name= "H2O", mass=-19, isotope=list(mass=c(-0.01894358)), abundance=c(1))
-NaK <- list(name= "NaK-H", mass=61, isotope=list(mass=c(-0.054345,0.943765)), abundance=c(0.9328,0.0672))
-min2H <- list(name= "min2H", mass=-2, isotope=list(mass=c(-0.0151014)), abundance=c(1))
-plus2H <- list(name= "plus2H", mass=2, isotope=list(mass=c(0.0151014)), abundance=c(1))
-plus2Na <- list(name= "plus2Na", mass=46,  isotope=list(mass=c(-0.02046), abundance=c(1)))
-plusNaH <- list(name= "plusNaH", mass=24,  isotope=list(mass=c(-0.00295588), abundance=c(1)))
-plusKH <- list(name= "plusKH", mass=40, isotope=list(mass=c(-0.029008,0.969101)), abundance=c(0.9328,0.0672))
-plusHNH <- list(name= "plusHNH", mass=19, isotope=list(mass=c(0.04164642)), abundance=c(1))
-min3H <- list(name= "min3H", mass=-3, isotope=list(mass=c(-0.02182926)), abundance=c(1))
-plus3H <- list(name= "plus3H", mass=3, isotope=list(mass=c(0.02182926)), abundance=c(1))
-plus3Na <- list(name= "plus3Na", mass=68, isotope=list(mass=c(0.96931)), abundance=c(1))
-plus2NaH <- list(name= "plus2NaH", mass=47, isotope=list(mass=c(0.2985712)), abundance=c(1))
-plusNa2H <- list(name= "plusNa2H", mass=25, isotope=list(mass=c(0.00432284)), abundance=c(1))
-
-Cl37 <- list(name= "Cl37", mass=37,  isotope=list(mass=c(-0.03409741)),  abundance=c(1))
-
-allelements <- list(Ba, Br, C, Ca, Cl, Cr, Cu, F, Fe, H, Hg, I, K, Li, Mg, Mn, N, Na, Ni, O, P, S, Se, Si, Sn, Zn)
-allAdducts <- list(Ba, Br, Ca, Cl, Cl37, Cr, Cu, Fe, Hg, I, K, Li, Mg, Mn, Na, Ni, Se, Si, Sn, Zn, NH4, Ac, NaCl, For,Na2,CH3OH,NaCl2,NaCl3,NaCl4,NaCl5,Na3,KCl,H2PO4,HSO4,Met2,Met3,Met4,Met5,NaminH,KminH,H2O,NaK,min2H,plus2H,plus2Na,plusNaH,plusKH,min3H,plus3H,plusHNH,plus3Na,plus2NaH,plusNa2H)
-
-#atomsinuse <- c("P",     "O",         "N",           "C",     "H",          "S",        "Cl")
-#atomicWts <- c(30.97376163, 15.99491463, 14.0030740052, 12.0000, 1.0078250321, 31.9720707, 34.968852721)
-#electron <- 0.00054858
-#Mol.comp <- c(0,4,0,0,1,1,0)
-#Mol.exact <- sum(Mol.comp * atomicWts) + electron
-#Mol.corr <- Mol.exact - 0.0022 + 0.000007*Mol.exact  # mass as found in peak group list
-#getMass(Mol) + electron - 0.0022 + 0.000007*getMass(Mol)
-
-#atomsinuse <- c("P",     "O",         "N",           "C",     "H",          "S",        "Cl",          "D",           "34S",          "18O")
-#atomicWts <- c(30.97376163, 15.99491463, 14.0030740052, 12.0000, 1.0078250321, 31.9720707, 34.968852721, 2.0141017778, 33.96786690, 17.9991610)
-#electron <- 0.00054858
-#Mol.comp <- c(0,4,0,0,1,1,0,0,0,0)  # main peak HSO4
-#Mol.comp <- c(0,4,0,0,0,1,0,1,0,0)  # deuterated
-#Mol.comp <- c(0,4,0,0,1,0,0,0,1,0)  # 34S
-#Mol.comp <- c(0,3,0,0,1,1,0,0,0,1)  # 18O
-#Mol.exact <- sum(Mol.comp * atomicWts) + electron
-#Mol.corr <- Mol.exact - 0.0022 + 0.000007*Mol.exact  # mass as found in peak group list
-
-atomsinuse <- c("P",     "O",         "N",           "C",     "H",          "S",        "Cl",          "D",           "13C",       "34S",          "18O",     "37Cl")
-atomicWts <- c(30.97376163, 15.99491463, 14.0030740052, 12.0000, 1.0078250321, 31.9720707, 34.968852721, 2.0141017778, 13.0033548378, 33.96786690, 17.9991610, 36.96590259)
-electron <- 0.00054858
-############# P O N C H  S Cl D 13C 34S 18O 37Cl
-#Mol.comp <- c(0,6,0,6,12,0,1, 0, 0, 0,  0,  0)  # main peak Galactose HCl negative ion
-#Mol.comp <- c(0,6,0,5,12,0,1, 0, 1, 0,  0,  0)  # 13C Galactose HCl negative ion
-#Mol.comp <- c(0,6,0,6,11,0,1, 1, 0, 0,  0,  0)  # D Galactose HCl negative ion
-#Mol.comp <- c(0,5,0,6,12,0,1, 0, 0, 0,  1,  0)  # 18O Galactose HCl negative ion
-#Mol.comp <- c(0,6,0,6,12,0,0, 0, 0, 0,  0,  1)  # 18O Galactose HCl negative ion
-#Mol.exact <- sum(Mol.comp * atomicWts) + electron
-#Mol.corr <- Mol.exact - 0.0022 + 0.000007*Mol.exact  # mass as found in peak group list
-
-Hmass <- H$mass + H$isotope$mass[1]
-Dmass <- H$mass + 1 + H$isotope$mass[2]
-Tmass <- H$mass + 2 + H$isotope$mass[3]
-C13mass <- C$mass + 1 + C$isotope$mass[2]
-N15mass <- N$mass + 1 + N$isotope$mass[2]
diff --git a/DIMS/AddOnFunctions/iden.code.R b/DIMS/AddOnFunctions/iden.code.R
deleted file mode 100644
index 4853647..0000000
--- a/DIMS/AddOnFunctions/iden.code.R
+++ /dev/null
@@ -1,45 +0,0 @@
-iden.code <- function(peaklist, db, ppm, theor_mass_label) {
-  # theor_mass_label = {"MNeg", "Mpos"}
-  
-  mcol <- peaklist[ , "mzmed.pgrp"]
-  theor.mcol <- db[,theor_mass_label]
-  assi_HMDB <- iso_HMDB <- HMDB_code <- c(rep("",length(mcol)))
-  theormz_HMDB <- c(rep(0,length(mcol)))
-  
-  for(t in 1:length(mcol)){
-    mz<-mcol[t]
-    mtol <- mz*ppm/1000000
-    selp <- which(theor.mcol > (mz - mtol) & theor.mcol < (mz + mtol))
-    assinames <- isonames <- HMDBcodes <- ""
-    
-    if(length(selp)!=0){
-      for(i in 1:length(selp)){
-        
-        if(grepl(" iso ", db[selp[i], "CompoundName"])){
-          mainpeak <- strsplit(db[selp[i],"CompoundName"]," iso ")[[1]][1]
-          
-          # Check if peak without isotope occurs, this assumes that peaklist is ordered on mass!!! 
-          if(length(grep(mainpeak, assi_HMDB, fixed=TRUE)) > 0){
-            isonames <- as.character(paste(isonames,as.character(db[selp[i],"CompoundName"]), sep=";"))
-          }
-          
-        } else {
-          assinames <- as.character(paste(assinames,as.character(db[selp[i], "CompoundName"]), sep=";"))
-          HMDBcodes <- as.character(paste(HMDBcodes, as.character(rownames(db)[selp[i]]), sep=";"))
-        }  
-      }
-    }  
-    
-    assi_HMDB[t] <- as.character(assinames)
-    iso_HMDB[t] <-  as.character(isonames)
-    if ((assinames=="") & (isonames=="")){ 
-      theormz_HMDB[t] <- ""
-    } else {
-      theormz_HMDB[t] <- theor.mcol[selp[1]]
-    }
-    HMDB_code[t] <- as.character(HMDBcodes)
-  }
-  
-  return(cbind(peaklist, assi_HMDB, iso_HMDB, theormz_HMDB, HMDB_code))
-}
-
diff --git a/DIMS/AddOnFunctions/ident.hires.noise.HPC.R b/DIMS/AddOnFunctions/ident.hires.noise.HPC.R
deleted file mode 100644
index 0be43d1..0000000
--- a/DIMS/AddOnFunctions/ident.hires.noise.HPC.R
+++ /dev/null
@@ -1,242 +0,0 @@
-# modified identify function to also look for adducts and their isotopes
-ident.hires.noise.HPC <- function(peaklist, allAdducts, scanmode="Negative", look4=c("Cl", "Ac"), identlist=NULL, resol=280000, slope=0, incpt=0, ppm.fixed, ppm.iso.fixed) {
-  
-  #     peaklist=outlist_Neg
-  #     scanmode="Negative"
-  #     identlist=noise.Neg.MZ
-  #     look4=look4.addN
-  #     resol=resol
-  #     slope=0 
-  #     incpt=0
-  #     ppm.fixed=3
-  #     ppm.iso.fixed=2  
-  
-  metlin <- assi <- iso <- rep("", nrow(peaklist))
-  theormz <- nisos <- expint <- conf <- rep(0, nrow(peaklist))
-  # nrH <- nrD <- nrC <- nr13C <- nrN <- nr15N <- nrO <- nrP  <- nrS <- nrCl <- rep(0, nrow(peaklist))
-  
-  # add adducts to identification list
-  if (scanmode == "Positive") { add.mode <-  "+" } else { add.mode <- "-" }
-  # identlist <- theorMZ
-  identlist.orig <- identlist
-  
-  for (p in 1:length(look4)) { # loop over type of adduct
-    #print(p)
-    identlist.Adduct <- identlist.orig
-    identlist.Adduct[ , "CompoundName"] <- as.character(identlist.orig[ , "CompoundName"])
-    
-    #add2label <- paste("[M+", look4[p], "]", add.mode, sep="")
-    if (look4[p]=="H2O") {
-      add2label <- paste("[M-", look4[p], "]", add.mode, sep="")
-    } else {
-      add2label <- paste("[M+", look4[p], "]", add.mode, sep="")
-    }
-    
-    identlist.Adduct[ , "CompoundName"] <- paste(identlist.Adduct[ , "CompoundName"], add2label, sep=" ")
-    adductInfo <- elementInfo(look4[p], allAdducts)
-    if (scanmode == "Positive") {
-      adductMass <- adductInfo$mass[1] + adductInfo$isotope$mass[1] - Hmass } else {
-        adductMass <- adductInfo$mass[1] + adductInfo$isotope$mass[1] + Hmass
-      }
-    for (q in 1:nrow(identlist.Adduct)) { # loop over compounds in database
-      # construct information for compound + adduct:
-      if (scanmode == "Positive") {
-        identlist.Adduct[q, "Mpos"] <-  as.numeric(identlist.Adduct[q, "Mpos"]) + adductMass # + Na - H
-        identlist.Adduct[q, "MNeg"] <-  0
-      } else {
-        identlist.Adduct[q, "Mpos"] <-  0
-        identlist.Adduct[q, "MNeg"] <-  as.numeric(identlist.Adduct[q, "MNeg"]) + adductMass # + Cl + H
-      }
-    }
-    
-    #     # modify columns with info for mol. formula:
-    #     if (look4[p] == "2Na-H") {
-    #       identlist.Adduct[, "nrH"] <- as.numeric(identlist.Adduct[, "nrH"]) - 1
-    #     } else if (look4[p] == "NH4") {
-    #       identlist.Adduct[, "nrH"] <- as.numeric(identlist.Adduct[, "nrH"]) + 3
-    #       identlist.Adduct[, "nrN"] <- as.numeric(identlist.Adduct[, "nrN"]) + 1
-    #     } else if (look4[p] == "Cl") {
-    #       identlist.Adduct[, "nrCl"] <- as.numeric(identlist.Adduct[, "nrCl"]) + 1
-    #       identlist.Adduct[, "nrH"] <- as.numeric(identlist.Adduct[, "nrH"]) + 1
-    #     } else if (look4[p] == "Ac") {
-    #       identlist.Adduct[ , "nrC"] <- as.numeric(identlist.Adduct[ , "nrC"]) + 2
-    #       identlist.Adduct[ , "nrH"] <- as.numeric(identlist.Adduct[ , "nrH"]) + 3
-    #       identlist.Adduct[ , "nrO"] <- as.numeric(identlist.Adduct[ , "nrO"]) + 2
-    #     } else if (look4[p] == "CH3OH+H") {
-    #       identlist.Adduct[ , "nrC"] <- as.numeric(identlist.Adduct[ , "nrC"]) + 1
-    #       identlist.Adduct[ , "nrH"] <- as.numeric(identlist.Adduct[ , "nrH"]) + 4
-    #       identlist.Adduct[ , "nrO"] <- as.numeric(identlist.Adduct[ , "nrO"]) + 1
-    #     } else {
-    #       identlist.Adduct[, "nrH"] <- as.numeric(identlist.Adduct[, "nrH"]) - 1
-    #     }
-    identlist <- rbind(identlist, identlist.Adduct) 
-  } # end for p adducts in look4
-  
-  
-  if (scanmode == "Positive") { theor.mcol <- as.numeric(identlist[ , "Mpos"]) } else {
-    theor.mcol <- as.numeric(identlist[ , "MNeg"]) }
-  # apply correction using regression line obtained with ISses
-  theor.mcol <- (1+slope)*theor.mcol + incpt
-  
-  # get mz information from peaklist
-  mcol <- peaklist[ , "mzmed.pgrp"]
-  # if column with average intensities is missing, calculate it:
-  if (!("avg.int" %in% colnames(peaklist))){
-    mzmaxcol <- which(colnames(peaklist) == "mzmax.pgrp")
-    endcol <- ncol(peaklist)
-    peaklist[ , "avg.int"] <- apply(peaklist[ ,(mzmaxcol+1):(endcol)], 1, mean)
-  }
-  
-  #   # generate URL for Metlin:
-  #   for (p in 1:nrow(peaklist)) { # for each peak  # p <- 1
-  #     mzpeak <- as.numeric(mcol[p])
-  #     # resolution as function of mz:
-  #     resol.mz <- resol*(1/sqrt(2)^(log2(mzpeak/200)))
-  #     fwhm <- mzpeak/resol.mz
-  #     #    massmin <- mzpeak - (0.00003 + 0.000003*mzpeak) - (1.0078250321 - 0.00054858) - fwhm  # for Metlin db
-  #     #    massmax <- mzpeak - (0.00003 + 0.000003*mzpeak) - (1.0078250321 - 0.00054858) + fwhm  # for Metlin db
-  #     if (scanmode == "Positive") {
-  #       massmin <- mzpeak - (1.0078250321 - 0.00054858) - fwhm  # for Metlin db
-  #       massmax <- mzpeak - (1.0078250321 - 0.00054858) + fwhm  # for Metlin db
-  #     } else {
-  #       massmin <- mzpeak + (1.0078250321 - 0.00054858) - fwhm  # for Metlin db
-  #       massmax <- mzpeak + (1.0078250321 - 0.00054858) + fwhm  # for Metlin db
-  #     }
-  #     metlin[p] <- paste("http://metlin.scripps.edu/metabo_list.php?mass_min=", massmin, "&mass_max=", massmax, sep="")
-  #   }
-  
-  
-  # do indentification using own database:
-  for (t in 1:nrow(identlist)) { # theoretical mass  # t <- 45
-    # for (t in 1:169) {
-    #print(as.character(identlist[t,"CompoundName"]))
-    theor.mz <- theor.mcol[t]
-    
-    theor.comp <- as.character(identlist[t, "Composition"])
-    #theor.comp <- mol.formula(identlist[t, ])
-    
-    #     # if there's Deuterium, Tritium, 13C or 15N in the composition:
-    #     mass.incr <- 0 + (as.numeric(identlist[t,"nrD"])*Dmass) +
-    #       (as.numeric(identlist[t,"nrT"])*Tmass) +
-    #       (as.numeric(identlist[t,"nrC13"])*C13mass) +
-    #       (as.numeric(identlist[t,"nrN15"])*N15mass)
-    #     theor.comp <- strsplit(theor.comp, "iso")[[1]][1]
-    mass.incr <- 0
-    
-    # resolution as function of mz:
-    resol.mz <- resol*(1/sqrt(2)^(log2(theor.mz/200)))
-    # calculate fine-grained isotopic distribution using MIDAs
-    fwhm <- round(theor.mz/resol.mz,6)
-    
-    #system(paste("C:/Users/awillem7/tools/MIDAs_New/MIDAs_Example ", theor.comp, " 2  C00 \"\"  \"\"  ", fwhm, " 1 0 1e-50 2 tmp", sep=""),ignore.stderr=TRUE)
-    #system(paste("C:/Users/mraves/Metabolomics/MIDAs_New/MIDAs_Example ", theor.comp, " 2  C00 \"\"  \"\"  ", fwhm, " 1 0 1e-50 2 tmp", sep=""),ignore.stderr=TRUE)
-    #system(paste(path2MIDAS, theor.comp, " 2  C00 \"\"  \"\"  ", fwhm, " 1 0 1e-50 2 tmp", sep=""),ignore.stderr=TRUE)
-    
-    #system(paste("/data/home/luyf/Metabolomics/MIDAs/MIDAs_Example ", theor.comp, " 2  C00 \"\"  \"\"  ", fwhm, " 1 0 1e-50 2 tmp", sep=""),ignore.stderr=TRUE)
-    options(stringsAsFactors = FALSE)
-    #fgid <- read.table(file="tmp_Fine_Grained_Isotopic_Distribution", header=FALSE)
-    
-    #     res <- try(fgid <- read.table(file="tmp_Fine_Grained_Isotopic_Distribution", header=FALSE))
-    #     if(inherits(res, "try-error"))
-    #     {
-    #       #error handling code, maybe just skip this iteration using
-    #       message("Skipped")
-    #       next
-    #     }
-    #     
-    #     # correct mass for D, T, 13C and 15N
-    #     fgid[ ,1] <- as.numeric(fgid[ ,1]) + mass.incr
-    #     # calculate percentage intensities from relative intensities
-    #     firstone <- as.numeric(fgid[1,2])
-    #     fgid[ ,3] <- as.numeric(fgid[ , 2]) / firstone
-    #     #fgid <- as.matrix(fgid, ncol=3)
-    #     # the mz in the MIDAs file are of the neutral molecule
-    #     if (scanmode == "Positive") { mz.iso <- as.numeric(fgid[ , 1]) + Hmass - electron }
-    #     if (scanmode == "Negative") { mz.iso <- as.numeric(fgid[ , 1]) - Hmass + electron }
-    #     fgid <- cbind(fgid, mz.iso)
-    #     colnames(fgid) <- c("mz","rel.int", "perc.int", "mz.iso")
-    
-    # compensate mz for presence of adduct
-    #     Adduct.mass <- theor.mz - mz.iso[1]
-    #     fgid[ , "mz.iso"] <- as.numeric(fgid[ , "mz.iso"]) + Adduct.mass
-    
-    # set tolerance for mz accuracy of main peak
-    mtol <- theor.mz*ppm.fixed/1000000
-    # find main peak
-    selp <- which(mcol > (theor.mz - mtol) & mcol < (theor.mz + mtol))
-    # selp <- which(mcol > (theor.mz - 0.01) & mcol < (theor.mz + 0.01))
-    # peaklist[selp, c(1:4,(endcol+1))]
-    
-    # set tolerance for mz accuracy of isotope peaks
-    itol <- theor.mz*ppm.iso.fixed/1000000
-    
-    #     if (length(selp) > 1) { # more than one candidate peak for main; select best one based on isotope pattern
-    #       #cat(as.character(identlist[t, "CompoundName"])); print(" has >1 candidate peaks")
-    #       conf.local <- rep(0, length(selp))
-    #       for (p in 1:length(selp)) { # p <- 2
-    #         # determine isotope pattern for each candidate peak
-    #         # obs.mz <- peaklist[selp[p],"mzmed.pgrp"]
-    #         conf.local[p] <- match.isotope.pattern(peaklist, scanmode, selp[p], fgid, ppm.iso.fixed)
-    #       }
-    #       # selp <- selp[abs(mcol[selp] - theor.mz) == min(abs(mcol[selp] - theor.mz))] }
-    #       selp <- na.exclude(selp[conf.local == max(conf.local)])
-    #     }
-    if (length(selp) > 1) { # more than one candidate peak for main; select best one based on mz.diff
-      selp <- selp[abs(mcol[selp] - theor.mz) == min(abs(mcol[selp] - theor.mz))] 
-    }
-    if (length(selp) == 1) { # match for main
-      assi[selp] <- paste(assi[selp], as.character(identlist[t,"CompoundName"]), sep=";")
-      theormz[selp] <- theor.mz
-      # conf[selp] <- match.isotope.pattern(peaklist, scanmode, selp, fgid, ppm.iso.fixed)
-      
-      #       nrH[selp] <- identlist[t,"nrH"]
-      #       nrD[selp] <- identlist[t,"nrD"]
-      #       nrC[selp] <- identlist[t,"nrC"]
-      #       nr13C[selp] <- identlist[t,"nrC13"]
-      #       nrN[selp] <- identlist[t,"nrN"]
-      #       nr15N[selp] <- identlist[t,"nrN15"]
-      #       nrO[selp] <- identlist[t,"nrO"]
-      #       nrP[selp] <- identlist[t,"nrP"]
-      #       nrS[selp] <- identlist[t,"nrS"]
-      #       nrCl[selp] <- identlist[t,"nrCl"]   
-      
-      # assign isotope peaks
-      #       mz.main <- peaklist[selp, "mzmed.pgrp"]  # mz of main peak
-      #       int.main <- peaklist[selp, "avg.int"]  # intensity of main peak (= 100%)
-      #       # deviation from theoretical mass:
-      #       diff <- theor.mz - mz.main
-      #       # calculate expected intensities and select isotopes with exp.int > threshold
-      #       fgid[ , "exp.int"] <- fgid[ , "perc.int"] * int.main      
-      #       fgid.subset <- fgid[(fgid[ , "exp.int"] > thresh), ]
-      #       nisos[selp] <- nrow(fgid.subset) - 1
-      #       if (nrow(fgid.subset) > 1) { # avoid error message if fgid.subset has only 1 line
-      #         for (f in 2:nrow(fgid.subset)) { # f <- 2
-      #           mz.target <- fgid.subset[f, "mz.iso"] - diff
-      #           int.target <- fgid.subset[f, "exp.int"]
-      #           # print(itarget)
-      #           sel.iso <- peaklist[ , "mzmed.pgrp"] > (mz.target - itol) & peaklist[ , "mzmed.pgrp"] < (mz.target + itol)
-      #           # sum(sel.iso)
-      #           # sel.iso <- peaklist[ , "mzmed.pgrp"] > (mz.target - 0.01) & peaklist[ , "mzmed.pgrp"] < (mz.target + 0.01)
-      #           # peaklist[sel.iso, c(1:4,23)]
-      #           if (sum(sel.iso) == 1) { # 2 separate if-statements because of error if sum(sel.iso) = 0
-      #             if (peaklist[sel.iso, "avg.int"] >  (int.target/2)) { # match
-      #               iso[sel.iso] <- paste(paste(iso[sel.iso], as.character(identlist[t,"CompoundName"]), "iso", f, sep=" "),";", sep="")
-      #               # peaklist[sel.iso, ]
-      #               expint[sel.iso] <- fgid.subset[f, "exp.int"]
-      #             }
-      #           } else if (sum(sel.iso) > 1) {
-      #             nrs.iso <- which(sel.iso)
-      #             nr.iso <- nrs.iso[which(abs(peaklist[sel.iso, "avg.int"] - int.target) == min(abs(peaklist[sel.iso, "avg.int"] - int.target)))]
-      #             if (peaklist[nr.iso, "avg.int"] >  (int.target/2)) {  # match
-      #               # print(peaklist[nr.iso, "avg.int"])
-      #               iso[nr.iso] <- paste(paste(iso[nr.iso], as.character(identlist[t,"CompoundName"]), "iso", f, sep=" "),";", sep="")
-      #               expint[nr.iso] <- fgid.subset[f, "exp.int"]
-      #             } # end if
-      #           } # end else if
-      #         } # end for f 
-      #       } # end if
-    } # end if 
-  } # end for t
-  #   cbind(peaklist, nrH, nrD, nrC, nr13C, nrN, nr15N, nrO, nrP, nrS, nrCl, assi, theormz, conf, nisos, iso, expint, metlin)
-  cbind(peaklist, assi, theormz, conf, nisos, iso, expint, metlin)
-}
diff --git a/DIMS/AddOnFunctions/isWithinXppm.R b/DIMS/AddOnFunctions/isWithinXppm.R
deleted file mode 100644
index 5da43c6..0000000
--- a/DIMS/AddOnFunctions/isWithinXppm.R
+++ /dev/null
@@ -1,51 +0,0 @@
-isWithinXppm <- function(mean, scale, sigma, area, x2, x, ppm=4, resol, plot) {
-  #   mean=retVal$mean
-  #   scale=retVal$scale
-  #   sigma=retVal$sigma
-  #   area=retVal$area
-  #   ppm=3
-  
-  # sort!!!!!!!!!!!!!!!!
-  index = order(mean)
-  mean = mean[index]
-  scale = scale[index]
-  sigma = sigma[index]
-  area = area[index]
-  
-  summed = NULL
-  remove = NULL
-  
-  if (length(mean)>1){
-    for (i in 2:length(mean)){
-      if ((abs(mean[i-1]-mean[i])/mean[i-1])*10^6 < ppm) {
-        
-        # avoid double occurance in sum
-        if ((i-1) %in% summed) next
-        
-        retVal = sumCurves(mean[i-1], mean[i], scale[i-1], scale[i], sigma[i-1], sigma[i], x2, x, resol, plot)
-        summed = c(summed, i-1, i)
-        if (is.nan(retVal$mean)) retVal$mean=0 
-        mean[i-1] = retVal$mean
-        mean[i] = retVal$mean
-        area[i-1] = retVal$area
-        area[i] = retVal$area
-        scale[i-1] = retVal$scale
-        scale[i] = retVal$scale
-        sigma[i-1] = retVal$sigma
-        sigma[i] = retVal$sigma
-        
-        remove = c(remove, i)
-      }
-    }
-  }
-  
-  if (length(remove)!=0){
-    mean=mean[-c(remove)]
-    area=area[-c(remove)]
-    scale=scale[-c(remove)]
-    sigma=sigma[-c(remove)]
-  }
-  
-  return(list("mean"=mean, "area"=area, "scale"=scale, "sigma"=sigma, "qual"=NULL))
-  
-}
\ No newline at end of file
diff --git a/DIMS/AddOnFunctions/is_diagnostic_patient.R b/DIMS/AddOnFunctions/is_diagnostic_patient.R
deleted file mode 100644
index 839f41d..0000000
--- a/DIMS/AddOnFunctions/is_diagnostic_patient.R
+++ /dev/null
@@ -1,6 +0,0 @@
-is_diagnostic_patient <- function(patient_column){
-  # Check for Diagnostics patients with correct patientnumber (e.g. starting with "P2024M")
-  diagnostic_patients <- grepl("^P[0-9]{4}M",patient_column)
-  
-  return(diagnostic_patients)
-}
\ No newline at end of file
diff --git a/DIMS/AddOnFunctions/mergeDuplicatedRows.R b/DIMS/AddOnFunctions/mergeDuplicatedRows.R
deleted file mode 100644
index 39cb8b4..0000000
--- a/DIMS/AddOnFunctions/mergeDuplicatedRows.R
+++ /dev/null
@@ -1,49 +0,0 @@
-mergeDuplicatedRows <- function(peaklist) {
-  # peaklist = outlist.tot
-  # resultDir = "./results"
-  # scanmode = "positive"
-  
-  # peaklist_index=which(peaklist[,"mzmed.pgrp"]=="94.9984524624111")
-  # peaklist[peaklist_index,]
-  
-  collapse <- function(label,pklst,index){
-    # label = "iso_HMDB"
-    # pklst = peaklist
-    # index = peaklist_index
-    tmp2=as.vector(pklst[index,label])
-    if (length(which(is.na(tmp2)))>0) tmp2=tmp2[-which(is.na(tmp2))]
-    return(paste(tmp2,collapse = ";"))
-  } 
-  
-  options(digits=16)
-  
-  collect=NULL
-  remove=NULL
-  
-  index = which(duplicated(peaklist[, "mzmed.pgrp"]))
-  
-  while (length(index) > 0){
-    
-    peaklist_index = which(peaklist[, "mzmed.pgrp"] == peaklist[index[1], "mzmed.pgrp"])
-    # peaklist[peaklist_index,"iso_HMDB",drop=FALSE]
-    tmp=peaklist[peaklist_index[1],,drop=FALSE]
-    
-    tmp[,"assi_HMDB"]=collapse("assi_HMDB",peaklist,peaklist_index)
-    tmp[,"iso_HMDB"]=collapse("iso_HMDB",peaklist,peaklist_index)
-    tmp[,"HMDB_code"]=collapse("HMDB_code",peaklist,peaklist_index)
-    tmp[,"assi_noise"]=collapse("assi_noise",peaklist,peaklist_index)
-    if (tmp[,"assi_noise"]==";") tmp[,"assi_noise"]=NA
-    tmp[,"theormz_noise"]=collapse("theormz_noise",peaklist,peaklist_index)
-    if (tmp[,"theormz_noise"]=="0;0") tmp[,"theormz_noise"]=NA
-    
-    collect = rbind(collect, tmp)
-    remove = c(remove, peaklist_index)
-    
-    index=index[-which(peaklist[index, "mzmed.pgrp"] == peaklist[index[1], "mzmed.pgrp"])]
-  } 
-  
-  if (!is.null(remove)) peaklist = peaklist[-remove,]
-  peaklist = rbind(peaklist,collect)
-  
-  return(peaklist)
-}
diff --git a/DIMS/AddOnFunctions/normalization_2.1.R b/DIMS/AddOnFunctions/normalization_2.1.R
deleted file mode 100644
index ed99964..0000000
--- a/DIMS/AddOnFunctions/normalization_2.1.R
+++ /dev/null
@@ -1,75 +0,0 @@
-# normalization_2.1(outlist.pos.id, "Intensity_all_peaks_positive_norm", groupNames.pos, on="total", assi_label="assi_HMDB")
-
-normalization_2.1 <- function(data, filename, groupNames, on="total", assi_label="assi"){
-  # data=outlist.pos.id
-  # filename = "Intensity_all_peaks_positive_norm"
-  # groupNames = groupNames.pos
-  # on="total"
-  # assi_label="assi_HMDB"
-  
-  lastcol = length(groupNames) + 6
-  before = data[ ,c(7:lastcol)]
-  
-  if (on=="total_IS") {
-    
-    assi = which(colnames(data)==assi_label)
-    data.int <- data[ ,c(assi,7:lastcol)]  # assi and samples columns
-    data.assi = data.int[grep("(IS", data.int[,1], ignore.case=FALSE, fixed = TRUE),]
-    
-  } else if (on=="total_ident"){
-    
-    assi = which(colnames(data)==assi_label)
-    assi.hmdb = which(colnames(data)=="assi.hmdb")
-    index = sort(union(which(data[,assi]!=""), which(data[,assi.hmdb]!="")))
-    
-    data.int = data[ ,c(assi,7:lastcol)]  # assi and samples columns
-    data.assi = data.int[index,]
-    
-  } else if (on=="total") {
-    
-    assi = which(colnames(data)==assi_label)
-    data.int = data[ ,c(assi,7:lastcol)]  # assi and samples columns
-    data.assi = data.int
-    
-  }
-  
-  sum <- 0
-  for (c in 2:ncol(data.assi)) {
-    sum <- sum + sum(as.numeric(data.assi[,c]))
-  }
-  average <- sum/(ncol(data.assi)-1)
-  for (c in 2:ncol(data.int)) {
-    factor <- sum(as.numeric(data.assi[,c]))/average
-    if (factor==0) {
-      data.int[ ,c]=0
-      cat(colnames(data.int)[c])
-      cat("factor==0 !!!")
-    } else {
-      data.int[ ,c] <- as.numeric(data.int[ ,c])/factor
-    }
-  }
-  
-  # colnames(data.int[,2:ncol(data.int)])
-  
-  if (dim(data)[2]==lastcol){
-    final.outlist.Pos.idpat.norm <- cbind(data[,1:6],data.int[,2:ncol(data.int)])
-  } else {
-    final.outlist.Pos.idpat.norm <- cbind(data[,1:6],data.int[,2:ncol(data.int)],data[,(lastcol + 1):ncol(data)])
-  }
-  
-  #outdir="./results/normalization"
-  #dir.create(outdir, showWarnings = FALSE)
-  
-  #CairoPNG(filename=paste(outdir, paste(filename, "_before.png", sep=""), sep="/"), width, height)
-  #sub=apply(before,2, function(x) sum(as.numeric(x)))
-  #barplot(as.vector(unlist(sub)), main="Not normalized",names.arg = colnames(before),las=2)
-  #dev.off()
-  
-  #CairoPNG(filename=paste(outdir, paste(filename, "_", on, ".png", sep=""), sep="/"), width, height)
-  #sub=apply(final.outlist.Pos.idpat.norm[,c(7:lastcol)],2, function(x) sum(as.numeric(x)))
-  #barplot(as.vector(unlist(sub)), main=filename,names.arg = colnames(final.outlist.Pos.idpat.norm[,c(7:lastcol)]),las=2)
-  #dev.off()
-  
-  return(final.outlist.Pos.idpat.norm)
-  
-}
diff --git a/DIMS/AddOnFunctions/optimizeGauss.R b/DIMS/AddOnFunctions/optimizeGauss.R
deleted file mode 100644
index 0fbce75..0000000
--- a/DIMS/AddOnFunctions/optimizeGauss.R
+++ /dev/null
@@ -1,11 +0,0 @@
-optimizeGauss <- function(x,y,sigma,mu) {
-  
-  f = function(p,x,y,sigma,mu) {
-    curve = p*dnorm(x,mu,sigma)
-    return((max(curve)-max(y))^2)
-  }
-  
-  rval = optimize(f, c(0, 100000), tol = 0.0001,x,y,sigma,mu)
-  
-  return(rval$minimum)
-}
diff --git a/DIMS/AddOnFunctions/output_helix.R b/DIMS/AddOnFunctions/output_helix.R
deleted file mode 100644
index c098675..0000000
--- a/DIMS/AddOnFunctions/output_helix.R
+++ /dev/null
@@ -1,31 +0,0 @@
-output_for_helix <- function(protocol_name, df_metabs_helix){
-  
-  # Remove positive controls
-  df_metabs_helix <- df_metabs_helix %>% filter(is_diagnostic_patient(Patient))
-  
-  # Add 'Vial' column, each patient has unique ID
-  df_metabs_helix <- df_metabs_helix %>% 
-    group_by(Patient) %>% 
-    mutate(Vial = cur_group_id()) %>% 
-    ungroup()
-  
-  # Split patient number into labnummer and Onderzoeksnummer
-  df_metabs_helix <- add_lab_id_and_onderzoeksnummer(df_metabs_helix)
-  
-  # Add column with protocol name
-  df_metabs_helix$Protocol <- protocol_name
-  
-  # Change name Z_score and Helix_naam columns to Amount and Name
-  change_columns <- c(Amount = "Z_score", Name = "Helix_naam")
-  df_metabs_helix <- df_metabs_helix %>% rename(all_of(change_columns))
-  
-  # Select only necessary columns and set them in correct order
-  df_metabs_helix <- df_metabs_helix %>% 
-    select(c(Vial, labnummer, Onderzoeksnummer, Protocol, Name, Amount))
-  
-  # Remove duplicate patient-metabolite combinations ("leucine + isoleucine + allo-isoleucin_Z-score" is added 3 times)
-  df_metabs_helix <- df_metabs_helix %>% 
-    group_by(Onderzoeksnummer, Name) %>% distinct() %>% ungroup()
-  
-  return(df_metabs_helix)
-}
\ No newline at end of file
diff --git a/DIMS/AddOnFunctions/peak.grouping.Gauss.HPC.R b/DIMS/AddOnFunctions/peak.grouping.Gauss.HPC.R
deleted file mode 100644
index c36259a..0000000
--- a/DIMS/AddOnFunctions/peak.grouping.Gauss.HPC.R
+++ /dev/null
@@ -1,88 +0,0 @@
-peak.grouping.Gauss.HPC <- function(outdir, fileIn, scanmode, resol, groupNames) {
-  # fileIn="./results/specpks_all/positive_outlist_i_min_1.197.RData"
-  
-  # ppm/2
-  #range = 1.5e-06
-  range = 2e-06  
-  startcol=7
-  
-  # outlist.copy <- read.table(file=fileIn, sep="\t", header=TRUE)
-  load(fileIn)
-  outlist.copy = outlist_i_min_1
-  batch = strsplit(fileIn, ".",fixed = TRUE)[[1]][3]
-  
-  outpgrlist = NULL
-  
-  while (max(as.numeric(outlist.copy[ , "height.pkt"])) > 0 ) {
-    
-    sel = which(as.numeric(outlist.copy[ , "height.pkt"]) == max(as.numeric(outlist.copy[ , "height.pkt"])))[1]
-    
-    # 3ppm range around max
-    mzref = as.numeric(outlist.copy[sel, "mzmed.pkt"])
-    pkmin = -(range*mzref - mzref)
-    pkmax = 2*mzref-pkmin
-    
-    selp = as.numeric(outlist.copy[ , "mzmed.pkt"]) > pkmin & as.numeric(outlist.copy[ , "mzmed.pkt"]) < pkmax
-    tmplist = outlist.copy[selp, ]
-    
-    nrsamples = sum(selp)
-    if (nrsamples > 1) {
-      # 3ppm range around mean
-      mzmed.pgrp = mean(as.numeric(outlist.copy[selp, "mzmed.pkt"]))
-      mzmin.pgrp = -(range*mzmed.pgrp - mzmed.pgrp)
-      mzmax.pgrp = 2*mzmed.pgrp - mzmin.pgrp
-      
-      selp = as.numeric(outlist.copy[ , "mzmed.pkt"]) > mzmin.pgrp & as.numeric(outlist.copy[ , "mzmed.pkt"]) < mzmax.pgrp
-      tmplist = outlist.copy[selp, ]
-      
-      fq.worst.pgrp = as.numeric(max(outlist.copy[selp, "fq"]))
-      fq.best.pgrp = as.numeric(min(outlist.copy[selp, "fq"]))
-      ints.allsamps = rep(0, length(groupNames))
-      names(ints.allsamps) = groupNames # same order as sample list!!!  
-      
-      #       if (length(unique(as.vector(tmplist[,"samplenr"]))) != length(as.vector(tmplist[,"samplenr"]))) {
-      #         message(paste("bingo", sel))
-      #         break
-      #       }
-      
-      # Check for each sample if multiple peaks exists, if so take the sum! 
-      labels=unique(tmplist[,"samplenr"])
-      ints.allsamps[labels] = as.vector(unlist(lapply(labels, function(x) {sum(as.numeric(tmplist[which(tmplist[ , "samplenr"]==x), "height.pkt"]))})))
-      
-      outpgrlist = rbind(outpgrlist, c(mzmed.pgrp, fq.best.pgrp, fq.worst.pgrp, nrsamples, mzmin.pgrp, mzmax.pgrp, ints.allsamps))
-    }
-    outlist.copy[selp, "height.pkt"] = -1
-  }
-  
-  outpgrlist = as.data.frame(outpgrlist)  # ignore warnings of duplicate row names
-  colnames(outpgrlist)[1:6] = c("mzmed.pgrp", "fq.best", "fq.worst", "nrsamples", "mzmin.pgrp", "mzmax.pgrp")
-  
-  #   # filtering ##################################################################################################################
-  #   final.outlist=outpgrlist[,c("mzmed.pgrp", "fq.best", "fq.worst","nrsamples","mzmin.pgrp","mzmax.pgrp", sampleNames)]
-  #   # NB: in centroided mode, data files contains many "-1.000" values, from peak finding. Set these to zero.
-  #   final.outlist[final.outlist == -1] = 0
-  #   
-  #   # keep only peaks which occur in 3 out of 3 technical replicates in at least one sample in peak group list
-  # #   peakFiltering(repl.pattern, final.outlist, nsampgrps, outdir, scanmode, startcol=7)
-  # #   peakFiltering <- function(repl.pattern, final.outlist, nsampgrps, resultDir, scanmode, startcol){
-  #   nsamp = length(repl.pattern)
-  #   nsampgrps = length(repl.pattern[[1]])
-  #   
-  #   keep <- rep(0, nrow(final.outlist))
-  #   for (p in 1:nrow(final.outlist)) {  
-  #     for (g in 1:nsampgrps) {  # g <- 31
-  #       if (keep[p] == 0 & sum(final.outlist[p, repl.pattern[[g]]] > 0) == length(repl.pattern[[g]]) ) { keep[p] <- 1 }
-  #     }
-  #   }
-  #   
-  #   tmp <- cbind(final.outlist, keep)
-  #   final.outlist.filt <- tmp[keep == 1, ]
-  # 
-  #   # omit keep column
-  #   final.outlist.filt <- final.outlist.filt[ , -ncol(final.outlist.filt)]
-  
-  #save(outpgrlist_part, file=paste(outdir, paste(scanmode, "_", mzstart, "_", mzend, ".RData", sep=""), sep="/"))
-  # save(final.outlist.filt, file=paste(outdir, "peak_grouping", paste(scanmode, "_",batch,".RData", sep=""), sep="/"))
-  save(outpgrlist, file=paste(outdir, "peak_grouping", paste(scanmode, "_",batch,".RData", sep=""), sep="/"))
-  
-}
\ No newline at end of file
diff --git a/DIMS/AddOnFunctions/prepare_alarmvalues.R b/DIMS/AddOnFunctions/prepare_alarmvalues.R
deleted file mode 100644
index c09c536..0000000
--- a/DIMS/AddOnFunctions/prepare_alarmvalues.R
+++ /dev/null
@@ -1,52 +0,0 @@
-prepare_alarmvalues <- function(pt_name, dims_helix_table) {
-  # extract data for patient of interest (pt_name)
-  pt_metabs_helix <- dims_helix_table %>% filter(Patient == pt_name)
-  pt_metabs_helix$Z_score <- round(pt_metabs_helix$Z_score, 2)
-  
-  # Make empty dataframes for metabolites above or below alarmvalues
-  pt_list_high <- data.frame(HMDB_name = character(), Z_score = numeric())
-  pt_list_low <- data.frame(HMDB_name = character(), Z_score = numeric())
-  
-  # Loop over individual metabolites
-  for (metab in unique(pt_metabs_helix$HMDB_name)){
-    # Get data for individual metabolite
-    pt_metab <- pt_metabs_helix %>% filter(HMDB_name == metab)
-    # print(pt_metab)
-    
-    # Check if zscore is positive of negative
-    if(pt_metab$Z_score > 0) {
-      # Get specific alarmvalue for metabolite
-      high_zscore_cutoff_metab <- pt_metabs_helix %>% filter(HMDB_name == metab) %>% pull(high_zscore)
-      
-      # If zscore is above the alarmvalue, add to pt_list_high table
-      if(pt_metab$Z_score > high_zscore_cutoff_metab) {
-        pt_metab_high <- pt_metab %>% select(HMDB_name, Z_score)
-        pt_list_high <- rbind(pt_list_high, pt_metab_high)
-      }
-    } else {
-      # Get specific alarmvalue for metabolite
-      low_zscore_cutoff_metab <- pt_metabs_helix %>% filter(HMDB_name == metab) %>% pull(low_zscore)
-      
-      # If zscore is below the alarmvalue, add to pt_list_low table
-      if(pt_metab$Z_score < low_zscore_cutoff_metab) {
-        pt_metab_low <- pt_metab %>% select(HMDB_name, Z_score)
-        pt_list_low <- rbind(pt_list_low, pt_metab_low)
-      }
-    }
-  }
-  
-  # sort tables on zscore
-  pt_list_high <- pt_list_high %>% arrange(desc(Z_score))
-  pt_list_low <- pt_list_low %>% arrange(Z_score)
-  # add lines for increased, decreased
-  extra_line1 <- c("Increased", "") 
-  extra_line2 <- c("Decreased", "")
-  # combine the two lists
-  top_metab_pt <- rbind(extra_line1, pt_list_high, extra_line2, pt_list_low)
-  # remove row names
-  rownames(top_metab_pt) <- NULL
-  # change column names for display
-  colnames(top_metab_pt) <- c("Metabolite", "Z-score")
-  
-  return(top_metab_pt)
-}
diff --git a/DIMS/AddOnFunctions/prepare_data.R b/DIMS/AddOnFunctions/prepare_data.R
deleted file mode 100644
index 07a8805..0000000
--- a/DIMS/AddOnFunctions/prepare_data.R
+++ /dev/null
@@ -1,42 +0,0 @@
-prepare_data <- function(metab_list_all, zscore_patients_local) {
-  # remove "_Zscore" from column (patient) names
-  colnames(zscore_patients_local) <- gsub("_Zscore", "", colnames(zscore_patients_local))
-  # put data into pages, max 20 violin plots per page in PDF
-  metab_interest_sorted <- list()
-  metab_category <- c()
-  for (metab_class_index in 1:length(metab_list_all)) { # "acyl_carnitines" "amino_acids" "crea_gua"
-    metab_class <- names(metab_list_all)[metab_class_index]
-    metab_list <- metab_list_all[[metab_class_index]]
-    if (ncol(metab_list) > 2) {
-      # third column are the alarm values, so reduce the data frame to 2 columns and save list
-      metab_list_alarm <- metab_list
-      metab_list <- metab_list[ , c(1,2)]
-    }
-    # make sure that all HMDB_names have 45 characters 
-    for (metab_index in 1:length(metab_list$HMDB_name)) {
-      if (is.character(metab_list$HMDB_name[metab_index])) { 
-        HMDB_name_separated <- strsplit(metab_list$HMDB_name[metab_index], "")[[1]]
-      } else { HMDB_name_separated <- "strspliterror" }
-      if (length(HMDB_name_separated) <= 45) {
-        HMDB_name_separated <- c(HMDB_name_separated, rep(" ", 45-length(HMDB_name_separated)))
-      } else {
-        HMDB_name_separated <- c(HMDB_name_separated[1:42], "...")
-      }
-      metab_list$HMDB_name[metab_index] <- paste0(HMDB_name_separated, collapse = "")
-    }
-    # find metabolites and ratios in data frame zscore_patients_local
-    metab_interest <- inner_join(metab_list, zscore_patients_local[-2], by = "HMDB_code")
-    # remove column "HMDB_code"
-    metab_interest <- metab_interest[ , -which(colnames(metab_interest) == "HMDB_code")]
-    # put the data frame in long format
-    metab_interest_melt <- reshape2::melt(metab_interest, id.vars = "HMDB_name")
-    # sort on metabolite names (HMDB_name)
-    sort_order <- order(metab_interest_melt$HMDB_name)
-    metab_interest_sorted[[metab_class_index]] <- metab_interest_melt[sort_order, ]
-    metab_category <- c(metab_category, metab_class)
-  }
-  names(metab_interest_sorted) <- metab_category
- 
-  return(metab_interest_sorted)
-   
-} 
diff --git a/DIMS/AddOnFunctions/prepare_data_perpage.R b/DIMS/AddOnFunctions/prepare_data_perpage.R
deleted file mode 100644
index 9f112f3..0000000
--- a/DIMS/AddOnFunctions/prepare_data_perpage.R
+++ /dev/null
@@ -1,47 +0,0 @@
-prepare_data_perpage <- function(metab_interest_sorted, metab_interest_contr, nr_plots_perpage, nr_pat=20, nr_contr=30) {
-  total_nr_pages <- 0
-  metab_perpage <- list()
-  metab_category <- c()
-  for (metab_class_index in 1:length(metab_interest_sorted)) { # "acyl_carnitines" "amino_acids" "crea_gua"
-    # split list into pages, each page containing max nr_plots_perpage (20) compounds
-    metab_interest_perclass <- metab_interest_sorted[[metab_class_index]]
-    metab_class <- names(metab_interest_sorted)[metab_class_index]
-    # add controls
-    metab_interest_contr_perclass <- metab_interest_contr[[metab_class_index]]
-    # number of pages for this class
-    nr_pages <- ceiling(length(unique(metab_interest_perclass$HMDB_name)) / nr_plots_perpage)
-    for (page_nr in 1:nr_pages) {
-      total_nr_pages <- total_nr_pages + 1
-      select_rows_start <- (nr_pat * nr_plots_perpage * (page_nr-1)) + 1
-      select_rows_end   <-  nr_pat * nr_plots_perpage * page_nr
-      metab_onepage_pat <- metab_interest_perclass[select_rows_start:select_rows_end, ]
-      # same for controls
-      select_rows_start_contr <- (nr_contr * nr_plots_perpage * (page_nr-1)) + 1
-      select_rows_end_contr   <-  nr_contr * nr_plots_perpage * page_nr
-      metab_onepage_pcontr <- metab_interest_contr_perclass[select_rows_start_contr:select_rows_end_contr, ]
-      # add controls
-      metab_onepage <- rbind(metab_onepage_pat, metab_onepage_pcontr)
-      # if a page has fewer than nr_plots_perpage plots, fill page with empty plots
-      NA_rows <- which(is.na(metab_onepage$HMDB_name))
-      if (length(NA_rows) > 0) {
-        # repeat the patient and control variables
-        metab_onepage$variable[NA_rows] <- metab_onepage$variable[1:(nr_pat + nr_contr)]
-        # for HMDB name, substitute a number of spaces
-        for (row_nr in NA_rows) {
-          metab_onepage$HMDB_name[row_nr] <- paste0(rep("_", ceiling(row_nr/(nr_pat + nr_contr))), collapse = "")
-        }
-        metab_onepage$HMDB_name <- gsub("_", " ", metab_onepage$HMDB_name)
-        # leave the values at NA
-      }
-      # put data for one page into object with data for all pages
-      metab_perpage[[total_nr_pages]] <- metab_onepage
-      # create list of page headers
-      metab_category <- c(metab_category, paste(metab_class, page_nr, sep="_"))
-    }
-  }
-  # add page headers to list
-  names(metab_perpage) <- metab_category
-  
-  return(metab_perpage)
-  
-}
diff --git a/DIMS/AddOnFunctions/prepare_toplist.R b/DIMS/AddOnFunctions/prepare_toplist.R
deleted file mode 100644
index 369f887..0000000
--- a/DIMS/AddOnFunctions/prepare_toplist.R
+++ /dev/null
@@ -1,29 +0,0 @@
-prepare_toplist <- function(pt_name, zscore_patients_copy) {
-  # set parameters for table
-  top_highest <- 20
-  top_lowest <- 10
-  
-  # extract data for patient of interest (pt_name)
-  pt_list <- zscore_patients_copy[ , c(1,2, which(colnames(zscore_patients_copy) == pt_name))]
-  # sort metabolites on Z-scores for this patient
-  pt_list_sort <- sort(pt_list[ , 3], index.return=TRUE)
-  pt_list_sorted <- pt_list[pt_list_sort$ix, ]
-  # determine top highest and lowest Z-scores for this patient
-  pt_list_sort <- sort(pt_list[ , 3], index.return=TRUE)
-  pt_list_low  <- pt_list[pt_list_sort$ix[1:top_lowest], ]
-  pt_list_high <- pt_list[pt_list_sort$ix[length(pt_list_sort$ix):(length(pt_list_sort$ix)-top_highest+1)], ]
-  # round off Z-scores
-  pt_list_low[ , 3]  <- round(as.numeric(pt_list_low[ , 3]), 2)
-  pt_list_high[ , 3] <- round(as.numeric(pt_list_high[ , 3]), 2)
-  # add lines for increased, decreased
-  extra_line1 <- c("Increased", "", "") 
-  extra_line2 <- c("Decreased", "", "")
-  top_metab_pt <- rbind(extra_line1, pt_list_high, extra_line2, pt_list_low)
-  # remove row names
-  rownames(top_metab_pt) <- NULL
-  
-  # change column names for display
-  colnames(top_metab_pt) <- c("HMDB_ID", "Metabolite", "Z-score")
-  
-  return(top_metab_pt)
-}
diff --git a/DIMS/AddOnFunctions/remove.dupl.2.1.R b/DIMS/AddOnFunctions/remove.dupl.2.1.R
deleted file mode 100644
index 89e307d..0000000
--- a/DIMS/AddOnFunctions/remove.dupl.2.1.R
+++ /dev/null
@@ -1,32 +0,0 @@
-remove.dupl.2.1 <- function(peaklist) {
-  # peaklist = outlist.tot
-  # resultDir = "./results"  
-  # scanmode = "positive"  
-  
-  # peaklist=peaklist[1:100000,]
-  
-  collect=NULL
-  remove=NULL
-  
-  index = which(duplicated(peaklist[, "mzmed.pgrp"]))
-  
-  while (length(index) > 0){
-    
-    peaklist_index = which(peaklist[, "mzmed.pgrp"] == peaklist[index[1], "mzmed.pgrp"])
-    tmp=peaklist[peaklist_index[1],,drop=FALSE]
-    if (!is.na(peaklist[peaklist_index[1],"assi_HMDB"])) tmp[,"assi_HMDB"]=paste(peaklist[peaklist_index,"assi_HMDB"],collapse = ";") else tmp[,"assi_HMDB"]=NA
-    if (!is.na(peaklist[peaklist_index[1],"iso_HMDB"])) tmp[,"iso_HMDB"]=paste(peaklist[peaklist_index,"iso_HMDB"],collapse = ";") else tmp[,"iso_HMDB"]=NA
-    if (!is.na(peaklist[peaklist_index[1],"HMDB_code"])) tmp[,"HMDB_code"]=paste(peaklist[peaklist_index,"HMDB_code"],collapse = ";") else tmp[,"HMDB_code"]=NA
-    if (peaklist[peaklist_index[1],"assi_noise"]!="") tmp[,"assi_noise"]=paste(peaklist[peaklist_index,"assi_noise"],collapse = ";") else tmp[,"assi_noise"]=""
-    
-    collect = rbind(collect, tmp)
-    remove = c(remove, peaklist_index)
-    
-    index=index[-which(index==index[1])]
-  } 
-  
-  peaklist = peaklist[-remove,]
-  peaklist = rbind(peaklist,collect)
-  
-  return(peaklist)
-}
diff --git a/DIMS/AddOnFunctions/remove.dupl.R b/DIMS/AddOnFunctions/remove.dupl.R
deleted file mode 100644
index 227f207..0000000
--- a/DIMS/AddOnFunctions/remove.dupl.R
+++ /dev/null
@@ -1,39 +0,0 @@
-# remove duplicates, peak groups with mz within a few ppm
-# ppmdef should be 2 times bigger than during identification!!!
-remove.dupl <- function(peaklist, ppmdef=4, tolint=0.01) {
-  
-  #   peaklist = outpgrlist
-  #   ppmdef = 2
-  #   tolint = 0.01
-  
-  int.cols = 7:ncol(peaklist)
-  
-  p <- 1
-  while (p < nrow(peaklist)) {
-    mzref <- peaklist[p, "mzmed.pgrp"]
-    # print(mzref)
-    dist.ppm <- ppmdef * mzref / 1000000 
-    sel <- peaklist[ , "mzmed.pgrp"] > (mzref - dist.ppm) & peaklist[ , "mzmed.pgrp"] < (mzref + dist.ppm)
-    subset <- peaklist[sel, ]
-    if (nrow(subset) > 1) {
-      avi <- rep(1, max(int.cols))
-      for (c in int.cols) { avi[c] <- max(subset[ ,c])/mean(subset[ ,c]) }
-      
-      # remove NaN
-      avi[which(is.nan(avi))] = 1
-      
-      if (mean(avi) > (1-tolint) & mean(avi < (1+tolint))) { 
-        peaklist <- peaklist[-which(sel), ]
-        newline <- subset[1, ]
-        newline[ , "mzmed.pgrp"] <- mean(subset[ , "mzmed.pgrp"])
-        newline[ , "mzmin.pgrp"] <- min(subset[ , "mzmin.pgrp"])
-        newline[ , "mzmax.pgrp"] <- max(subset[ , "mzmax.pgrp"])
-        newline[ , int.cols] <- apply(subset[ , int.cols], 2, max)
-        # newline[ , "avg.int"] <- mean(as.numeric(newline[ , int.cols]))
-        peaklist <- rbind(peaklist, newline)
-      }
-    }
-    p <- p + 1
-  }
-  peaklist
-}
diff --git a/DIMS/AddOnFunctions/removeFromRepl.pat.R b/DIMS/AddOnFunctions/removeFromRepl.pat.R
deleted file mode 100644
index ad5b1c8..0000000
--- a/DIMS/AddOnFunctions/removeFromRepl.pat.R
+++ /dev/null
@@ -1,31 +0,0 @@
-removeFromRepl.pat <- function(bad_samples, repl_pattern, nr_replicates) {
-
-  tmp = repl_pattern
-
-  removeFromGroup=NULL
-
-  for (i in 1:length(tmp)){
-    tmp2 = repl_pattern[[i]]
-
-    remove=NULL
-
-    for (j in 1:length(tmp2)){
-      if (tmp2[j] %in% bad_samples){
-        #cat(tmp2[j])
-        #cat(paste("remove",tmp2[j]))
-        #cat(paste("remove i",i))
-        #cat(paste("remove j",j))
-        remove = c(remove, j)
-      }
-    }
-
-    if (length(remove)==nr_replicates) removeFromGroup=c(removeFromGroup,i)
-    if (!is.null(remove)) repl_pattern[[i]]=repl_pattern[[i]][-remove]
-  }
-
-  if (length(removeFromGroup)!=0) {
-    repl_pattern=repl_pattern[-removeFromGroup]
-  }
-
-  return(list("pattern"=repl_pattern))
-}
diff --git a/DIMS/AddOnFunctions/replaceZeros.R b/DIMS/AddOnFunctions/replaceZeros.R
deleted file mode 100644
index 234d961..0000000
--- a/DIMS/AddOnFunctions/replaceZeros.R
+++ /dev/null
@@ -1,90 +0,0 @@
-replaceZeros <- function(outpgrlist, repl_pattern, scanmode, resol, outdir, thresh, ppm) {
-  # file="./results/grouping_rest/negative_1.RData"
-  # scanmode= "negative"
-  # scriptDir="./scripts"
-  # resol=140000
-  # thresh=2000
-  # outdir="./results"
-  
-  # control_label="C"
-  
-  # source(paste(scriptDir, "AddOnFunctions/sourceDir.R", sep="/"))
-  # sourceDir(paste(scriptDir, "AddOnFunctions", sep="/"))
-  
-  # dir.create(paste(outdir, "9-samplePeaksFilled", sep="/"), showWarnings = FALSE)
-  
-  # int.factor=1*10^5 # Number of x used to calc area under Gaussian (is not analytic)
-  # scale=2 # Initial value used to estimate scaling parameter
-  # width=1024
-  # height=768
-  
-  # load(paste0(outdir, "/repl.pattern.",scanmode, ".RData"))
- 
-  # batch_number = strsplit(basename(HMDB_part_file), ".",fixed = TRUE)[[1]][2]
-  # name = as.vector(unlist(strsplit(file, "/", fixed=TRUE)))
-  # name = name[length(name)]
-  # message(paste("File name: ", name))
-  
-  # load samplePeaks
-  # load  outpgrlist
-  # load(file)
-  
-  # # filter on at least signal in two control samples
-  # int.cols = grep(control_label, colnames(outpgrlist),fixed = TRUE)
-  # # barplot(as.numeric(outpgrlist[753, int.cols]))
-  # keep = NULL
-  # keep = apply(outpgrlist, 1, function(x) if (length(which(as.numeric(x[int.cols]) > 0)) > 1) keep=c(keep,TRUE) else keep=c(keep,FALSE))
-  # outpgrlist = outpgrlist[keep,]
-  
-  # replace zeros
-  if (!is.null(outpgrlist)) {
-	  print(dim(outpgrlist))
-	  print(colnames(outpgrlist))
-    for (i in 1:length(names(repl_pattern))){
-	    print(names(repl_pattern)[i])
-      samplePeaks=outpgrlist[,names(repl_pattern)[i]]
-      index=which(samplePeaks<=0)
-      if (!length(index)){
-        next
-      }
-      for (j in 1:length(index)){
-        area = generateGaussian(outpgrlist[index[j],"mzmed.pgrp"],thresh,resol,FALSE,scanmode,int.factor=1*10^5,1,1)$area
-        # for testing purposes, add a fixed random seed
-        # set.seed(123)
-        outpgrlist[index[j], names(repl_pattern)[i]] = rnorm(n=1, mean=area, sd=0.25*area)
-      }
-    }
-  
-  
-    # Identification 
-    
-    # Add average column
-    outpgrlist = cbind(outpgrlist, "avg.int"=apply(outpgrlist[, 7:(ncol(outpgrlist)-4)], 1, mean))
-    
-    if (scanmode=="negative"){
-      label = "MNeg"
-      label2 = "Negative"
-      # take out multiple NaCl adducts
-      look4.add2 <- c("Cl", "Cl37", "For", "NaCl","KCl","H2PO4","HSO4","Na-H","K-H","H2O","I") # ,"min2H","min3H"
-      # HMDB_add_iso=HMDB_add_iso.Neg
-    } else {
-      label = "Mpos"
-      label2 = "Positive"
-      # take out NaCl adducts
-      look4.add2 <- c("Na", "K", "NaCl", "NH4","2Na-H","CH3OH","KCl","NaK-H") # ,"NaCl2","NaCl3","NaCl4","NaCl5")
-      # HMDB_add_iso=HMDB_add_iso.Pos
-    }
-    
-    # Identify noise peaks
-    noise.MZ <- read.table(file="/hpc/dbg_mz/tools/db/TheoreticalMZ_NegPos_incNaCl.txt", sep="\t", header=TRUE, quote = "")
-    noise.MZ <- noise.MZ[(noise.MZ[ , label] != 0), 1:4]
-    final.outlist.idpat2 = ident.hires.noise.HPC(outpgrlist, allAdducts, scanmode=label2, noise.MZ, look4=look4.add2, resol=resol, slope=0, incpt=0, ppm.fixed=ppm, ppm.iso.fixed=ppm)
-    tmp <- final.outlist.idpat2[ , c("assi", "theormz")]
-    colnames(tmp) <- c("assi_noise",  "theormz_noise")
-    
-    final.outlist.idpat3 <- cbind(outpgrlist, tmp)
-   
-    return(final.outlist.idpat3) 
-    # save(final.outlist.idpat3, file=paste("./", name, sep=""))
-  }
-}
diff --git a/DIMS/AddOnFunctions/replaceZeros_setseed.R b/DIMS/AddOnFunctions/replaceZeros_setseed.R
deleted file mode 100644
index 4fafc9a..0000000
--- a/DIMS/AddOnFunctions/replaceZeros_setseed.R
+++ /dev/null
@@ -1,108 +0,0 @@
-replaceZeros <- function(file,scanmode,resol,outdir,thresh,scriptDir,ppm){
-  # file="./results/grouping_rest/negative_1.RData"
-  # scanmode= "negative"
-  # scriptDir="./scripts"
-  # resol=140000
-  # thresh=2000
-  # outdir="./results"
-  
-  control_label="C"
-  
-  source(paste(scriptDir, "AddOnFunctions/sourceDir.R", sep="/"))
-  sourceDir(paste(scriptDir, "AddOnFunctions", sep="/"))
-  
-  dir.create(paste(outdir, "9-samplePeaksFilled", sep="/"), showWarnings = FALSE)
-  
-  # int.factor=1*10^5 # Number of x used to calc area under Gaussian (is not analytic)
-  # scale=2 # Initial value used to estimate scaling parameter
-  # width=1024
-  # height=768
-  
-  # message(paste("file", file))
-  # message(paste("scanmode", scanmode))
-  # message(paste("resol", resol))
-  # message(paste("outdir", outdir))
-  # message(paste("thresh", thresh))
-  # message(paste("scriptDir", scriptDir))
-  
-  load(paste0(outdir, "/repl.pattern.",scanmode, ".RData"))
-  
-  name = as.vector(unlist(strsplit(file, "/", fixed=TRUE)))
-  name = name[length(name)]
-  # message(paste("File name: ", name))
-  
-  # load samplePeaks
-  # load  outpgrlist
-  load(file)
-  
-  # #################################################################################
-  # # filter on at least signal in two control samples
-  # int.cols = grep(control_label, colnames(outpgrlist),fixed = TRUE)
-  # # barplot(as.numeric(outpgrlist[753, int.cols]))
-  # keep = NULL
-  # keep = apply(outpgrlist, 1, function(x) if (length(which(as.numeric(x[int.cols]) > 0)) > 1) keep=c(keep,TRUE) else keep=c(keep,FALSE))
-  # outpgrlist = outpgrlist[keep,]
-  # #################################################################################
-  
-  ################################################################################
-  # For now only replace zeros
-  if (!is.null(outpgrlist)) {
-    for (i in 1:length(names(repl.pattern.filtered))){
-      samplePeaks=outpgrlist[,names(repl.pattern.filtered)[i]]
-      index=which(samplePeaks<=0)
-      if (!length(index)){
-        next
-      }
-      for (j in 1:length(index)){
-        area = generateGaussian(outpgrlist[index[j],"mzmed.pgrp"],thresh,resol,FALSE,scanmode,int.factor=1*10^5,1,1)$area
-        # for testing purposes, add a fixed random seed
-        set.seed(123)
-        outpgrlist[index[j], names(repl.pattern.filtered)[i]] = rnorm(n=1, mean=area, sd=0.25*area)
-      }
-    }
-  
-  ################################################################################
-  
-  
-  #################### identification #########################################################
-    # load(paste(scriptDir, "../db/HMDB_add_iso_corrNaCl.RData", sep="/")) # E:\Metabolomics\LargeDataBase\Apr25_2016
-    
-    # Add average column
-    outpgrlist = cbind(outpgrlist, "avg.int"=apply(outpgrlist[, 7:(ncol(outpgrlist)-4)], 1, mean))
-    
-    if (scanmode=="negative"){
-      label = "MNeg"
-      label2 = "Negative"
-      # take out multiple NaCl adducts
-      look4.add2 <- c("Cl", "Cl37", "For", "NaCl","KCl","H2PO4","HSO4","Na-H","K-H","H2O","I") # ,"min2H","min3H"
-      # HMDB_add_iso=HMDB_add_iso.Neg
-    } else {
-      label = "Mpos"
-      label2 = "Positive"
-      # take out NaCl adducts
-      look4.add2 <- c("Na", "K", "NaCl", "NH4","2Na-H","CH3OH","KCl","NaK-H") # ,"NaCl2","NaCl3","NaCl4","NaCl5")
-      # HMDB_add_iso=HMDB_add_iso.Pos
-    }
-    
-    # # Identification using large database
-    # final.outlist.idpat = iden.code(outpgrlist, HMDB_add_iso, ppm=2, label)
-    # message(paste(sum(final.outlist.idpat[ , "assi_HMDB"] != ""), "assigned peakgroups"))
-    # message(paste(sum(final.outlist.idpat[ , "iso_HMDB"] != ""), "assigned isomeres"))
-    
-    # Identify noise peaks
-    noise.MZ <- read.table(file="/hpc/dbg_mz/tools/db/TheoreticalMZ_NegPos_incNaCl.txt", sep="\t", header=TRUE, quote = "")
-    noise.MZ <- noise.MZ[(noise.MZ[ , label] != 0), 1:4]
-    
-    # Replace "Negative" by "negative" in ident.hires.noise
-    final.outlist.idpat2 = ident.hires.noise.HPC(outpgrlist, allAdducts, scanmode=label2, noise.MZ, look4=look4.add2, resol=resol, slope=0, incpt=0, ppm.fixed=ppm, ppm.iso.fixed=ppm)
-    # message(paste(sum(final.outlist.idpat2[ , "assi"] != ""), "assigned noise peaks"))
-    tmp <- final.outlist.idpat2[ , c("assi", "theormz")]
-    colnames(tmp) <- c("assi_noise",  "theormz_noise")
-    
-    final.outlist.idpat3 <- cbind(outpgrlist, tmp)
-    #############################################################################################
-    
-    # message(paste("File saved: ", paste(outdir, "/samplePeaksFilled/", name, sep="")))
-    save(final.outlist.idpat3, file=paste(outdir, "/9-samplePeaksFilled/", name, sep=""))
-  }
-}
diff --git a/DIMS/AddOnFunctions/run.vbs b/DIMS/AddOnFunctions/run.vbs
deleted file mode 100644
index 640116a..0000000
--- a/DIMS/AddOnFunctions/run.vbs
+++ /dev/null
@@ -1,22 +0,0 @@
-Option Explicit
-On Error Resume Next
-RunExcelMacro
-
-Sub RunExcelMacro() 
-
-Dim xlApp 
-Dim xlBook 
-Dim xlBook_persenal
-
-Set xlApp = CreateObject("Excel.Application")
-xlApp.DisplayAlerts = FALSE
-Set xlBook_persenal = xlApp.Workbooks.Open("C:\Users\awillem8\AppData\Roaming\Microsoft\Excel\XLSTART\PERSONAL.XLSB", 0, True)
-Set xlBook = xlApp.Workbooks.Open("C:\Users\mkerkho7\testmap\xls\P181_Aberant_HMDB.xlsx", 0, True)
-xlApp.Run "PERSONAL.XLSB!FixAndResize"
-xlBook.SaveAs "C:\Users\mkerkho7\testmap\results\P181_Aberant_HMDB.xlsx"
-xlBook.Close False 
-xlApp.Quit
-Set xlBook = Nothing
-Set xlBook_persenal = Nothing 
-Set xlApp = Nothing 
-End Sub
diff --git a/DIMS/AddOnFunctions/runVBAMacro.R b/DIMS/AddOnFunctions/runVBAMacro.R
deleted file mode 100644
index 3836b2f..0000000
--- a/DIMS/AddOnFunctions/runVBAMacro.R
+++ /dev/null
@@ -1,48 +0,0 @@
-runVBAMacro <- function(dir, dir2, vb_script) {
-  
-#   dir="E:\\Metabolomics\\Lynne_BSP-2015-08-05\\results\\xls\\"
-#   dir2="E:\\Metabolomics\\Lynne_BSP-2015-08-05\\results\\xls_fixed\\"  
-  
-dir.create(dir2)
-files=list.files(dir)
-
-script_1 = paste("Option Explicit
-On Error Resume Next
-RunExcelMacro
-
-Sub RunExcelMacro() 
-
-Dim xlApp 
-Dim xlBook 
-Dim xlBook_persenal
-
-Set xlApp = CreateObject(\"Excel.Application\")
-xlApp.DisplayAlerts = FALSE
-Set xlBook_persenal = xlApp.Workbooks.Open(\"C:\\Users\\awillem8\\AppData\\Roaming\\Microsoft\\Excel\\XLSTART\\PERSONAL.XLSB\", 0, True)
-Set xlBook = xlApp.Workbooks.Open(\"", dir, sep="") 
-
-script_2 = "\", 0, True)
-xlApp.Run \"PERSONAL.XLSB!FixAndResize\"
-xlBook.SaveAs \""
-
-script_3 = "\"
-xlBook.Close False 
-xlApp.Quit
-Set xlBook = Nothing
-Set xlBook_persenal = Nothing 
-Set xlApp = Nothing 
-End Sub"
-
-for (i in 1:length(files)){
-  script = paste(script_1, files[i], script_2, dir2, files[i], script_3, sep="")  # dir2, files[i], 
-  
-  message(script)
-  
-  fileConn = file("./src/run.vbs")
-  writeLines(script, fileConn)
-  close(fileConn)
-  
-  system(vb_script)
-}
-}
-
diff --git a/DIMS/AddOnFunctions/searchMZRange.R b/DIMS/AddOnFunctions/searchMZRange.R
deleted file mode 100644
index 43a7171..0000000
--- a/DIMS/AddOnFunctions/searchMZRange.R
+++ /dev/null
@@ -1,187 +0,0 @@
-searchMZRange <- function(range,values,int.factor,scale,resol,outdir,sampname,scanmode,plot,width,height,thresh){
-  # range=sub_range
-  
-  end=NULL
-  index=as.vector(which(range!=0))
-  
-  # bad infusion
-  if (length(index)==0) return(values)
-  
-  start=index[1]
-  subRangeLength = 15
-  
-  for (i in 1:length(index)){
-    # for (i in 1:129000){
-    
-    if (i<length(index) & (index[i+1] - index[i]) > 1){
-      
-      end=index[i]
-      
-      # start=395626
-      # end=395640
-      # i=25824
-      
-      # gaan met de banaan
-      # 128836
-      # start 1272853
-      # mz start 316.83302
-      # end 1272870
-      # mz end 316.84269
-      
-      x = as.numeric(names(range)[c(start:end)])
-      y = as.vector(range[c(start:end)])
-      #       # Trim zeros
-      #       x = as.vector(trimZeros(x,y)[[1]])
-      #       y = as.vector(trimZeros(x,y)[[2]])
-      
-      if (length(y)!=0) {
-        
-        # check if intensity above thresh
-        if (max(y) < thresh | is.nan(max(y))) {
-          start=index[i+1]
-          next
-        }
-        
-        # cat("gaan met de banaan")
-        # cat(i)
-        # cat(paste("start", start, sep=" "))
-        # cat(paste("mz start", x[1], sep=" "))
-        # cat(paste("end", end, sep=" "))
-        # cat(paste("mz end", x[length(x)], sep=" "))
-        
-        if (length(y)>subRangeLength) {
-          
-          y[which(y<min(y)*1.1)] = 0
-          sub_range = y
-          names(sub_range) = x
-          
-          # Range to long!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
-          values = searchMZRange(sub_range,values,int.factor,scale,resol,outdir,sampname,scanmode,plot,width,height,thresh)
-          
-        } else if (length(y)>3) {
-          # Check only zeros
-          if (sum(y)==0) next
-          
-          rval = fitGaussianInit(x,y,int.factor,scale,resol,outdir,sampname, scanmode, plot,width,height,i,start,end)
-          
-          if (rval$qual[1]==1) {
-            
-            rval = generateGaussian(x,y,resol,plot,scanmode,int.factor, width, height)
-            
-            values$mean = c(values$mean, rval$mean)
-            values$area = c(values$area, rval$area)
-            values$nr = c(values$nr, sampname)
-            values$min = c(values$min, rval$min)
-            values$max = c(values$max, rval$max)
-            values$qual = c(values$qual, 0)
-            
-            values$spikes = values$spikes + 1
-            
-          } else {
-            for (j in 1:length(rval$mean)){
-              values$mean = c(values$mean, rval$mean[j])
-              values$area = c(values$area, rval$area[j])
-              values$nr = c(values$nr, sampname)
-              values$min = c(values$min, rval$min[1])
-              values$max = c(values$max, rval$max[1])
-              values$qual = c(values$qual, rval$qual[1])
-            }
-          }
-          
-        } else {
-          
-          rval = generateGaussian(x,y,resol,plot,scanmode,int.factor, width, height)
-          
-          values$mean = c(values$mean, rval$mean)
-          values$area = c(values$area, rval$area)
-          values$nr = c(values$nr, sampname)
-          values$min = c(values$min, rval$min)
-          values$max = c(values$max, rval$max)
-          values$qual = c(values$qual, 0)
-          
-          values$spikes = values$spikes + 1
-        }
-      }
-      start=index[i+1]
-    }
-  }
-  
-  # last little range
-  end = index[length(index)]
-  x = as.numeric(names(range)[c(start:end)])
-  y = as.vector(range[c(start:end)])
-  
-  #   x = as.vector(trimZeros(x,y)[[1]])
-  #   y = as.vector(trimZeros(x,y)[[2]])
-  
-  if (length(y)!=0) {
-    
-    # check if intensity above thresh
-    if (max(y) < thresh | is.nan(max(y))) {
-      #start=index[i+1]
-      # do nothing!!
-    } else {
-      
-      #       cat("gaan met de banaan")
-      #       cat(paste("start", start, sep=" "))
-      #       cat(paste("mz start", x[1], sep=" "))
-      #       cat(paste("end", end, sep=" "))
-      #       cat(paste("mz end", x[length(x)], sep=" "))
-      
-      if (length(y)>subRangeLength) {
-        
-        y[which(y<min(y)*1.1)] = 0
-        sub_range = y
-        names(sub_range) = x
-        
-        # Range to long!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
-        values = searchMZRange(sub_range,values,int.factor,scale,resol,outdir,sampname,scanmode,plot,width,height,thresh)
-        
-      } else if (length(y)>3) {
-        
-        # Check only zeros
-        if (sum(y)==0) next
-        
-        rval = fitGaussianInit(x,y,int.factor,scale,resol,outdir,sampname,scanmode,plot,width,height,i,start,end)
-        
-        if (rval$qual[1]==1) {
-          #cat("Quality = 1!!!")
-          rval = generateGaussian(x,y,resol,plot,scanmode,int.factor, width, height)
-          
-          values$mean = c(values$mean, rval$mean)
-          values$area = c(values$area, rval$area)
-          values$nr = c(values$nr, sampname)
-          values$min = c(values$min, rval$min)
-          values$max = c(values$max, rval$max)
-          values$qual = c(values$qual, 0)
-          
-          values$spikes = values$spikes + 1
-          
-        } else {
-          for (j in 1:length(rval$mean)){
-            values$mean = c(values$mean, rval$mean[j])
-            values$area = c(values$area, rval$area[j])
-            values$nr = c(values$nr, sampname)
-            values$min = c(values$min, rval$min[1])
-            values$max = c(values$max, rval$max[1])
-            values$qual = c(values$qual, rval$qual[1])
-          }
-        }
-      } else {
-        rval = generateGaussian(x,y,resol,plot,scanmode,int.factor, width, height)
-        
-        values$mean = c(values$mean, rval$mean)
-        values$area = c(values$area, rval$area)
-        values$nr = c(values$nr, sampname)
-        values$min = c(values$min, rval$min)
-        values$max = c(values$max, rval$max)
-        values$qual = c(values$qual, 0)
-        
-        values$spikes = values$spikes + 1
-      }
-    }
-  }
-  
-  return(values)
-  #return(list("mean"=retVal$mean, "area"=retVal$area, "qual"=retVal$qual, "min"=retVal$min, "max"=retVal$max, "nr"=sample.nr, "spikes"=spikes))
-}
diff --git a/DIMS/AddOnFunctions/sourceDir.R b/DIMS/AddOnFunctions/sourceDir.R
deleted file mode 100644
index abf4c9e..0000000
--- a/DIMS/AddOnFunctions/sourceDir.R
+++ /dev/null
@@ -1,8 +0,0 @@
-# source add on functions
-sourceDir <- function(path, trace = TRUE, ...) {
-  for (nm in list.files(path, pattern = "[.][RrSsQq]$")) {
-    #if(trace) cat(nm,":")
-    source(file.path(path, nm), ...)
-    #if(trace) cat("\n")
-  }
-}
diff --git a/DIMS/AddOnFunctions/statistics_z.R b/DIMS/AddOnFunctions/statistics_z.R
deleted file mode 100644
index 0a92d3d..0000000
--- a/DIMS/AddOnFunctions/statistics_z.R
+++ /dev/null
@@ -1,69 +0,0 @@
-statistics_z <- function(peaklist, sortCol, adducts){
-  # peaklist=as.data.frame(outlist.adducts.HMDB)
-  # plotdir="./results/plots/adducts"
-  # filename="./results/allpgrps_stats.txt"
-  # adducts=TRUE
-  
-  # peaklist=outlist.tot
-  # sortCol=NULL
-  # adducts=FALSE
-  
-  case_label = "P"
-  control_label = "C"
-  startcol = dim(peaklist)[2]+3 
-  
-  # calculate mean and sd for Control group
-  ctrl.cols <- grep(control_label, colnames(peaklist),fixed = TRUE) # 5:41
-  int.cols <- c(grep(control_label, colnames(peaklist),fixed = TRUE), grep(case_label, colnames(peaklist),fixed = TRUE))
-  peaklist[,int.cols][peaklist[,int.cols]==0] = NA
-  
-  # tmp = data.matrix(peaklist[ , ctrl.cols], rownames.force = TRUE)
-  tmp = peaklist[ , ctrl.cols, drop=FALSE]
-  
-  peaklist$avg.ctrls = apply(tmp, 1, function(x) mean(as.numeric(x),na.rm = TRUE))
-  peaklist$sd.ctrls = apply(tmp, 1, function(x) sd(as.numeric(x),na.rm = TRUE))
-  
-  cnames.z = NULL
-  
-  for (i in int.cols) {
-    # message(i)
-    cname = colnames(peaklist)[i]
-    cnames.z = c(cnames.z, paste(cname, "Zscore", sep="_"))
-    zscores.1col = (as.numeric(as.vector(unlist(peaklist[ , i]))) - peaklist$avg.ctrls) / peaklist$sd.ctrls
-    peaklist = cbind(peaklist, zscores.1col)
-  }
-  
-  colnames(peaklist)[startcol:ncol(peaklist)] = cnames.z
-  
-  z.cols = grep("Zscore", colnames(peaklist),fixed = TRUE)
-  
-  if (!adducts){
-    if ((dim(peaklist[, z.cols])[2]+6)!=(startcol-1)){
-      ppmdev=array(1:dim(peaklist)[1], dim=c(dim(peaklist)[1]))
-      
-      # calculate ppm deviation
-      for (i in 1:dim(peaklist)[1]){
-        if (!is.na(peaklist$theormz_HMDB[i]) & !is.null(peaklist$theormz_HMDB[i]) & (peaklist$theormz_HMDB[i]!="")){
-          ppmdev[i] = 10^6*(as.numeric(as.vector(peaklist$mzmed.pgrp[i]))-as.numeric(as.vector(peaklist$theormz_HMDB[i])))/as.numeric(as.vector(peaklist$theormz_HMDB[i]))
-        } else {
-          ppmdev[i]=NA
-        }
-      }
-      
-      peaklist = cbind(peaklist[, 1:6], ppmdev=ppmdev, peaklist[ , 7:ncol(peaklist)])
-    }  
-  }
-  
-  #peaklist = peaklist[order(peaklist[,sortCol]),]
-  
-  #   # Order on average Z-score
-  #   tmp = peaklist[,grep("Zscore", colnames(peaklist))]
-  #   tmp.p = tmp[,grep("P", colnames(tmp)),drop=FALSE]
-  #   tmp.p.avg = apply(tmp.p, 1, mean)
-  #   
-  #   peaklist = cbind(peaklist, "avg.z.score"=tmp.p.avg)
-  #   peaklist = peaklist[order(abs(tmp.p.avg), decreasing = TRUE),]
-  
-  return(peaklist)
-  
-}  
diff --git a/DIMS/AddOnFunctions/sumCurves.R b/DIMS/AddOnFunctions/sumCurves.R
deleted file mode 100644
index bcc1485..0000000
--- a/DIMS/AddOnFunctions/sumCurves.R
+++ /dev/null
@@ -1,39 +0,0 @@
-sumCurves <- function(mean1, mean2, scale1, scale2, sigma1, sigma2, x2, x, resol, plot) {
-  #   mean1=mean[i-1]
-  #   mean2=mean[i]
-  #   scale1=scale[i-1]
-  #   scale2=scale[i]
-  #   sigma1=sigma[i-1]
-  #   sigma2=sigma[i]
-  
-  # message("=============================================================> sum 2 curves!")
-  
-  sumFit=(scale1*dnorm(x2,mean1,sigma1))+(scale2*dnorm(x2,mean2,sigma2))
-  if (plot) lines(x2,sumFit,col="brown")
-  
-  #mean1plus2 = mean(c(mean1,mean2))
-  mean1plus2 = weighted.mean(c(mean1,mean2),c(max(scale1*dnorm(x2,mean1,sigma1)),max(scale2*dnorm(x2,mean2,sigma2))))
-  
-  if (plot) abline(v = mean1plus2, col="brown")
-  fwhm = getFwhm(mean1plus2, resol)
-  half_max = max(sumFit)*0.5
-  if (plot) lines(c(mean1plus2 - 0.5*fwhm, mean1plus2 + 0.5*fwhm),c(half_max,half_max),col="orange")
-  
-  # sumFit=(scale1*dnorm(x,mean1,sigma1))+(scale2*dnorm(x,mean2,sigma2))
-  #  fq=abs(sum(y) - sum(sumFit))/sum(y)
-  
-  #   h2=c(paste("mean =", mean1plus2, sep=" "),
-  #        paste("fq =", fq, sep=" "))
-  #   
-  #   legend("topright", legend=h2)
-  
-  # I assume that the sum of the distributions if also normal, which is not
-  #area = sum(scale1*dnorm(x2,mean1,sigma1))+sum(scale2*dnorm(x2,mean2,sigma2))
-  #area = max(scale1*dnorm(x2,mean1,sigma1))+max(scale2*dnorm(x2,mean2,sigma2))
-  area = max(sumFit) 
-  scale = scale1 + scale2
-  sigma = (fwhm/2)*0.85
-  
-  return(list("mean"= mean1plus2,"area"=area, "scale"=scale, "sigma"=sigma)) # "qual"=fq
-  
-} 
diff --git a/DIMS/AddOnFunctions/trimZeros.R b/DIMS/AddOnFunctions/trimZeros.R
deleted file mode 100644
index 0fc5ea3..0000000
--- a/DIMS/AddOnFunctions/trimZeros.R
+++ /dev/null
@@ -1,8 +0,0 @@
-trimZeros <- function(x, y) {
-  tmp = which(y==0)
-  if (length(tmp)!=0){
-    y = y[-tmp]
-    x = x[-tmp]
-  }
-  return(list(x,y))
-}

From d586b345f95b5de5b630d4395437646817af2de9 Mon Sep 17 00:00:00 2001
From: ALuesink <anneluesink@live.nl>
Date: Tue, 27 Aug 2024 11:01:20 +0200
Subject: [PATCH 02/18] Changes for HMDB V5 update

---
 DIMS/GenerateExcel.R              | 21 +++++++++--
 DIMS/PeakGrouping.R               | 62 ++++++++++++++++++++++---------
 DIMS/SumAdducts.R                 |  2 +-
 DIMS/Utils/merge_duplicate_rows.R |  5 ++-
 4 files changed, 67 insertions(+), 23 deletions(-)

diff --git a/DIMS/GenerateExcel.R b/DIMS/GenerateExcel.R
index d36ac29..e6d6b70 100644
--- a/DIMS/GenerateExcel.R
+++ b/DIMS/GenerateExcel.R
@@ -102,7 +102,8 @@ tmp <- cbind(tmp, "HMDB_name" = tmp_hmdb_name_pos)
 outlist <- rbind(tmp, tmp_pos_left, tmp_neg_left)
 
 # Filter for biological relevance
-peaks_in_list <- which(rownames(outlist) %in% rownames(rlvnc))
+# peaks_in_list <- which(rownames(outlist) %in% rownames(rlvnc))
+peaks_in_list <- which(rownames(outlist) %in% rlvnc$HMDB_key)
 outlist <- cbind(outlist[peaks_in_list, ], as.data.frame(rlvnc[rownames(outlist)[peaks_in_list], ]))
 # filter out all irrelevant HMDBs
 outlist <- outlist %>%
@@ -568,13 +569,25 @@ if (z_score == 1) {
     pku_sample_name <- positive_control_list[grepl("P1003", positive_control_list)]
     lpi_sample_name <- positive_control_list[grepl("P1005", positive_control_list)]
 
-    pa_codes <- c("HMDB00824", "HMDB00783", "HMDB00123")
-    pku_codes <- c("HMDB00159")
-    lpi_codes <- c("HMDB00904", "HMDB00641", "HMDB00182")
+    # pa_codes <- c("HMDB0000824", "HMDB0000783", "HMDB0000123")
+    # pku_codes <- c("HMDB0000159")
+    # lpi_codes <- c("HMDB0000904", "HMDB0000641", "HMDB0000182")
+
+    pa_codes <- c("HMDB0000824", "HMDB0000725", "HMDB0000123")
+    pku_codes <- c("HMDB0000159")
+    lpi_codes <- c("HMDB0000904", "HMDB0000641", "HMDB0000182")
+
+    pa_names <- c("Propionylcarnitine", "Propionylglycine", "Glycine")
+    pku_names <- c("L-Phenylalanine")
+    lpi_names <- c("Citrulline", "L-Glutamine", "L-Lysine")
 
     pa_data <- outlist[pa_codes, c("HMDB_code", "name", pa_sample_name)]
     pa_data <- reshape2::melt(pa_data, id.vars = c("HMDB_code", "name"))
     colnames(pa_data) <- c("HMDB.code", "HMDB.name", "Sample", "Zscore")
+    # change HMDB names because propionylglycine doesn't have its own line, rowname is HMDB0000725 (4-hydroxyproline)
+    pa_data$HMDB.name <- pa_codes
+    # change HMDB codes so the propionylglycine is the correct HMDB ID
+    pa_data$HMDB.code <- c("HMDB0000824", "HMDB0000783", "HMDB0000123")
 
     pku_data <- outlist[pku_codes, c("HMDB_code", "name", pku_sample_name)]
     pku_data <- reshape2::melt(pku_data, id.vars = c("HMDB_code", "name"))
diff --git a/DIMS/PeakGrouping.R b/DIMS/PeakGrouping.R
index dc29e76..a801500 100644
--- a/DIMS/PeakGrouping.R
+++ b/DIMS/PeakGrouping.R
@@ -105,9 +105,15 @@ while (dim(hmdb_add_iso)[1] > 0) {
       # Compose a list of compounds, adducts or isotopes with corresponding m/z
       if (dim(tmplist_mass)[1] > 0) {
         # metabolites
-        assi_hmdb <- as.character(paste(as.character(tmplist_mass[, "CompoundName"]), 
+        assi_hmdb <- as.character(paste(as.character(tmplist_mass[, "CompoundName"]),
 					collapse = ";"))
-        hmdb_code <- as.character(paste(as.character(rownames(tmplist_mass)), 
+        all_hmdb_names <- as.character(paste(as.character(tmplist_mass[, "HMDB_name_all"]),
+					collapse = ";"))
+        hmdb_code <- as.character(paste(as.character(rownames(tmplist_mass)),
+					collapse = ";"))
+        all_hmdb_ids <- as.character(paste(as.character(tmplist_mass[, "HMDB_ID_all"]),
+					collapse = ";"))
+        all_sec_hmdb_ids <- as.character(paste(as.character(tmplist_mass[, "sec_HMDB_ID"]),
 					collapse = ";"))
         theormz_hmdb <- as.numeric(tmplist_mass[1, column_label])
 
@@ -115,17 +121,27 @@ while (dim(hmdb_add_iso)[1] > 0) {
         if (!is.null(tmplist_mass_adduct)) {
           if (dim(tmplist_mass_adduct)[1] > 0) {
             if (is.na(assi_hmdb)) {
-              assi_hmdb <- as.character(paste(as.character(tmplist_mass_adduct[, "CompoundName"]), 
+              assi_hmdb <- as.character(paste(as.character(tmplist_mass_adduct[, "CompoundName"]),
+					      collapse = ";"))
+	            all_hmdb_names <- as.character(paste(as.character(tmplist_mass_adduct[, "HMDB_name_all"]),
+                collapse = ";"))
+              hmdb_code <- as.character(paste(as.character(rownames(tmplist_mass_adduct)),
+					      collapse = ";"))
+              all_hmdb_ids <- as.character(paste(as.character(tmplist_mass_adduct[, "HMDB_ID_all"]),
 					      collapse = ";"))
-              hmdb_code <- as.character(paste(as.character(rownames(tmplist_mass_adduct)), 
+              all_sec_hmdb_ids <- as.character(paste(as.character(tmplist_mass[, "sec_HMDB_ID"]),
 					      collapse = ";"))
             } else {
-              assi_hmdb <- paste(assi_hmdb,
-                                 as.character(paste(as.character(tmplist_mass_adduct[, "CompoundName"]), 
+              assi_hmdb <- paste(assi_hmdb, as.character(paste(as.character(tmplist_mass_adduct[, "CompoundName"]),
 						    collapse = ";")), sep = ";")
-              hmdb_code <- paste(hmdb_code,
-                                 as.character(paste(as.character(rownames(tmplist_mass_adduct)), 
+	            all_hmdb_names <- paste(all_hmdb_names, as.character(paste(as.character(tmplist_mass_adduct[, "HMDB_name_all"]),
+                collapse = ";")), sep=";")
+              hmdb_code <- paste(hmdb_code, as.character(paste(as.character(rownames(tmplist_mass_adduct)),
 						    collapse = ";")), sep = ";")
+              all_hmdb_ids <- paste(all_hmdb_ids, as.character(paste(as.character(tmplist_mass_adduct[, "HMDB_ID_all"]),
+					      collapse = ";")),  sep = ";")
+              all_sec_hmdb_ids <- as.character(paste(as.character(tmplist_mass[, "sec_HMDB_ID"]),
+					      collapse = ";"))
             }
           }
         }
@@ -146,17 +162,27 @@ while (dim(hmdb_add_iso)[1] > 0) {
         if (!is.null(tmplist_mass_adduct)) {
           if (dim(tmplist_mass_adduct)[1] > 0) {
             if (is.na(assi_hmdb)) {
-              assi_hmdb <- as.character(paste(as.character(tmplist_mass_adduct[, "CompoundName"]), 
+              assi_hmdb <- as.character(paste(as.character(tmplist_mass_adduct[, "CompoundName"]),
+					      collapse = ";"))
+	            all_hmdb_names <- as.character(paste(as.character(tmplist_mass_adduct[, "HMDB_name_all"]),
+                collapse = ";"))
+              hmdb_code <- as.character(paste(as.character(rownames(tmplist_mass_adduct)),
 					      collapse = ";"))
-              hmdb_code <- as.character(paste(as.character(rownames(tmplist_mass_adduct)), 
+              all_hmdb_ids <- as.character(paste(as.character(tmplist_mass_adduct[, "HMDB_ID_all"]),
 					      collapse = ";"))
+              all_sec_hmdb_ids <- as.character(paste(as.character(tmplist_mass[, "sec_HMDB_ID"]),
+					      collapse = ";"))                
             } else {
-              assi_hmdb <- paste(assi_hmdb,
-                                 as.character(paste(as.character(tmplist_mass_adduct[, "CompoundName"]), 
+              assi_hmdb <- paste(assi_hmdb, as.character(paste(as.character(tmplist_mass_adduct[, "CompoundName"]),
 						    collapse = ";")), sep = ";")
-              hmdb_code <- paste(hmdb_code,
-                                 as.character(paste(as.character(rownames(tmplist_mass_adduct)), 
+	            all_hmdb_names <- paste(all_hmdb_names, as.character(paste(as.character(tmplist_mass_adduct[, "HMDB_name_all"]),
+                collapse = ";")), sep=";")
+              hmdb_code <- paste(hmdb_code, as.character(paste(as.character(rownames(tmplist_mass_adduct)),
 						    collapse = ";")), sep = ";")
+              all_hmdb_ids <- paste(all_hmdb_ids, as.character(paste(as.character(tmplist_mass_adduct[, "HMDB_ID_all"]),
+					      collapse = ";")),  sep = ";")
+              all_sec_hmdb_ids <- as.character(paste(as.character(tmplist_mass[, "sec_HMDB_ID"]),
+					      collapse = ";"))
             }
           }
         }
@@ -164,7 +190,7 @@ while (dim(hmdb_add_iso)[1] > 0) {
         # isotopes of metabolites
         if (!is.null(tmplist_mass_iso)) {
           if (dim(tmplist_mass_iso)[1] > 0) {
-            iso_hmdb <- as.character(paste(as.character(tmplist_mass_iso[, "CompoundName"]), 
+            iso_hmdb <- as.character(paste(as.character(tmplist_mass_iso[, "CompoundName"]),
 					   collapse = ";"))
           }
         }
@@ -173,7 +199,7 @@ while (dim(hmdb_add_iso)[1] > 0) {
       } else if (!is.null(tmplist_mass_iso)) {
         if (dim(tmplist_mass_iso)[1] > 0) {
           theormz_hmdb <- as.numeric(tmplist_mass_iso[1, column_label])
-          iso_hmdb <- as.character(paste(as.character(tmplist_mass_iso[, "CompoundName"]), 
+          iso_hmdb <- as.character(paste(as.character(tmplist_mass_iso[, "CompoundName"]),
 					 collapse = ";"))
         }
       }
@@ -184,7 +210,9 @@ while (dim(hmdb_add_iso)[1] > 0) {
       data.frame("mzmed.pgrp" = mzmed_pgrp, "fq.best" = fq_best_pgrp, "fq.worst" = fq_worst_pgrp, nrsamples, 
 		 "mzmin.pgrp" = mzmin_pgrp, "mzmax.pgrp" = mzmax_pgrp),
       t(as.matrix(ints_allsamps)),
-      data.frame("assi_HMDB" = assi_hmdb, "iso_HMDB" = iso_hmdb, "HMDB_code" = hmdb_code, "theormz_HMDB" = theormz_hmdb)
+      # data.frame("assi_HMDB" = assi_hmdb, "iso_HMDB" = iso_hmdb, "HMDB_code" = hmdb_code, "theormz_HMDB" = theormz_hmdb)
+      data.frame("assi_HMDB" = assi_hmdb, "all_hmdb_names" = all_hmdb_names, "iso_HMDB" = iso_hmdb,
+		 "HMDB_code" = hmdb_code, "all_hmdb_ids" = all_hmdb_ids, "sec_hmdb_ids" = all_sec_hmdb_ids, "theormz_HMDB" = theormz_hmdb)
     ))
   }
 
diff --git a/DIMS/SumAdducts.R b/DIMS/SumAdducts.R
index 699da4a..4670e88 100755
--- a/DIMS/SumAdducts.R
+++ b/DIMS/SumAdducts.R
@@ -9,7 +9,7 @@ z_score <- as.numeric(cmd_args[3])
 
 sum_adducts <- function(peaklist, theor_mz, grpnames_long, adducts, batch_number, scanmode, outdir, z_score) {
   hmdb_codes <- rownames(theor_mz)
-  hmdb_names <- theor_mz[, 1, drop = FALSE]
+  hmdb_names <- theor_mz[, "CompoundName", drop = FALSE]
   hmdb_names[] <- lapply(hmdb_names, as.character)
 
   # remove isotopes
diff --git a/DIMS/Utils/merge_duplicate_rows.R b/DIMS/Utils/merge_duplicate_rows.R
index da3bc52..901f116 100644
--- a/DIMS/Utils/merge_duplicate_rows.R
+++ b/DIMS/Utils/merge_duplicate_rows.R
@@ -41,7 +41,10 @@ merge_duplicate_rows <- function(peakgroup_list) {
     single_peakgroup[, "assi_noise"] <- collapse("assi_noise", peakgroup_list, peaklist_index)
     if (single_peakgroup[, "assi_noise"] == ";") single_peakgroup[, "assi_noise"] <- NA
     single_peakgroup[, "theormz_noise"] <- collapse("theormz_noise", peakgroup_list, peaklist_index)
-    if (single_peakgroup[,"theormz_noise"] == "0;0") single_peakgroup[, "theormz_noise"] <- NA
+    if (single_peakgroup[, "theormz_noise"] == "0;0") single_peakgroup[, "theormz_noise"] <- NA
+    single_peakgroup[, "all_hmdb_ids"] <- collapse("all_hmdb_ids", peakgroup_list, peaklist_index)
+    single_peakgroup[, "sec_hmdb_ids"] <- collapse("sec_hmdb_ids", peakgroup_list, peaklist_index)
+    if (single_peakgroup[, "sec_hmdb_ids"] == ";") single_peakgroup[, "sec_hmdb_ids"] < NA
 
     # keep track of deduplicated entries
     collect <- rbind(collect, single_peakgroup)

From c12f7e83e27e88cc2617ea3e7358674591604d55 Mon Sep 17 00:00:00 2001
From: Anne Luesink <anneluesink@live.nl>
Date: Fri, 6 Sep 2024 16:03:56 +0200
Subject: [PATCH 03/18] Fixed missing colums error

---
 DIMS/PeakGrouping.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/DIMS/PeakGrouping.R b/DIMS/PeakGrouping.R
index a801500..53e42de 100644
--- a/DIMS/PeakGrouping.R
+++ b/DIMS/PeakGrouping.R
@@ -77,7 +77,7 @@ while (dim(hmdb_add_iso)[1] > 0) {
     })))
 
     # Initialize
-    assi_hmdb <- iso_hmdb <- hmdb_code <- NA
+    assi_hmdb <- iso_hmdb <- hmdb_code <- all_hmdb_names <- all_hmdb_ids <- all_sec_hmdb_ids <- NA
     tmplist_mass_iso <- tmplist_mass_adduct <- NULL
 
     # find all entries in HMDB part with mass within ppm range

From 38df3fc1a552cabd0204ebc414a9507c3928e5e8 Mon Sep 17 00:00:00 2001
From: Anne Luesink <anneluesink@live.nl>
Date: Fri, 6 Sep 2024 16:04:51 +0200
Subject: [PATCH 04/18] Added extra HMDB info columns

---
 DIMS/SumAdducts.R | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/DIMS/SumAdducts.R b/DIMS/SumAdducts.R
index 4670e88..6b7d076 100755
--- a/DIMS/SumAdducts.R
+++ b/DIMS/SumAdducts.R
@@ -57,6 +57,11 @@ sum_adducts <- function(peaklist, theor_mz, grpnames_long, adducts, batch_number
     if (!is.null(adductsum)) {
       rownames(adductsum) <- names
       adductsum <- cbind(adductsum, "HMDB_name" = names_long)
+      # Add HMDB info
+      cols_hmdb_info <- c("HMDB_ID_all", "sec_HMDB_ID", "HMDB_name_all")
+      hmdb_info <- theor_mz[names, cols_hmdb_info]
+      adductsum <- cbind(adductsum, hmdb_info)
+      
       save(adductsum, file = paste(scanmode, "_", batch_number, "_SummedAdducts.RData", sep = ""))
     }
   }

From cc164edf955cdfb1abb48cacce6e6c864ff0599a Mon Sep 17 00:00:00 2001
From: Anne Luesink <anneluesink@live.nl>
Date: Fri, 6 Sep 2024 16:05:38 +0200
Subject: [PATCH 05/18] Changes for extra HMDB info columns

---
 DIMS/GenerateExcel.R | 27 +++++++++++++++++----------
 1 file changed, 17 insertions(+), 10 deletions(-)

diff --git a/DIMS/GenerateExcel.R b/DIMS/GenerateExcel.R
index e6d6b70..01f3c24 100644
--- a/DIMS/GenerateExcel.R
+++ b/DIMS/GenerateExcel.R
@@ -88,28 +88,33 @@ index_neg <- which(rownames(outlist_neg_adducts_hmdb) %in% rownames(outlist_pos_
 index_pos <- which(rownames(outlist_pos_adducts_hmdb) %in% rownames(outlist_neg_adducts_hmdb))
 
 # Only continue with HMDB codes (rows) that were found in both positive mode and remove last column (hmdb_name)
-tmp_pos <- outlist_pos_adducts_hmdb[rownames(outlist_pos_adducts_hmdb)[index_pos], 1:(dim(outlist_pos_adducts_hmdb)[2] - 1)]
-tmp_hmdb_name_pos <- outlist_pos_adducts_hmdb[rownames(outlist_pos_adducts_hmdb)[index_pos], dim(outlist_pos_adducts_hmdb)[2]]
+tmp_pos <- outlist_pos_adducts_hmdb[rownames(outlist_pos_adducts_hmdb)[index_pos], ] %>% select(-c(HMDB_name, HMDB_name_all, HMDB_ID_all, sec_HMDB_ID))
+tmp_pos_info <- outlist_pos_adducts_hmdb[rownames(outlist_pos_adducts_hmdb)[index_pos], ]  %>% select(HMDB_name, HMDB_name_all, HMDB_ID_all, sec_HMDB_ID)
+# tmp_hmdb_name_pos <- outlist_pos_adducts_hmdb[rownames(outlist_pos_adducts_hmdb)[index_pos], dim(outlist_pos_adducts_hmdb)[2]]
 tmp_pos_left <- outlist_pos_adducts_hmdb[-index_pos, ]
 # same for negative mode
-tmp_neg <- outlist_neg_adducts_hmdb[rownames(outlist_pos_adducts_hmdb)[index_pos], 1:(dim(outlist_neg_adducts_hmdb)[2] - 1)]
+tmp_neg <- outlist_neg_adducts_hmdb[rownames(outlist_pos_adducts_hmdb)[index_pos], ] %>% select(-c(HMDB_name, HMDB_name_all, HMDB_ID_all, sec_HMDB_ID))
 tmp_neg_left <- outlist_neg_adducts_hmdb[-index_neg, ]
-
+print("combine pos + neg")
 # Combine positive and negative numbers and paste back HMDB column
 tmp <- apply(tmp_pos, 2, as.numeric) + apply(tmp_neg, 2, as.numeric)
 rownames(tmp) <- rownames(tmp_pos)
-tmp <- cbind(tmp, "HMDB_name" = tmp_hmdb_name_pos)
+tmp <- cbind(tmp, tmp_pos_info)
 outlist <- rbind(tmp, tmp_pos_left, tmp_neg_left)
+outlist <- outlist %>% arrange(rownames(outlist))
+print("Add biological relevance")
 
 # Filter for biological relevance
 # peaks_in_list <- which(rownames(outlist) %in% rownames(rlvnc))
 peaks_in_list <- which(rownames(outlist) %in% rlvnc$HMDB_key)
-outlist <- cbind(outlist[peaks_in_list, ], as.data.frame(rlvnc[rownames(outlist)[peaks_in_list], ]))
+rlvnc_in_list <- rlvnc %>% filter(HMDB_key %in% rownames(outlist)[peaks_in_list])
+outlist <- cbind(outlist[peaks_in_list, ], as.data.frame(rlvnc_in_list))
+
 # filter out all irrelevant HMDBs
 outlist <- outlist %>%
   tibble::rownames_to_column("rowname") %>%
-  filter(!grepl("Exogenous|Drug|exogenous", relevance)) %>%
-  tibble::column_to_rownames("rowname")
+  filter(grepl("relevant|Onbekend|Internal", relevance)) %>%
+  tibble::column_to_rownames("rowname")print("Add HMDB_code column")
 
 # Add HMDB_code column with all the HMDB ID and sort on it
 outlist <- cbind(outlist, "HMDB_code" = rownames(outlist))
@@ -260,8 +265,9 @@ if (z_score == 1) {
   openxlsx::setRowHeights(wb, filelist, rows = c(1:nrow(outlist)), heights = 18)
   openxlsx::setColWidths(wb, filelist, cols = c(1:ncol(outlist)), widths = 20)
 }
-
+print("Write Excel file")
 # write Excel file
+outlist$name <- stringi::stri_enc_toutf8(outlist$name)
 openxlsx::writeData(wb, sheet = 1, outlist, startCol = 1)
 xlsx_name <- paste0(outdir, "/", project, ".xlsx")
 openxlsx::saveWorkbook(wb, xlsx_name, overwrite = TRUE)
@@ -297,6 +303,7 @@ is_summed$Intensity <- as.numeric(as.character(is_summed$Intensity))
 
 # Retrieve IS positive mode
 is_pos <- as.data.frame(subset(outlist_pos_adducts_hmdb, rownames(outlist_pos_adducts_hmdb) %in% is_codes))
+is_pos <- is_pos[c(names(repl_pattern))]
 is_pos$HMDB_name <- is_list[match(row.names(is_pos), is_list$HMDB_code, nomatch = NA), "name"]
 is_pos$HMDB.code <- row.names(is_pos)
 is_pos <- reshape2::melt(is_pos, id.vars = c("HMDB.code", "HMDB_name"))
@@ -308,6 +315,7 @@ is_pos$Intensity <- as.numeric(as.character(is_pos$Intensity))
 
 # Retrieve IS negative mode
 is_neg <- as.data.frame(subset(outlist_neg_adducts_hmdb, rownames(outlist_neg_adducts_hmdb) %in% is_codes))
+is_neg <- is_neg[c(names(repl_pattern))]
 is_neg$HMDB_name <- is_list[match(row.names(is_neg), is_list$HMDB_code, nomatch = NA), "name"]
 is_neg$HMDB.code <- row.names(is_neg)
 is_neg <- reshape2::melt(is_neg, id.vars = c("HMDB.code", "HMDB_name"))
@@ -316,7 +324,6 @@ is_neg$Matrix <- dims_matrix
 is_neg$Rundate <- rundate
 is_neg$Project <- project
 is_neg$Intensity <- as.numeric(as.character(is_neg$Intensity))
-
 # Save results
 save(is_pos, is_neg, is_summed, file = paste0(outdir, "/", project, "_IS_results.RData"))
 

From b072e3a1598146b55bc1b13d109ea362cb4dd096 Mon Sep 17 00:00:00 2001
From: Anne Luesink <anneluesink@live.nl>
Date: Fri, 6 Sep 2024 16:50:51 +0200
Subject: [PATCH 06/18] Changes for ordering dataframe

---
 DIMS/GenerateViolinPlots.R | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/DIMS/GenerateViolinPlots.R b/DIMS/GenerateViolinPlots.R
index 54bd193..a0af859 100644
--- a/DIMS/GenerateViolinPlots.R
+++ b/DIMS/GenerateViolinPlots.R
@@ -344,8 +344,8 @@ if (z_score == 1){
 
     # from table expected_biomarkers, choose selected columns
     select_columns <- c("Disease", "HMDB_code", "HMDB_name")
-    select_col_nrs <- which(colnames(expected_biomarkers) %in% select_columns)
-    expected_biomarkers_select <- expected_biomarkers[, select_col_nrs]
+    #select_col_nrs <- which(colnames(expected_biomarkers) %in% select_columns)
+    expected_biomarkers_select <- expected_biomarkers %>% select(all_of(select_columns))    
     # remove duplicates
     expected_biomarkers_select <- expected_biomarkers_select[!duplicated(expected_biomarkers_select[, c(1, 2)]), ]
 

From b4b0b1c14a3365b22a2056066fc82e468743cd8b Mon Sep 17 00:00:00 2001
From: ALuesink <anneluesink@live.nl>
Date: Fri, 11 Oct 2024 11:58:21 +0200
Subject: [PATCH 07/18] Added trimming per scanmodus

---
 DIMS/AssignToBins.R   |  4 ++--
 DIMS/GenerateBreaks.R | 22 ++++++++++++++++------
 2 files changed, 18 insertions(+), 8 deletions(-)

diff --git a/DIMS/AssignToBins.R b/DIMS/AssignToBins.R
index 4ca7fce..5c588cb 100644
--- a/DIMS/AssignToBins.R
+++ b/DIMS/AssignToBins.R
@@ -44,8 +44,8 @@ raw_data_matrix <- xcms::rawMat(raw_data)
 pos_times <- raw_data@scantime[raw_data@polarity == "positive"]
 neg_times <- raw_data@scantime[raw_data@polarity == "negative"]
 # Select scans between trim_left and trim_right
-pos_times <- pos_times[pos_times > trim_left & pos_times < trim_right]
-neg_times <- neg_times[neg_times > trim_left & neg_times < trim_right]
+pos_times <- pos_times[pos_times > trim_left_pos & pos_times < trim_right_pos]
+neg_times <- neg_times[neg_times > trim_left_neg & neg_times < trim_right_neg]
 
 # Generate an index with which to select values for each mode
 pos_index <- which(raw_data_matrix[, "time"] %in% pos_times)
diff --git a/DIMS/GenerateBreaks.R b/DIMS/GenerateBreaks.R
index 6aacfe8..ca7040d 100644
--- a/DIMS/GenerateBreaks.R
+++ b/DIMS/GenerateBreaks.R
@@ -12,8 +12,10 @@ trim <- as.numeric(cmd_args[3])
 resol <- as.numeric(cmd_args[4])
 
 # initialize
-trim_left <- NULL
-trim_right <- NULL
+trim_left_pos <- NULL
+trim_right_pos <- NULL
+trim_left_neg <- NULL
+trim_right_neg <- NULL
 breaks_fwhm <- NULL
 breaks_fwhm_avg <- NULL
 bins <- NULL
@@ -21,9 +23,17 @@ bins <- NULL
 # read in mzML file
 raw_data <- suppressMessages(xcms::xcmsRaw(filepath))
 
-# trim (remove) scans at the start and end
-trim_left  <- round(raw_data@scantime[length(raw_data@scantime) * trim])
-trim_right <- round(raw_data@scantime[length(raw_data@scantime) * (1 - trim)])
+# Get time values for positive and negative scans
+pos_times <- raw_data@scantime[raw_data@polarity == "positive"]
+neg_times <- raw_data@scantime[raw_data@polarity == "negative"]
+
+# trim (remove) scans at the start and end for positive
+trim_left_pos  <- round(pos_times[length(pos_times) * trim])
+trim_right_pos <- round(pos_times[length(pos_times) * (1 - trim)])
+
+# trim (remove) scans at the start and end for negative
+trim_left_neg  <- round(neg_times[length(neg_times) * trim])
+trim_right_neg <- round(neg_times[length(neg_times) * (1 - trim)])
 
 # Mass range m/z
 low_mz  <- raw_data@mzrange[1]
@@ -47,5 +57,5 @@ for (i in 1:nr_segments) {
 }
 
 # generate output file
-save(breaks_fwhm, breaks_fwhm_avg, trim_left, trim_right, file = "breaks.fwhm.RData")
+save(breaks_fwhm, breaks_fwhm_avg, trim_left_pos, trim_right_pos, trim_left_neg, trim_right_neg, file = "breaks.fwhm.RData")
 save(high_mz, file = "highest_mz.RData")

From f550d930f09ff76232aafa9d8f4a4eed61f2d147 Mon Sep 17 00:00:00 2001
From: ALuesink <anneluesink@live.nl>
Date: Mon, 14 Oct 2024 10:56:48 +0200
Subject: [PATCH 08/18] Add trim lines to TIC plots

---
 DIMS/AverageTechReplicates.R  | 13 +++++++++++--
 DIMS/AverageTechReplicates.nf |  4 +++-
 2 files changed, 14 insertions(+), 3 deletions(-)

diff --git a/DIMS/AverageTechReplicates.R b/DIMS/AverageTechReplicates.R
index 02279fb..ddda4be 100644
--- a/DIMS/AverageTechReplicates.R
+++ b/DIMS/AverageTechReplicates.R
@@ -13,6 +13,7 @@ run_name <- cmd_args[3]
 dims_matrix <- cmd_args[4]
 highest_mz_file <- cmd_args[5]
 highest_mz <- get(load(highest_mz_file))
+breaks_filepath <- cmd_args[6]
 thresh2remove <- 1000000000
 dims_thresh <- 100
 
@@ -43,14 +44,18 @@ remove_from_repl_pattern <- function(bad_samples, repl_pattern, nr_replicates) {
 # get repl_pattern
 load(init_file)
 
+# get trim parameters
+load(breaks_filepath)
+
 # lower the threshold below which a sample will be removed for DBS and for high m/z
 if (dims_matrix == "DBS") {
   thresh2remove <- 500000000
 }
 if (highest_mz > 700) {
-  thresh2remove <- 1000000
+  thresh2remove <- 10000000
 }
 
+
 # remove technical replicates which are below the threshold
 remove_neg <- NULL
 remove_pos <- NULL
@@ -64,7 +69,7 @@ for (sample_nr in 1:length(repl_pattern)) {
   nr_pos <- 0
   nr_neg <- 0
   for (file_nr in 1:length(tech_reps)) {
-    load(paste(tech_reps[file_nr], ".RData", sep = ""))
+    load(paste("AverageTechReplicates/", tech_reps[file_nr], ".RData", sep = ""))
     cat("\n\nParsing", tech_reps[file_nr])
     # negative scanmode
     cat("\n\tNegative peak_list sum", sum(peak_list$neg[, 1]))
@@ -162,6 +167,10 @@ for (sample_nr in c(1:length(repl_pattern))) {
     tic_plot <- ggplot(repl1_nr, aes(retention_time, tic_intensity)) +
       geom_line(linewidth = 0.3) +
       geom_hline(yintercept = highest_tic_max, col = "grey", linetype = 2, linewidth = 0.3) +
+      geom_vline(xintercept = trim_left_pos, col = "red", linetype = 2, linewidth = 0.3) +
+      geom_vline(xintercept = trim_right_pos, col = "red", linetype = 2, linewidth = 0.3) +
+      geom_vline(xintercept = trim_left_neg, col = "red", linetype = 2, linewidth = 0.3) +
+      geom_vline(xintercept = trim_right_neg, col = "red", linetype = 2, linewidth = 0.3) +
       labs(x = "t (s)", y = "tic_intensity", title = paste0(tech_reps[file_nr], "  ||  ", sample_name)) +
       theme(plot.background = element_rect(fill = plot_color),
             axis.text = element_text(size = 4),
diff --git a/DIMS/AverageTechReplicates.nf b/DIMS/AverageTechReplicates.nf
index 5387e61..f4b85b9 100644
--- a/DIMS/AverageTechReplicates.nf
+++ b/DIMS/AverageTechReplicates.nf
@@ -12,6 +12,7 @@ process AverageTechReplicates {
        val(analysis_id)
        val(matrix)
        path(highest_mz_file)
+       path(breaks_file)
 
     output:
        path('*_repl_pattern.RData'), emit: pattern_files
@@ -26,7 +27,8 @@ process AverageTechReplicates {
                                                                       $params.nr_replicates \
                                                                       $analysis_id \
                                                                       $matrix \
-                                                                      $highest_mz_file
+                                                                      $highest_mz_file \
+                                                                      $breaks_file
         """
 }
 

From 958c48cd5f784c8a9c33d03894010958c09e18cd Mon Sep 17 00:00:00 2001
From: Anne Luesink <anneluesink@live.nl>
Date: Fri, 18 Oct 2024 13:32:03 +0200
Subject: [PATCH 09/18] fixed print error

---
 DIMS/GenerateExcel.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/DIMS/GenerateExcel.R b/DIMS/GenerateExcel.R
index 01f3c24..1e5d4c4 100644
--- a/DIMS/GenerateExcel.R
+++ b/DIMS/GenerateExcel.R
@@ -114,7 +114,7 @@ outlist <- cbind(outlist[peaks_in_list, ], as.data.frame(rlvnc_in_list))
 outlist <- outlist %>%
   tibble::rownames_to_column("rowname") %>%
   filter(grepl("relevant|Onbekend|Internal", relevance)) %>%
-  tibble::column_to_rownames("rowname")print("Add HMDB_code column")
+  tibble::column_to_rownames("rowname")
 
 # Add HMDB_code column with all the HMDB ID and sort on it
 outlist <- cbind(outlist, "HMDB_code" = rownames(outlist))

From 34b3c608f18dcd1d90cf8f7916608a0d8888d908 Mon Sep 17 00:00:00 2001
From: ALuesink <anneluesink@live.nl>
Date: Mon, 21 Oct 2024 10:26:35 +0200
Subject: [PATCH 10/18] removed print statements

---
 DIMS/GenerateExcel.R | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/DIMS/GenerateExcel.R b/DIMS/GenerateExcel.R
index 1e5d4c4..bd90015 100644
--- a/DIMS/GenerateExcel.R
+++ b/DIMS/GenerateExcel.R
@@ -95,14 +95,12 @@ tmp_pos_left <- outlist_pos_adducts_hmdb[-index_pos, ]
 # same for negative mode
 tmp_neg <- outlist_neg_adducts_hmdb[rownames(outlist_pos_adducts_hmdb)[index_pos], ] %>% select(-c(HMDB_name, HMDB_name_all, HMDB_ID_all, sec_HMDB_ID))
 tmp_neg_left <- outlist_neg_adducts_hmdb[-index_neg, ]
-print("combine pos + neg")
 # Combine positive and negative numbers and paste back HMDB column
 tmp <- apply(tmp_pos, 2, as.numeric) + apply(tmp_neg, 2, as.numeric)
 rownames(tmp) <- rownames(tmp_pos)
 tmp <- cbind(tmp, tmp_pos_info)
 outlist <- rbind(tmp, tmp_pos_left, tmp_neg_left)
 outlist <- outlist %>% arrange(rownames(outlist))
-print("Add biological relevance")
 
 # Filter for biological relevance
 # peaks_in_list <- which(rownames(outlist) %in% rownames(rlvnc))
@@ -265,7 +263,6 @@ if (z_score == 1) {
   openxlsx::setRowHeights(wb, filelist, rows = c(1:nrow(outlist)), heights = 18)
   openxlsx::setColWidths(wb, filelist, cols = c(1:ncol(outlist)), widths = 20)
 }
-print("Write Excel file")
 # write Excel file
 outlist$name <- stringi::stri_enc_toutf8(outlist$name)
 openxlsx::writeData(wb, sheet = 1, outlist, startCol = 1)

From 2a0d85287aeb5b63f368b9071945ff57c3637e52 Mon Sep 17 00:00:00 2001
From: ALuesink <anneluesink@live.nl>
Date: Mon, 21 Oct 2024 11:33:06 +0200
Subject: [PATCH 11/18] Added outlier removal

---
 DIMS/GenerateExcel.R | 68 +++++++++++++++++++++++++++++++++++---------
 1 file changed, 55 insertions(+), 13 deletions(-)

diff --git a/DIMS/GenerateExcel.R b/DIMS/GenerateExcel.R
index bd90015..476ed7f 100644
--- a/DIMS/GenerateExcel.R
+++ b/DIMS/GenerateExcel.R
@@ -16,6 +16,7 @@ project <- cmd_args[2]
 dims_matrix <- cmd_args[3]
 hmdb_file <- cmd_args[4]
 z_score <- as.numeric(cmd_args[5])
+outlier_threshold <- 2
 
 round_df <- function(df, digits) {
   #' Round numbers to a set number of digits for numeric values
@@ -44,6 +45,25 @@ robust_scaler <- function(control_intensities, control_col_ids, perc = 5) {
   return(trimmed_control_intensities)
 }
 
+remove_outliers_for_Zscore <- function(control_intensities, outlier_threshold = 2) {
+  #' Remove outliers per metabolite before calculating Z-scores
+  #'
+  #' @param control_intensities: Vector with intensities for control samples
+  #' @param outlier_threshold: Threshold for outliers which will be removed from controls (float)
+  #'
+  #' @return trimmed_control_intensities: Intensities trimmed for outliers
+  mean_permetabolite <- mean(as.numeric(control_intensities))
+  stdev_permetabolite <- sd(as.numeric(control_intensities))
+  zscores_permetabolite <- (control_intensities - mean_permetabolite) / stdev_permetabolite
+  # remove intensities with a zscore_permetabolite greater than outlier_threshold
+  if (sum(zscores_permetabolite > outlier_threshold) > 0) {
+    trimmed_control_intensities <- control_intensities[-which(zscores_permetabolite > outlier_threshold)]
+  } else {
+    trimmed_control_intensities <- control_intensities
+  }
+  return(trimmed_control_intensities)
+}
+
 # Initialise
 plot <- TRUE
 export <- TRUE
@@ -95,6 +115,7 @@ tmp_pos_left <- outlist_pos_adducts_hmdb[-index_pos, ]
 # same for negative mode
 tmp_neg <- outlist_neg_adducts_hmdb[rownames(outlist_pos_adducts_hmdb)[index_pos], ] %>% select(-c(HMDB_name, HMDB_name_all, HMDB_ID_all, sec_HMDB_ID))
 tmp_neg_left <- outlist_neg_adducts_hmdb[-index_neg, ]
+
 # Combine positive and negative numbers and paste back HMDB column
 tmp <- apply(tmp_pos, 2, as.numeric) + apply(tmp_neg, 2, as.numeric)
 rownames(tmp) <- rownames(tmp_pos)
@@ -146,7 +167,7 @@ if (z_score == 1) {
   outlist[, intensity_col_ids][outlist[, intensity_col_ids] == 0] <- NA
 
   # save outlist as it is and use it to calculate robust scaler
-  outlist_noZ <- outlist
+  outlist_robustZ <- outlist_nooutliers <- outlist
 
   # calculate mean and sd for Control group
   outlist$avg.ctrls <- apply(control_columns, 1, function(x) mean(as.numeric(x), na.rm = TRUE))
@@ -163,15 +184,15 @@ if (z_score == 1) {
   colnames(outlist)[startcol:ncol(outlist)] <- colnames_z
 
   # calculate robust scaler (Zscores minus outliers in Controls)
-  outlist_noZ$avg.ctrls <- 0
-  outlist_noZ$sd.ctrls  <- 0
+  outlist_robustZ$avg.ctrls <- 0
+  outlist_robustZ$sd.ctrls  <- 0
 
   # only calculate robust Z-scores if there are enough Controls
   if (length(control_col_ids) > 10) {
     for (metabolite_index in 1:nrow(outlist)) {
-      outlist_noZ$avg.ctrls[metabolite_index] <- mean(robust_scaler(outlist_noZ[metabolite_index, control_col_ids],
+      outlist_robustZ$avg.ctrls[metabolite_index] <- mean(robust_scaler(outlist_robustZ[metabolite_index, control_col_ids],
                                                                     control_col_ids, perc))
-      outlist_noZ$sd.ctrls[metabolite_index]  <-   sd(robust_scaler(outlist_noZ[metabolite_index, control_col_ids],
+      outlist_robustZ$sd.ctrls[metabolite_index]  <-   sd(robust_scaler(outlist_robustZ[metabolite_index, control_col_ids],
                                                                     control_col_ids, perc))
     }
   }
@@ -179,17 +200,39 @@ if (z_score == 1) {
   # Make and add columns with robust zscores
   cnames_robust <- gsub("_Zscore", "_RobustZscore", colnames_z)
   for (i in intensity_col_ids) {
-    zscores_1col <- (as.numeric(as.vector(unlist(outlist_noZ[, i]))) - outlist_noZ$avg.ctrls) / outlist_noZ$sd.ctrls
-    outlist_noZ <- cbind(outlist_noZ, zscores_1col)
+    zscores_1col <- (as.numeric(as.vector(unlist(outlist_robustZ[, i]))) - outlist_robustZ$avg.ctrls) / outlist_robustZ$sd.ctrls
+    outlist_robustZ <- cbind(outlist_robustZ, zscores_1col)
   }
-  colnames(outlist_noZ)[startcol:ncol(outlist_noZ)] <- cnames_robust
+  colnames(outlist_robustZ)[startcol:ncol(outlist_robustZ)] <- cnames_robust
+
+  # calculate Z-scores after removal of outliers in Control samples
+  if (length(control_col_ids) > 10) {
+    for (metabolite_index in 1:nrow(outlist_nooutliers)) {
+      intensities_without_outliers <- remove_outliers_for_Zscore(as.numeric(outlist_nooutliers[metabolite_index, control_col_ids]), outlier_threshold)
+      outlist_nooutliers$avg.ctrls[metabolite_index] <-   mean(intensities_without_outliers)
+      outlist_nooutliers$sd.ctrls[metabolite_index]  <-     sd(intensities_without_outliers)
+      outlist_nooutliers$nr.ctrls[metabolite_index]  <- length(intensities_without_outliers)
+    }
+  }
+ 
+  cnames_nooutliers <- gsub("_Zscore", "_OutlierRemovedZscore", colnames_z)
+  for (i in intensity_col_ids) {
+    zscores_1col <- (as.numeric(as.vector(unlist(outlist_nooutliers[, i]))) - outlist_nooutliers$avg.ctrls) / outlist_nooutliers$sd.ctrls
+    outlist_nooutliers <- cbind(outlist_nooutliers, zscores_1col)
+  }
+  # add column names for Z-scores without outliers. NB: 1 extra column so shift to +1
+  colnames(outlist_nooutliers)[(startcol + 1):ncol(outlist_nooutliers)] <- cnames_nooutliers
+
 
   # output metabolites filtered on relevance
   save(outlist, file = paste0("AdductSums_filtered_Zscores.RData"))
   write.table(outlist, file = paste0("AdductSums_filtered_Zscores.txt"), sep = "\t", row.names = FALSE)
   # output filtered metabolites with robust scaled Zscores
-  save(outlist_noZ, file = paste0("AdductSums_filtered_robustZ.RData"))
-  write.table(outlist_noZ, file = paste0("AdductSums_filtered_robustZ.txt"), sep = "\t", row.names = FALSE)
+  save(outlist_robustZ, file = paste0("AdductSums_filtered_robustZ.RData"))
+  write.table(outlist_robustZ, file = paste0("AdductSums_filtered_robustZ.txt"), sep = "\t", row.names = FALSE)
+  # output filtered metabolites after removal of outliers
+  save(outlist_nooutliers, file = paste0("AdductSums_filtered_outliersremovedZ.RData"))
+  write.table(outlist_nooutliers, file = paste0("AdductSums_filtered_outliersremovedZ.txt"), sep = "\t", row.names = FALSE)
 
   # get the IDs of the patients and sort
   patient_ids <- unique(as.vector(unlist(lapply(strsplit(colnames(patient_columns), ".", fixed = TRUE), function(x) x[1]))))
@@ -263,8 +306,8 @@ if (z_score == 1) {
   openxlsx::setRowHeights(wb, filelist, rows = c(1:nrow(outlist)), heights = 18)
   openxlsx::setColWidths(wb, filelist, cols = c(1:ncol(outlist)), widths = 20)
 }
+
 # write Excel file
-outlist$name <- stringi::stri_enc_toutf8(outlist$name)
 openxlsx::writeData(wb, sheet = 1, outlist, startCol = 1)
 xlsx_name <- paste0(outdir, "/", project, ".xlsx")
 openxlsx::saveWorkbook(wb, xlsx_name, overwrite = TRUE)
@@ -300,7 +343,6 @@ is_summed$Intensity <- as.numeric(as.character(is_summed$Intensity))
 
 # Retrieve IS positive mode
 is_pos <- as.data.frame(subset(outlist_pos_adducts_hmdb, rownames(outlist_pos_adducts_hmdb) %in% is_codes))
-is_pos <- is_pos[c(names(repl_pattern))]
 is_pos$HMDB_name <- is_list[match(row.names(is_pos), is_list$HMDB_code, nomatch = NA), "name"]
 is_pos$HMDB.code <- row.names(is_pos)
 is_pos <- reshape2::melt(is_pos, id.vars = c("HMDB.code", "HMDB_name"))
@@ -312,7 +354,6 @@ is_pos$Intensity <- as.numeric(as.character(is_pos$Intensity))
 
 # Retrieve IS negative mode
 is_neg <- as.data.frame(subset(outlist_neg_adducts_hmdb, rownames(outlist_neg_adducts_hmdb) %in% is_codes))
-is_neg <- is_neg[c(names(repl_pattern))]
 is_neg$HMDB_name <- is_list[match(row.names(is_neg), is_list$HMDB_code, nomatch = NA), "name"]
 is_neg$HMDB.code <- row.names(is_neg)
 is_neg <- reshape2::melt(is_neg, id.vars = c("HMDB.code", "HMDB_name"))
@@ -321,6 +362,7 @@ is_neg$Matrix <- dims_matrix
 is_neg$Rundate <- rundate
 is_neg$Project <- project
 is_neg$Intensity <- as.numeric(as.character(is_neg$Intensity))
+
 # Save results
 save(is_pos, is_neg, is_summed, file = paste0(outdir, "/", project, "_IS_results.RData"))
 

From 9304507277ae1c33352218dd7d447737b12f3a47 Mon Sep 17 00:00:00 2001
From: Anne Luesink <anneluesink@live.nl>
Date: Mon, 21 Oct 2024 11:55:56 +0200
Subject: [PATCH 12/18] Added SST output

---
 DIMS/AverageTechReplicates.R |  2 +-
 DIMS/GenerateExcel.R         | 78 ++++++++++++++++++++++++++++++++++++
 DIMS/GenerateExcel.nf        |  2 +-
 3 files changed, 80 insertions(+), 2 deletions(-)

diff --git a/DIMS/AverageTechReplicates.R b/DIMS/AverageTechReplicates.R
index ddda4be..9d3671a 100644
--- a/DIMS/AverageTechReplicates.R
+++ b/DIMS/AverageTechReplicates.R
@@ -69,7 +69,7 @@ for (sample_nr in 1:length(repl_pattern)) {
   nr_pos <- 0
   nr_neg <- 0
   for (file_nr in 1:length(tech_reps)) {
-    load(paste("AverageTechReplicates/", tech_reps[file_nr], ".RData", sep = ""))
+    load(paste(tech_reps[file_nr], ".RData", sep = ""))
     cat("\n\nParsing", tech_reps[file_nr])
     # negative scanmode
     cat("\n\tNegative peak_list sum", sum(peak_list$neg[, 1]))
diff --git a/DIMS/GenerateExcel.R b/DIMS/GenerateExcel.R
index d36ac29..1fa1eac 100644
--- a/DIMS/GenerateExcel.R
+++ b/DIMS/GenerateExcel.R
@@ -16,6 +16,8 @@ project <- cmd_args[2]
 dims_matrix <- cmd_args[3]
 hmdb_file <- cmd_args[4]
 z_score <- as.numeric(cmd_args[5])
+sst_components_file <- cmd_args[6]
+print(sst_components_file)
 
 round_df <- function(df, digits) {
   #' Round numbers to a set number of digits for numeric values
@@ -603,6 +605,82 @@ if (z_score == 1) {
   }
 }
 
+### SST components output ####
+calculate_coefficient_of_variation <- function(intensity_list) {
+  for (col_nr in 1:ncol(intensity_list)) {
+    intensity_list[, col_nr] <- as.numeric(intensity_list[, col_nr])
+    intensity_list[, col_nr] <- round(intensity_list[, col_nr], 0)
+  }
+  sd_allsamples <- round(apply(intensity_list, 1, sd), 0)
+  mean_allsamples <- round(apply(intensity_list, 1, mean), 0)
+  cv_allsamples <- round(100 * sd_allsamples / mean_allsamples, 1)
+  intensity_list_with_cv <- cbind(CV_perc = cv_allsamples,
+                                  mean = mean_allsamples,
+                                  sd = sd_allsamples,
+                                  intensity_list)
+  return(intensity_list_with_cv)
+}
+
+# Internal standards lists, calculate coefficients of variation
+if ("plots" %in% colnames(is_list)) {
+  intensity_col_ids <- 2:(which(colnames(is_list) == "HMDB_name") - 1)
+} else {
+  intensity_col_ids <- 1:(which(colnames(is_list) == "HMDB_name") - 1)
+}
+# previous intensity_col_ids is based on the assumption that there are Controls and Patients
+is_list_intensities <- is_list[ , intensity_col_ids]
+is_list_intensities <- calculate_coefficient_of_variation(is_list_intensities)
+is_list_intensities <- cbind(IS_name = is_list$HMDB_name, is_list_intensities)
+
+# separate adducts of IS
+is_pos <- as.data.frame(subset(outlist_pos_adducts_hmdb, rownames(outlist_pos_adducts_hmdb) %in% is_codes))
+is_neg <- as.data.frame(subset(outlist_neg_adducts_hmdb, rownames(outlist_neg_adducts_hmdb) %in% is_codes))
+is_pos_intensities <- is_pos[ , -which(colnames(is_pos) == "HMDB_name")]
+is_neg_intensities <- is_neg[ , -which(colnames(is_neg) == "HMDB_name")]
+is_pos_intensities <- calculate_coefficient_of_variation(is_pos_intensities)
+is_neg_intensities <- calculate_coefficient_of_variation(is_neg_intensities)
+is_pos_intensities <- cbind(IS_name = is_pos$HMDB_name, is_pos_intensities)
+is_neg_intensities <- cbind(IS_name = is_neg$HMDB_name, is_neg_intensities)
+
+# SST components. 
+sst_comp <- read.csv(sst_components_file, header = TRUE, sep = "\t")
+sst_rows <- which(outlist$HMDB_code %in% sst_comp$HMDB_ID)
+sst_list <- outlist[sst_rows, ]
+sst_colnrs <- grep("P1001", colnames(sst_list))
+if (length(sst_colnrs) > 0) {
+  sst_list_intensities <- sst_list[ , sst_colnrs]
+} else {
+  sst_list_intensities <- sst_list[ , intensity_col_ids]
+}
+for (col_nr in 1:ncol(sst_list_intensities)) {
+  sst_list_intensities[, col_nr] <- as.numeric(sst_list_intensities[, col_nr])
+  if (grepl("Zscore", colnames(sst_list_intensities)[col_nr])) {
+    sst_list_intensities[, col_nr] <- round(sst_list_intensities[, col_nr], 2)
+  } else {
+    sst_list_intensities[, col_nr] <- round(sst_list_intensities[, col_nr])
+  }
+}
+sst_list_intensities <- cbind(SST_comp_name = sst_list$HMDB_name, sst_list_intensities)
+
+# Create Excel file
+wb <- createWorkbook("IS_SST")
+addWorksheet(wb, "Internal Standards")
+openxlsx::writeData(wb, sheet = 1, is_list_intensities)
+setColWidths(wb, 1, cols = 1, widths = 24)
+addWorksheet(wb, "IS pos")
+openxlsx::writeData(wb, sheet = 2, is_pos_intensities)
+setColWidths(wb, 2, cols = 1, widths = 24)
+addWorksheet(wb, "IS neg")
+openxlsx::writeData(wb, sheet = 3, is_neg_intensities)
+setColWidths(wb, 3, cols = 1, widths = 24)
+addWorksheet(wb, "SST components")
+openxlsx::writeData(wb, sheet = 4, sst_list_intensities)
+setColWidths(wb, 4, cols = 1:3, widths = 24)
+xlsx_name <- paste0(outdir, "/", project, "_IS_SST.xlsx")
+openxlsx::saveWorkbook(wb, xlsx_name, overwrite = TRUE)
+rm(wb)
+
+
 ### MISSING M/Z CHECK
 # check the outlist_identified_(negative/positive).RData files for missing m/z values and mention in the results mail
 # Load the outlist_identified files + remove the loaded files
diff --git a/DIMS/GenerateExcel.nf b/DIMS/GenerateExcel.nf
index 6134766..2979444 100644
--- a/DIMS/GenerateExcel.nf
+++ b/DIMS/GenerateExcel.nf
@@ -19,6 +19,6 @@ process GenerateExcel {
 
     script:
         """
-        Rscript ${baseDir}/CustomModules/DIMS/GenerateExcel.R $init_file $analysis_id $params.matrix $relevance_file $params.zscore 
+        Rscript ${baseDir}/CustomModules/DIMS/GenerateExcel.R $init_file $analysis_id $params.matrix $relevance_file $params.zscore $params.sst_components_file
         """
 }

From 0d30b63108343e167be9a4d48db64be9dd077b5b Mon Sep 17 00:00:00 2001
From: ALuesink <anneluesink@live.nl>
Date: Mon, 21 Oct 2024 16:01:38 +0200
Subject: [PATCH 13/18] Added initialization of no outliers variables

---
 DIMS/GenerateExcel.R | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/DIMS/GenerateExcel.R b/DIMS/GenerateExcel.R
index 56b5d37..83c7f28 100644
--- a/DIMS/GenerateExcel.R
+++ b/DIMS/GenerateExcel.R
@@ -187,6 +187,9 @@ if (z_score == 1) {
   # calculate robust scaler (Zscores minus outliers in Controls)
   outlist_robustZ$avg.ctrls <- 0
   outlist_robustZ$sd.ctrls  <- 0
+  outlist_nooutliers$avg.ctrls <- 0
+  outlist_nooutliers$sd.ctrls <- 0
+  outlist_nooutliers$nr.ctrls <- 0
 
   # only calculate robust Z-scores if there are enough Controls
   if (length(control_col_ids) > 10) {

From ce07e4901b75bc8be8b3684927adc8936301ca57 Mon Sep 17 00:00:00 2001
From: Anne Luesink <anneluesink@live.nl>
Date: Mon, 28 Oct 2024 09:23:00 +0100
Subject: [PATCH 14/18] Changed trimming positive mode

---
 DIMS/GenerateBreaks.R | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/DIMS/GenerateBreaks.R b/DIMS/GenerateBreaks.R
index ca7040d..d07fd20 100644
--- a/DIMS/GenerateBreaks.R
+++ b/DIMS/GenerateBreaks.R
@@ -28,8 +28,8 @@ pos_times <- raw_data@scantime[raw_data@polarity == "positive"]
 neg_times <- raw_data@scantime[raw_data@polarity == "negative"]
 
 # trim (remove) scans at the start and end for positive
-trim_left_pos  <- round(pos_times[length(pos_times) * trim])
-trim_right_pos <- round(pos_times[length(pos_times) * (1 - trim)])
+trim_left_pos  <- round(pos_times[length(pos_times) * (trim * 1.5)]) # 15% aan het begin
+trim_right_pos <- round(pos_times[length(pos_times) * (1 - (trim * 0.5))]) # 5% aan het eind
 
 # trim (remove) scans at the start and end for negative
 trim_left_neg  <- round(neg_times[length(neg_times) * trim])

From 4703c39fa3509cd7020893f4a65087bfb69aa1c1 Mon Sep 17 00:00:00 2001
From: Anne Luesink <anneluesink@live.nl>
Date: Mon, 28 Oct 2024 09:23:41 +0100
Subject: [PATCH 15/18] Added CV to SST output

---
 DIMS/GenerateExcel.R | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/DIMS/GenerateExcel.R b/DIMS/GenerateExcel.R
index 83c7f28..cd16507 100644
--- a/DIMS/GenerateExcel.R
+++ b/DIMS/GenerateExcel.R
@@ -709,7 +709,10 @@ sst_rows <- which(outlist$HMDB_code %in% sst_comp$HMDB_ID)
 sst_list <- outlist[sst_rows, ]
 sst_colnrs <- grep("P1001", colnames(sst_list))
 if (length(sst_colnrs) > 0) {
-  sst_list_intensities <- sst_list[ , sst_colnrs]
+  sst_list_intensities <- sst_list[, sst_colnrs]
+  control_list_intensities <- sst_list[, control_col_ids]
+  control_list_cv <- calculate_coefficient_of_variation(control_list_intensities)
+  sst_list_intensities <- cbind(sst_list_intensities, CV_controls = control_list_cv[ , "CV_perc"])
 } else {
   sst_list_intensities <- sst_list[ , intensity_col_ids]
 }

From c361f1d3ad006acedc238a30a2f99684983812e8 Mon Sep 17 00:00:00 2001
From: ALuesink <anneluesink@live.nl>
Date: Mon, 11 Nov 2024 11:18:25 +0100
Subject: [PATCH 16/18] Changes as a result of the code review

---
 DIMS/AssignToBins.R          |   3 +-
 DIMS/AverageTechReplicates.R |   5 +-
 DIMS/GenerateExcel.R         | 232 +++++++++++++++++------------------
 3 files changed, 121 insertions(+), 119 deletions(-)

diff --git a/DIMS/AssignToBins.R b/DIMS/AssignToBins.R
index 5c588cb..e49d6c6 100644
--- a/DIMS/AssignToBins.R
+++ b/DIMS/AssignToBins.R
@@ -12,7 +12,8 @@ resol <- as.numeric(cmd_args[3])
 trim <- 0.1
 dims_thresh <- 100
 
-# load breaks_fwhm
+# load breaks_file: contains breaks_fwhm, breaks_fwhm_avg,
+# trim_left_neg, trim_left_pos, trim_right_neg & trim_right_pos
 load(breaks_filepath)
 
 # get sample name
diff --git a/DIMS/AverageTechReplicates.R b/DIMS/AverageTechReplicates.R
index 9d3671a..0c57ad7 100644
--- a/DIMS/AverageTechReplicates.R
+++ b/DIMS/AverageTechReplicates.R
@@ -41,10 +41,11 @@ remove_from_repl_pattern <- function(bad_samples, repl_pattern, nr_replicates) {
   return(list("pattern" = repl_pattern))
 }
 
-# get repl_pattern
+# load init_file: contains repl_pattern
 load(init_file)
 
-# get trim parameters
+# load breaks_file: contains breaks_fwhm, breaks_fwhm_avg,
+# trim_left_neg, trim_left_pos, trim_right_neg & trim_right_pos
 load(breaks_filepath)
 
 # lower the threshold below which a sample will be removed for DBS and for high m/z
diff --git a/DIMS/GenerateExcel.R b/DIMS/GenerateExcel.R
index cd16507..28b2707 100644
--- a/DIMS/GenerateExcel.R
+++ b/DIMS/GenerateExcel.R
@@ -118,16 +118,14 @@ tmp_neg <- outlist_neg_adducts_hmdb[rownames(outlist_pos_adducts_hmdb)[index_pos
 tmp_neg_left <- outlist_neg_adducts_hmdb[-index_neg, ]
 
 # Combine positive and negative numbers and paste back HMDB column
-tmp <- apply(tmp_pos, 2, as.numeric) + apply(tmp_neg, 2, as.numeric)
-rownames(tmp) <- rownames(tmp_pos)
-tmp <- cbind(tmp, tmp_pos_info)
-outlist <- rbind(tmp, tmp_pos_left, tmp_neg_left)
+combi_pos_neg <- apply(tmp_pos, 2, as.numeric) + apply(tmp_neg, 2, as.numeric)
+rownames(combi_pos_neg) <- rownames(tmp_pos)
+combi_pos_neg <- cbind(combi_pos_neg, tmp_pos_info)
+outlist <- rbind(combi_pos_neg, tmp_pos_left, tmp_neg_left)
 outlist <- outlist %>% arrange(rownames(outlist))
 
 # Filter for biological relevance
-# peaks_in_list <- which(rownames(outlist) %in% rownames(rlvnc))
-peaks_in_list <- which(rownames(outlist) %in% rlvnc$HMDB_key)
-rlvnc_in_list <- rlvnc %>% filter(HMDB_key %in% rownames(outlist)[peaks_in_list])
+rlvnc_in_list <- rlvnc %>% filter(HMDB_key %in% rownames(outlist))
 outlist <- cbind(outlist[peaks_in_list, ], as.data.frame(rlvnc_in_list))
 
 # filter out all irrelevant HMDBs
@@ -168,7 +166,8 @@ if (z_score == 1) {
   outlist[, intensity_col_ids][outlist[, intensity_col_ids] == 0] <- NA
 
   # save outlist as it is and use it to calculate robust scaler
-  outlist_robustZ <- outlist_nooutliers <- outlist
+  outlist_robustZ <- outlist
+  outlist_nooutliers <- outlist
 
   # calculate mean and sd for Control group
   outlist$avg.ctrls <- apply(control_columns, 1, function(x) mean(as.numeric(x), na.rm = TRUE))
@@ -213,17 +212,18 @@ if (z_score == 1) {
   if (length(control_col_ids) > 10) {
     for (metabolite_index in 1:nrow(outlist_nooutliers)) {
       intensities_without_outliers <- remove_outliers_for_Zscore(as.numeric(outlist_nooutliers[metabolite_index, control_col_ids]), outlier_threshold)
-      outlist_nooutliers$avg.ctrls[metabolite_index] <-   mean(intensities_without_outliers)
-      outlist_nooutliers$sd.ctrls[metabolite_index]  <-     sd(intensities_without_outliers)
+      outlist_nooutliers$avg.ctrls[metabolite_index] <- mean(intensities_without_outliers)
+      outlist_nooutliers$sd.ctrls[metabolite_index]  <- sd(intensities_without_outliers)
       outlist_nooutliers$nr.ctrls[metabolite_index]  <- length(intensities_without_outliers)
     }
   }
- 
+
   cnames_nooutliers <- gsub("_Zscore", "_OutlierRemovedZscore", colnames_z)
-  for (i in intensity_col_ids) {
-    zscores_1col <- (as.numeric(as.vector(unlist(outlist_nooutliers[, i]))) - outlist_nooutliers$avg.ctrls) / outlist_nooutliers$sd.ctrls
-    outlist_nooutliers <- cbind(outlist_nooutliers, zscores_1col)
-  }
+  outlist_nooutliers %>% apply(outlist_nooutliers, 2, function(col) {
+    (as.numeric(as.vector(unlist(col))) - outlist_nooutliers$avg.ctrls) / outlist_nooutliers$sd.ctrls
+  }) %>%
+    cbind(outlist_nooutliers)
+
   # add column names for Z-scores without outliers. NB: 1 extra column so shift to +1
   colnames(outlist_nooutliers)[(startcol + 1):ncol(outlist_nooutliers)] <- cnames_nooutliers
 
@@ -318,14 +318,14 @@ openxlsx::saveWorkbook(wb, xlsx_name, overwrite = TRUE)
 rm(wb)
 
 #### INTERNAL STANDARDS ####
-is_list <- outlist[grep("Internal standard", outlist[, "relevance"], fixed = TRUE), ]
-is_codes <- rownames(is_list)
+internal_stand_list <- outlist[grep("Internal standard", outlist[, "relevance"], fixed = TRUE), ]
+internal_stand_codes <- rownames(internal_stand_list)
 
 # if all data from one samplename (for example P195.1) is filtered out in 3-averageTechReplicates 
 # because of too little data (threshold parameter)i, the init.RData (repl_pattern) will contain more sample_names 
 # than the peak data (IS), so this data needs to be removed first, before the retrieval of the summed adducts. 
 # Write sample_names to a log file, to let user know that this sample_name contained no data.
-sample_names_nodata <- setdiff(names(repl_pattern), names(is_list))
+sample_names_nodata <- setdiff(names(repl_pattern), names(internal_stand_list))
 if (!is.null(sample_names_nodata)) {
   write.table(sample_names_nodata, file = paste(outdir, "sample_names_nodata.txt", sep = "/"), 
 	      row.names = FALSE, col.names = FALSE, quote = FALSE)
@@ -336,53 +336,53 @@ if (!is.null(sample_names_nodata)) {
 }
 
 # Retrieve IS summed adducts
-is_summed <- is_list[c(names(repl_pattern), "HMDB_code")]
-is_summed$HMDB.name <- is_list$name
-is_summed <- reshape2::melt(is_summed, id.vars = c("HMDB_code", "HMDB.name"))
-colnames(is_summed) <- c("HMDB.code", "HMDB.name", "Sample", "Intensity")
-is_summed$Matrix <- dims_matrix
-is_summed$Rundate <- rundate
-is_summed$Project <- project
-is_summed$Intensity <- as.numeric(as.character(is_summed$Intensity))
+internal_stand_summed <- internal_stand_list[c(names(repl_pattern), "HMDB_code")]
+internal_stand_summed$HMDB.name <- internal_stand_list$name
+internal_stand_summed <- reshape2::melt(internal_stand_summed, id.vars = c("HMDB_code", "HMDB.name"))
+colnames(internal_stand_summed) <- c("HMDB.code", "HMDB.name", "Sample", "Intensity")
+internal_stand_summed$Matrix <- dims_matrix
+internal_stand_summed$Rundate <- rundate
+internal_stand_summed$Project <- project
+internal_stand_summed$Intensity <- as.numeric(as.character(internal_stand_summed$Intensity))
 
 # Retrieve IS positive mode
-is_pos <- as.data.frame(subset(outlist_pos_adducts_hmdb, rownames(outlist_pos_adducts_hmdb) %in% is_codes))
-is_pos$HMDB_name <- is_list[match(row.names(is_pos), is_list$HMDB_code, nomatch = NA), "name"]
-is_pos$HMDB.code <- row.names(is_pos)
-is_pos <- reshape2::melt(is_pos, id.vars = c("HMDB.code", "HMDB_name"))
-colnames(is_pos) <- c("HMDB.code", "HMDB.name", "Sample", "Intensity")
-is_pos$Matrix <- dims_matrix
-is_pos$Rundate <- rundate
-is_pos$Project <- project
-is_pos$Intensity <- as.numeric(as.character(is_pos$Intensity))
+internal_stand_pos <- as.data.frame(subset(outlist_pos_adducts_hmdb, rownames(outlist_pos_adducts_hmdb) %in% internal_stand_codes))
+internal_stand_pos$HMDB_name <- internal_stand_list[match(row.names(internal_stand_pos), internal_stand_list$HMDB_code, nomatch = NA), "name"]
+internal_stand_pos$HMDB.code <- row.names(internal_stand_pos)
+internal_stand_pos <- reshape2::melt(internal_stand_pos, id.vars = c("HMDB.code", "HMDB_name"))
+colnames(internal_stand_pos) <- c("HMDB.code", "HMDB.name", "Sample", "Intensity")
+internal_stand_pos$Matrix <- dims_matrix
+internal_stand_pos$Rundate <- rundate
+internal_stand_pos$Project <- project
+internal_stand_pos$Intensity <- as.numeric(as.character(internal_stand_pos$Intensity))
 
 # Retrieve IS negative mode
-is_neg <- as.data.frame(subset(outlist_neg_adducts_hmdb, rownames(outlist_neg_adducts_hmdb) %in% is_codes))
-is_neg$HMDB_name <- is_list[match(row.names(is_neg), is_list$HMDB_code, nomatch = NA), "name"]
-is_neg$HMDB.code <- row.names(is_neg)
-is_neg <- reshape2::melt(is_neg, id.vars = c("HMDB.code", "HMDB_name"))
-colnames(is_neg) <- c("HMDB.code", "HMDB.name", "Sample", "Intensity")
-is_neg$Matrix <- dims_matrix
-is_neg$Rundate <- rundate
-is_neg$Project <- project
-is_neg$Intensity <- as.numeric(as.character(is_neg$Intensity))
+internal_stand_neg <- as.data.frame(subset(outlist_neg_adducts_hmdb, rownames(outlist_neg_adducts_hmdb) %in% internal_stand_codes))
+internal_stand_neg$HMDB_name <- internal_stand_list[match(row.names(internal_stand_neg), internal_stand_list$HMDB_code, nomatch = NA), "name"]
+internal_stand_neg$HMDB.code <- row.names(internal_stand_neg)
+internal_stand_neg <- reshape2::melt(internal_stand_neg, id.vars = c("HMDB.code", "HMDB_name"))
+colnames(internal_stand_neg) <- c("HMDB.code", "HMDB.name", "Sample", "Intensity")
+internal_stand_neg$Matrix <- dims_matrix
+internal_stand_neg$Rundate <- rundate
+internal_stand_neg$Project <- project
+internal_stand_neg$Intensity <- as.numeric(as.character(internal_stand_neg$Intensity))
 
 # Save results
-save(is_pos, is_neg, is_summed, file = paste0(outdir, "/", project, "_IS_results.RData"))
+save(internal_stand_pos, internal_stand_neg, internal_stand_summed, file = paste0(outdir, "/", project, "_IS_results.RData"))
 
 # number of samples, for plotting length and width
 sample_count <- length(repl_pattern)
 
 # change the order of the x-axis summed plots to a natural sorted one
-sample_naturalorder <- unique(as.character(is_summed$Sample))
+sample_naturalorder <- unique(as.character(internal_stand_summed$Sample))
 sample_naturalorder <- stringr::str_sort(sample_naturalorder, numeric = TRUE)
-is_summed$Sample_level <- factor(is_summed$Sample, levels = c(sample_naturalorder))
-is_pos$Sample_level <- factor(is_pos$Sample, levels = c(sample_naturalorder))
-is_neg$Sample_level <- factor(is_neg$Sample, levels = c(sample_naturalorder))
+internal_stand_summed$Sample_level <- factor(internal_stand_summed$Sample, levels = c(sample_naturalorder))
+internal_stand_pos$Sample_level <- factor(internal_stand_pos$Sample, levels = c(sample_naturalorder))
+internal_stand_neg$Sample_level <- factor(internal_stand_neg$Sample, levels = c(sample_naturalorder))
 
 ## bar plots with all IS
 # theme for all IS bar plots
-theme_is_bar <- function(my_plot) {
+theme_internal_stand_bar <- function(my_plot) {
   my_plot +
     ggplot2::scale_y_continuous(breaks = scales::pretty_breaks(n = 10)) +
     ggplot2::theme(legend.position = "none", 
@@ -392,42 +392,42 @@ theme_is_bar <- function(my_plot) {
 }
 
 # ggplot functions
-is_neg_bar_plot <- ggplot(is_neg, aes(Sample_level, Intensity)) +
+internal_stand_neg_bar_plot <- ggplot(internal_stand_neg, aes(Sample_level, Intensity)) +
   ggtitle("Interne Standaard (Neg)") +
   geom_bar(aes(fill = HMDB.name), stat = "identity") +
   labs(x = "", y = "Intensity") +
   facet_wrap(~HMDB.name, scales = "free_y")
 
-is_pos_bar_plot <- ggplot(is_pos, aes(Sample_level, Intensity)) +
+internal_stand_pos_bar_plot <- ggplot(internal_stand_pos, aes(Sample_level, Intensity)) +
   ggtitle("Interne Standaard (Pos)") +
   geom_bar(aes(fill = HMDB.name), stat = "identity") +
   labs(x = "", y = "Intensity") +
   facet_wrap(~HMDB.name, scales = "free_y")
 
-is_sum_bar_plot <- ggplot(is_summed, aes(Sample_level, Intensity)) +
+internal_stand_sum_bar_plot <- ggplot(internal_stand_summed, aes(Sample_level, Intensity)) +
   ggtitle("Interne Standaard (Summed)") +
   geom_bar(aes(fill = HMDB.name), stat = "identity") +
   labs(x = "", y = "Intensity") +
   facet_wrap(~HMDB.name, scales = "free_y")
 
 # add theme to ggplot functions
-is_neg_bar_plot <- theme_is_bar(is_neg_bar_plot)
-is_pos_bar_plot <- theme_is_bar(is_pos_bar_plot)
-is_sum_bar_plot <- theme_is_bar(is_sum_bar_plot)
+internal_stand_neg_bar_plot <- theme_internal_stand_bar(internal_stand_neg_bar_plot)
+internal_stand_pos_bar_plot <- theme_internal_stand_bar(internal_stand_pos_bar_plot)
+internal_stand_sum_bar_plot <- theme_internal_stand_bar(internal_stand_sum_bar_plot)
 
 # save plots to disk
 plot_width <- 9 + 0.35 * sample_count
 ggsave(paste0(outdir, "/plots/IS_bar_all_neg.png"), 
-       plot = is_neg_bar_plot, height = plot_width / 2.5, width = plot_width, units = "in")
+       plot = internal_stand_neg_bar_plot, height = plot_width / 2.5, width = plot_width, units = "in")
 ggsave(paste0(outdir, "/plots/IS_bar_all_pos.png"), 
-       plot = is_pos_bar_plot, height = plot_width / 2.5, width = plot_width, units = "in")
+       plot = internal_stand_pos_bar_plot, height = plot_width / 2.5, width = plot_width, units = "in")
 ggsave(paste0(outdir, "/plots/IS_bar_all_sum.png"), 
-       plot = is_sum_bar_plot, height = plot_width / 2.5, width = plot_width, units = "in")
+       plot = internal_stand_sum_bar_plot, height = plot_width / 2.5, width = plot_width, units = "in")
 
 ## Line plots with all IS
 # function for ggplot theme
 # add smaller legend in the "all IS line plots", otherwise out-of-range when more than 13 IS lines
-theme_is_line <- function(my_plot) {
+theme_internal_stand_line <- function(my_plot) {
   my_plot +
     guides(shape = guide_legend(override.aes = list(size = 0.5)), 
 	   color = guide_legend(override.aes = list(size = 0.5))
@@ -440,44 +440,44 @@ theme_is_line <- function(my_plot) {
 }
 
 # ggplot functions
-is_neg_line_plot <- ggplot(is_neg, aes(Sample_level, Intensity)) +
+internal_stand_neg_line_plot <- ggplot(internal_stand_neg, aes(Sample_level, Intensity)) +
   ggtitle("Interne Standaard (Neg)") +
   geom_point(aes(col = HMDB.name)) +
   geom_line(aes(col = HMDB.name, group = HMDB.name)) +
   labs(x = "", y = "Intensity")
 
-is_pos_line_plot <- ggplot(is_pos, aes(Sample_level, Intensity)) +
+internal_stand_pos_line_plot <- ggplot(internal_stand_pos, aes(Sample_level, Intensity)) +
   ggtitle("Interne Standaard (Pos)") +
   geom_point(aes(col = HMDB.name)) +
   geom_line(aes(col = HMDB.name, group = HMDB.name)) +
   labs(x = "", y = "Intensity")
 
-is_sum_line_plot <- ggplot(is_summed, aes(Sample_level, Intensity)) +
+internal_stand_sum_line_plot <- ggplot(internal_stand_summed, aes(Sample_level, Intensity)) +
   ggtitle("Interne Standaard (Sum)") +
   geom_point(aes(col = HMDB.name)) +
   geom_line(aes(col = HMDB.name, group = HMDB.name)) +
   labs(x = "", y = "Intensity")
 
 # add theme to ggplot functions
-is_sum_line_plot <- theme_is_line(is_sum_line_plot)
-is_neg_line_plot <- theme_is_line(is_neg_line_plot)
-is_pos_line_plot <- theme_is_line(is_pos_line_plot)
+internal_stand_sum_line_plot <- theme_internal_stand_line(internal_stand_sum_line_plot)
+internal_stand_neg_line_plot <- theme_internal_stand_line(internal_stand_neg_line_plot)
+internal_stand_pos_line_plot <- theme_internal_stand_line(internal_stand_pos_line_plot)
 
 # save plots to disk
 plot_width <- 8 + 0.2 * sample_count
 ggsave(paste0(outdir, "/plots/IS_line_all_neg.png"), 
-       plot = is_neg_line_plot, height = plot_width / 2.5, width = plot_width, units = "in")
+       plot = internal_stand_neg_line_plot, height = plot_width / 2.5, width = plot_width, units = "in")
 ggsave(paste0(outdir, "/plots/IS_line_all_pos.png"), 
-       plot = is_pos_line_plot, height = plot_width / 2.5, width = plot_width, units = "in")
+       plot = internal_stand_pos_line_plot, height = plot_width / 2.5, width = plot_width, units = "in")
 ggsave(paste0(outdir, "/plots/IS_line_all_sum.png"), 
-       plot = is_sum_line_plot, height = plot_width / 2.5, width = plot_width, units = "in")
+       plot = internal_stand_sum_line_plot, height = plot_width / 2.5, width = plot_width, units = "in")
 
 ## bar plots with a selection of IS
-is_neg_selection <- c("2H2-Ornithine (IS)", "2H3-Glutamate (IS)", "2H2-Citrulline (IS)", "2H4_13C5-Arginine (IS)", 
+internal_stand_neg_selection <- c("2H2-Ornithine (IS)", "2H3-Glutamate (IS)", "2H2-Citrulline (IS)", "2H4_13C5-Arginine (IS)", 
 		      "13C6-Tyrosine (IS)")
-is_pos_selection <- c("2H4-Alanine (IS)", "13C6-Phenylalanine (IS)", "2H4_13C5-Arginine (IS)", "2H3-Propionylcarnitine (IS)", 
+internal_stand_pos_selection <- c("2H4-Alanine (IS)", "13C6-Phenylalanine (IS)", "2H4_13C5-Arginine (IS)", "2H3-Propionylcarnitine (IS)", 
 		      "2H9-Isovalerylcarnitine (IS)")
-is_sum_selection <- c("2H8-Valine (IS)", "2H3-Leucine (IS)", "2H3-Glutamate (IS)", "2H4_13C5-Arginine (IS)", 
+internal_stand_sum_selection <- c("2H8-Valine (IS)", "2H3-Leucine (IS)", "2H3-Glutamate (IS)", "2H4_13C5-Arginine (IS)", 
 		      "13C6-Tyrosine (IS)")
 
 # add minimal intensity lines based on matrix (DBS or Plasma) and machine mode (neg, pos, sum)
@@ -485,111 +485,111 @@ if (dims_matrix == "DBS") {
   hline_data_neg <-
     data.frame(
       z = c(15000, 200000, 130000, 18000, 50000),
-      HMDB.name = is_neg_selection
+      HMDB.name = internal_stand_neg_selection
     )
   hline_data_pos <-
     data.frame(
       z = c(150000, 3300000, 1750000, 150000, 270000),
-      HMDB.name = is_pos_selection
+      HMDB.name = internal_stand_pos_selection
     )
   hline_data_sum <-
     data.frame(
       z = c(1300000, 2500000, 500000, 1800000, 1400000),
-      HMDB.name = is_sum_selection
+      HMDB.name = internal_stand_sum_selection
     )
 } else if (dims_matrix == "Plasma") {
   hline_data_neg <-
     data.frame(
       z = c(6500, 100000, 75000, 7500, 25000),
-      HMDB.name = is_neg_selection
+      HMDB.name = internal_stand_neg_selection
     )
   hline_data_pos <-
     data.frame(
       z = c(85000, 1000000, 425000, 70000, 180000),
-      HMDB.name = is_pos_selection
+      HMDB.name = internal_stand_pos_selection
     )
   hline_data_sum <-
     data.frame(
       z = c(700000, 1250000, 150000, 425000, 300000),
-      HMDB.name = is_sum_selection
+      HMDB.name = internal_stand_sum_selection
     )
 }
 
 # ggplot functions
-is_neg_selection_barplot <- ggplot(subset(is_neg, HMDB.name %in% is_neg_selection), aes(Sample_level, Intensity)) +
+internal_stand_neg_selection_barplot <- ggplot(subset(internal_stand_neg, HMDB.name %in% internal_stand_neg_selection), aes(Sample_level, Intensity)) +
   ggtitle("Interne Standaard (Neg)") +
   geom_bar(aes(fill = HMDB.name), stat = "identity") +
   labs(x = "", y = "Intensity") +
   facet_wrap(~HMDB.name, scales = "free", ncol = 2) +
   if (exists("hline_data_neg")) {
-    geom_hline(aes(yintercept = z), subset(hline_data_neg, HMDB.name %in% is_neg$HMDB.name))
+    geom_hline(aes(yintercept = z), subset(hline_data_neg, HMDB.name %in% internal_stand_neg$HMDB.name))
   }
 
-is_pos_selection_barplot <- ggplot(subset(is_pos, HMDB.name %in% is_pos_selection), aes(Sample_level, Intensity)) +
+internal_stand_pos_selection_barplot <- ggplot(subset(internal_stand_pos, HMDB.name %in% internal_stand_pos_selection), aes(Sample_level, Intensity)) +
   ggtitle("Interne Standaard (Pos)") +
   geom_bar(aes(fill = HMDB.name), stat = "identity") +
   labs(x = "", y = "Intensity") +
   facet_wrap(~HMDB.name, scales = "free", ncol = 2) +
   if (exists("hline_data_pos")) {
-    geom_hline(aes(yintercept = z), subset(hline_data_pos, HMDB.name %in% is_pos$HMDB.name))
+    geom_hline(aes(yintercept = z), subset(hline_data_pos, HMDB.name %in% internal_stand_pos$HMDB.name))
   }
 
-is_sum_selection_barplot <- ggplot(subset(is_summed, HMDB.name %in% is_sum_selection), aes(Sample_level, Intensity)) +
+internal_stand_sum_selection_barplot <- ggplot(subset(internal_stand_summed, HMDB.name %in% internal_stand_sum_selection), aes(Sample_level, Intensity)) +
   ggtitle("Interne Standaard (Sum)") +
   geom_bar(aes(fill = HMDB.name), stat = "identity") +
   labs(x = "", y = "Intensity") +
   facet_wrap(~HMDB.name, scales = "free", ncol = 2) +
   if (exists("hline_data_sum")) {
-    geom_hline(aes(yintercept = z), subset(hline_data_sum, HMDB.name %in% is_summed$HMDB.name))
+    geom_hline(aes(yintercept = z), subset(hline_data_sum, HMDB.name %in% internal_stand_summed$HMDB.name))
   }
 
 # add theme to ggplot functions
-is_neg_selection_barplot <- theme_is_bar(is_neg_selection_barplot)
-is_pos_selection_barplot <- theme_is_bar(is_pos_selection_barplot)
-is_sum_selection_barplot <- theme_is_bar(is_sum_selection_barplot)
+internal_stand_neg_selection_barplot <- theme_internal_stand_bar(internal_stand_neg_selection_barplot)
+internal_stand_pos_selection_barplot <- theme_internal_stand_bar(internal_stand_pos_selection_barplot)
+internal_stand_sum_selection_barplot <- theme_internal_stand_bar(internal_stand_sum_selection_barplot)
 
 # save plots to disk
 plot_width <- 9 + 0.35 * sample_count
 ggsave(paste0(outdir, "/plots/IS_bar_select_neg.png"), 
-       plot = is_neg_selection_barplot, height = plot_width / 2.0, width = plot_width, units = "in")
+       plot = internal_stand_neg_selection_barplot, height = plot_width / 2.0, width = plot_width, units = "in")
 ggsave(paste0(outdir, "/plots/IS_bar_select_pos.png"), 
-       plot = is_pos_selection_barplot, height = plot_width / 2.0, width = plot_width, units = "in")
+       plot = internal_stand_pos_selection_barplot, height = plot_width / 2.0, width = plot_width, units = "in")
 ggsave(paste0(outdir, "/plots/IS_bar_select_sum.png"), 
-       plot = is_sum_selection_barplot, height = plot_width / 2.0, width = plot_width, units = "in")
+       plot = internal_stand_sum_selection_barplot, height = plot_width / 2.0, width = plot_width, units = "in")
 
 ## line plots with a selection of IS
 # ggplot functions
-is_neg_selection_lineplot <- ggplot(subset(is_neg, HMDB.name %in% is_neg_selection), aes(Sample_level, Intensity)) +
+internal_stand_neg_selection_lineplot <- ggplot(subset(internal_stand_neg, HMDB.name %in% internal_stand_neg_selection), aes(Sample_level, Intensity)) +
   ggtitle("Interne Standaard (Neg)") +
   geom_point(aes(col = HMDB.name)) +
   geom_line(aes(col = HMDB.name, group = HMDB.name)) +
   labs(x = "", y = "Intensity")
 
-is_pos_selection_lineplot <- ggplot(subset(is_pos, HMDB.name %in% is_pos_selection), aes(Sample_level, Intensity)) +
+internal_stand_pos_selection_lineplot <- ggplot(subset(internal_stand_pos, HMDB.name %in% internal_stand_pos_selection), aes(Sample_level, Intensity)) +
   ggtitle("Interne Standaard (Pos)") +
   geom_point(aes(col = HMDB.name)) +
   geom_line(aes(col = HMDB.name, group = HMDB.name)) +
   labs(x = "", y = "Intensity")
 
-is_sum_selection_lineplot <- ggplot(subset(is_summed, HMDB.name %in% is_sum_selection), aes(Sample_level, Intensity)) +
+internal_stand_sum_selection_lineplot <- ggplot(subset(internal_stand_summed, HMDB.name %in% internal_stand_sum_selection), aes(Sample_level, Intensity)) +
   ggtitle("Interne Standaard (Sum)") +
   geom_point(aes(col = HMDB.name)) +
   geom_line(aes(col = HMDB.name, group = HMDB.name)) +
   labs(x = "", y = "Intensity")
 
 # add theme to ggplot functions
-is_neg_selection_lineplot <- theme_is_line(is_neg_selection_lineplot)
-is_pos_selection_lineplot <- theme_is_line(is_pos_selection_lineplot)
-is_sum_selection_lineplot <- theme_is_line(is_sum_selection_lineplot)
+internal_stand_neg_selection_lineplot <- theme_internal_stand_line(internal_stand_neg_selection_lineplot)
+internal_stand_pos_selection_lineplot <- theme_internal_stand_line(internal_stand_pos_selection_lineplot)
+internal_stand_sum_selection_lineplot <- theme_internal_stand_line(internal_stand_sum_selection_lineplot)
 
 # save plots to disk
 plot_width <- 8 + 0.2 * sample_count
 ggsave(paste0(outdir, "/plots/IS_line_select_neg.png"), 
-       plot = is_neg_selection_lineplot, height = plot_width / 2.5, width = plot_width, units = "in")
+       plot = internal_stand_neg_selection_lineplot, height = plot_width / 2.5, width = plot_width, units = "in")
 ggsave(paste0(outdir, "/plots/IS_line_select_pos.png"), 
-       plot = is_pos_selection_lineplot, height = plot_width / 2.5, width = plot_width, units = "in")
+       plot = internal_stand_pos_selection_lineplot, height = plot_width / 2.5, width = plot_width, units = "in")
 ggsave(paste0(outdir, "/plots/IS_line_select_sum.png"), 
-       plot = is_sum_selection_lineplot, height = plot_width / 2.5, width = plot_width, units = "in")
+       plot = internal_stand_sum_selection_lineplot, height = plot_width / 2.5, width = plot_width, units = "in")
 
 
 ### POSITIVE CONTROLS CHECK
@@ -683,25 +683,25 @@ calculate_coefficient_of_variation <- function(intensity_list) {
 }
 
 # Internal standards lists, calculate coefficients of variation
-if ("plots" %in% colnames(is_list)) {
-  intensity_col_ids <- 2:(which(colnames(is_list) == "HMDB_name") - 1)
+if ("plots" %in% colnames(internal_stand_list)) {
+  intensity_col_ids <- 2:(which(colnames(internal_stand_list) == "HMDB_name") - 1)
 } else {
-  intensity_col_ids <- 1:(which(colnames(is_list) == "HMDB_name") - 1)
+  intensity_col_ids <- 1:(which(colnames(internal_stand_list) == "HMDB_name") - 1)
 }
 # previous intensity_col_ids is based on the assumption that there are Controls and Patients
-is_list_intensities <- is_list[ , intensity_col_ids]
-is_list_intensities <- calculate_coefficient_of_variation(is_list_intensities)
-is_list_intensities <- cbind(IS_name = is_list$HMDB_name, is_list_intensities)
+internal_stand_list_intensities <- internal_stand_list[ , intensity_col_ids]
+internal_stand_list_intensities <- calculate_coefficient_of_variation(internal_stand_list_intensities)
+internal_stand_list_intensities <- cbind(IS_name = internal_stand_list$HMDB_name, internal_stand_list_intensities)
 
 # separate adducts of IS
-is_pos <- as.data.frame(subset(outlist_pos_adducts_hmdb, rownames(outlist_pos_adducts_hmdb) %in% is_codes))
-is_neg <- as.data.frame(subset(outlist_neg_adducts_hmdb, rownames(outlist_neg_adducts_hmdb) %in% is_codes))
-is_pos_intensities <- is_pos[ , -which(colnames(is_pos) == "HMDB_name")]
-is_neg_intensities <- is_neg[ , -which(colnames(is_neg) == "HMDB_name")]
-is_pos_intensities <- calculate_coefficient_of_variation(is_pos_intensities)
-is_neg_intensities <- calculate_coefficient_of_variation(is_neg_intensities)
-is_pos_intensities <- cbind(IS_name = is_pos$HMDB_name, is_pos_intensities)
-is_neg_intensities <- cbind(IS_name = is_neg$HMDB_name, is_neg_intensities)
+internal_stand_pos <- as.data.frame(subset(outlist_pos_adducts_hmdb, rownames(outlist_pos_adducts_hmdb) %in% internal_stand_codes))
+internal_stand_neg <- as.data.frame(subset(outlist_neg_adducts_hmdb, rownames(outlist_neg_adducts_hmdb) %in% internal_stand_codes))
+internal_stand_pos_intensities <- internal_stand_pos[ , -which(colnames(internal_stand_pos) == "HMDB_name")]
+internal_stand_neg_intensities <- internal_stand_neg[ , -which(colnames(internal_stand_neg) == "HMDB_name")]
+internal_stand_pos_intensities <- calculate_coefficient_of_variation(internal_stand_pos_intensities)
+internal_stand_neg_intensities <- calculate_coefficient_of_variation(internal_stand_neg_intensities)
+internal_stand_pos_intensities <- cbind(IS_name = internal_stand_pos$HMDB_name, internal_stand_pos_intensities)
+internal_stand_neg_intensities <- cbind(IS_name = internal_stand_neg$HMDB_name, internal_stand_neg_intensities)
 
 # SST components. 
 sst_comp <- read.csv(sst_components_file, header = TRUE, sep = "\t")
@@ -729,13 +729,13 @@ sst_list_intensities <- cbind(SST_comp_name = sst_list$HMDB_name, sst_list_inten
 # Create Excel file
 wb <- createWorkbook("IS_SST")
 addWorksheet(wb, "Internal Standards")
-openxlsx::writeData(wb, sheet = 1, is_list_intensities)
+openxlsx::writeData(wb, sheet = 1, internal_stand_list_intensities)
 setColWidths(wb, 1, cols = 1, widths = 24)
 addWorksheet(wb, "IS pos")
-openxlsx::writeData(wb, sheet = 2, is_pos_intensities)
+openxlsx::writeData(wb, sheet = 2, internal_stand_pos_intensities)
 setColWidths(wb, 2, cols = 1, widths = 24)
 addWorksheet(wb, "IS neg")
-openxlsx::writeData(wb, sheet = 3, is_neg_intensities)
+openxlsx::writeData(wb, sheet = 3, internal_stand_neg_intensities)
 setColWidths(wb, 3, cols = 1, widths = 24)
 addWorksheet(wb, "SST components")
 openxlsx::writeData(wb, sheet = 4, sst_list_intensities)

From 882c4034fad8f29f26ff54846018f377c431d7c5 Mon Sep 17 00:00:00 2001
From: ALuesink <anneluesink@live.nl>
Date: Thu, 14 Nov 2024 14:07:55 +0100
Subject: [PATCH 17/18] Changes according to the code review

---
 DIMS/AverageTechReplicates.R | 2 +-
 DIMS/GenerateExcel.R         | 6 +++---
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/DIMS/AverageTechReplicates.R b/DIMS/AverageTechReplicates.R
index 0c57ad7..98a2d41 100644
--- a/DIMS/AverageTechReplicates.R
+++ b/DIMS/AverageTechReplicates.R
@@ -53,7 +53,7 @@ if (dims_matrix == "DBS") {
   thresh2remove <- 500000000
 }
 if (highest_mz > 700) {
-  thresh2remove <- 10000000
+  thresh2remove <- 1000000
 }
 
 
diff --git a/DIMS/GenerateExcel.R b/DIMS/GenerateExcel.R
index 28b2707..04860ea 100644
--- a/DIMS/GenerateExcel.R
+++ b/DIMS/GenerateExcel.R
@@ -46,8 +46,8 @@ robust_scaler <- function(control_intensities, control_col_ids, perc = 5) {
   return(trimmed_control_intensities)
 }
 
-remove_outliers_for_Zscore <- function(control_intensities, outlier_threshold = 2) {
-  #' Remove outliers per metabolite before calculating Z-scores
+remove_outliers_grubbs <- function(control_intensities, outlier_threshold = 2) {
+  #' Remove outliers per metabolite according to Grubb's test
   #'
   #' @param control_intensities: Vector with intensities for control samples
   #' @param outlier_threshold: Threshold for outliers which will be removed from controls (float)
@@ -211,7 +211,7 @@ if (z_score == 1) {
   # calculate Z-scores after removal of outliers in Control samples
   if (length(control_col_ids) > 10) {
     for (metabolite_index in 1:nrow(outlist_nooutliers)) {
-      intensities_without_outliers <- remove_outliers_for_Zscore(as.numeric(outlist_nooutliers[metabolite_index, control_col_ids]), outlier_threshold)
+      intensities_without_outliers <- remove_outliers_grubbs(as.numeric(outlist_nooutliers[metabolite_index, control_col_ids]), outlier_threshold)
       outlist_nooutliers$avg.ctrls[metabolite_index] <- mean(intensities_without_outliers)
       outlist_nooutliers$sd.ctrls[metabolite_index]  <- sd(intensities_without_outliers)
       outlist_nooutliers$nr.ctrls[metabolite_index]  <- length(intensities_without_outliers)

From 74180ee98c12ce27eda4361e1dc011ce9a983a3a Mon Sep 17 00:00:00 2001
From: ALuesink <anneluesink@live.nl>
Date: Thu, 14 Nov 2024 14:36:33 +0100
Subject: [PATCH 18/18] Added docstring to function
 calculate_coefficient_of_variation

---
 DIMS/GenerateExcel.R | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/DIMS/GenerateExcel.R b/DIMS/GenerateExcel.R
index 04860ea..fd42b4f 100644
--- a/DIMS/GenerateExcel.R
+++ b/DIMS/GenerateExcel.R
@@ -668,6 +668,11 @@ if (z_score == 1) {
 
 ### SST components output ####
 calculate_coefficient_of_variation <- function(intensity_list) {
+  #' Calculate coefficent of variation (cv) based on standard deviation (sd) and mean
+  #'
+  #' @param intensity_list: Matrix with intensities
+  #'
+  #' @return intensity_list_with_cv: Matrix with intensities and cv, mean, sd
   for (col_nr in 1:ncol(intensity_list)) {
     intensity_list[, col_nr] <- as.numeric(intensity_list[, col_nr])
     intensity_list[, col_nr] <- round(intensity_list[, col_nr], 0)