diff --git a/tdtK_Full_Analysis_script_v0.7.R b/tdtK_Full_Analysis_script_v0.7.R
new file mode 100644
index 0000000..377166a
--- /dev/null
+++ b/tdtK_Full_Analysis_script_v0.7.R
@@ -0,0 +1,2690 @@
+###### Complete Rscript to run TdtK Analysis
+## Run the entire script from within R or RStudio
+######
+
+### NOTES ##########
+# If ImageJ/Fiji and/or showinf are NOT found, please follow these steps:
+# Specifically for RStudio (***Linux***), the path to ImageJ-linux64 needs to be set
+# by replacing "/home/geo/Fiji.app/" with the path to YOUR ImageJ-linux64
+# executable. ONLY necessary if the script can't find ImageJ from within
+# RStudio (but it would work in terminal mode)
+Sys.setenv(PATH=paste(Sys.getenv("PATH"), "/home/geo/Fiji.app/", sep=":"))
+
+# Another example for ***MacOS*** (if ImageJ-macosx resides in "/Applications/Fiji.app/Contents/macos/"):
+Sys.setenv(PATH=paste(Sys.getenv("PATH"), "/Applications/Fiji.app/Contents/macos/", sep=":"))
+
+# For ***Windows***, edit the System Environment Variables and add the locations of
+# ImageJ-win64.exe and showinf.bat to the PATH variable.
+
+# Hard-coded locations if interactive mode is not required (uncomment and replace with relevant paths)
+#
+# answer_info <- FALSE
+# script_dir <- c("~/Rserver_tdtK")
+# movie_dir <- c("/home/geo/bacon_mount/Clara/tdtK-data/EpigeneticScreen-7wk/")
+# target_dir <- c("~/GIANT/balled_MAYO_only")
+# mappings_file <- c("~/GIANT/balled_MAYO_only/excellent traces/mappings.xlsx")
+# mappings_file <- c("~/GIANT/balled_others/good traces/mappings.xlsx")
+
+{
+  options(java.parameters = "-Xmx11g" )
+
+
+  answer_info <- rstudioapi::showQuestion("Enter folder and file?", "\"Yes\" will ask for files and folder locations.", ok = "Yes", cancel = "No")
+  if (answer_info == TRUE | exists("OS") == FALSE) {
+
+    message("Check if Fiji/ImageJ and bftools are installed...")
+    # Check whether Fiji/ImageJ and bftools are installed
+    # (.Platform$OS.type is only ever "windows" or "unix"; macOS is
+    # distinguished from other unix systems via Sys.info()["sysname"])
+    if (.Platform$OS.type == "windows") {
+      showinf_installed <- system('showinf.bat -version') == 0
+      ImageJ_installed <- system('ImageJ-win64.exe --ij2 --headless') == 0
+      OS <- "windows"
+
+    } else if (Sys.info()["sysname"] == "Darwin") {
+      showinf_installed <- system('showinf -version') == 0
+      ImageJ_installed <- system('ImageJ-macosx --ij2 --headless') == 0
+
+      if (ImageJ_installed == FALSE)
+      {
+        Sys.setenv(PATH=paste(Sys.getenv("PATH"), "/Applications/Fiji.app/Contents/macos/", sep=":"))
+      }
+
+      ImageJ_installed <- system('ImageJ-macosx --ij2 --headless') == 0
+      OS <- "Darwin"
+
+    } else if (.Platform$OS.type == "unix") {
+      showinf_installed <- system('showinf -version') == 0
+      ImageJ_installed <- system('ImageJ-linux64 --ij2 --headless') == 0
+      OS <- "unix"
+
+    } else {
+      showinf_installed <- FALSE
+      ImageJ_installed <- FALSE
+    }
+
+    if (showinf_installed == TRUE & ImageJ_installed == TRUE) {
+      message("Everything is installed")
+    } else if (showinf_installed == FALSE & ImageJ_installed == FALSE) {
+      stop("bftools and Fiji/ImageJ are missing !")
+    } else if (showinf_installed == FALSE) {
+      stop("bftools is missing !")
+    } else {
+      stop("ImageJ/Fiji is missing !")
+    }
+
+
+    # Where are the script files?
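+    # Note (hedged): the rstudioapi::selectDirectory()/selectFile() calls below
+    # need an interactive RStudio session. For a headless or scripted run, set
+    # script_dir, movie_dir, target_dir and mappings_file by hand instead, as in
+    # the commented "Hard-coded locations" block near the top of this file.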
+ script_dir <- rstudioapi::selectDirectory(caption = "Select Directory containing Rscript Files", label = "Rscript Files" ) + IJscript <- normalizePath(file.path(script_dir,"macro.ijm")) + + if(file.exists(IJscript) == FALSE) + { + stop("This folder does not contain the required file(s)") + } + + # Where are the movie files? + movie_dir <-rstudioapi::selectDirectory(caption = "Select Directory containing tdtK Movie Files", label = "tdtK Movie Files" ) + + # Target directory - this is where all processed files will be analyzed + target_dir <- rstudioapi::selectDirectory(caption = "Select Directory for Final Processing", path = movie_dir, label = "Processing Folder" ) + + # Point to the Mappings file + mappings_file <- rstudioapi::selectFile(caption = "Select Genotype Mappings File", path = movie_dir, label = "mappings.xlsx -or- .csv") + + if (basename(mappings_file) == "mappings.xlsx") + { + mappings_file_type <- "XLSX" + } + + if (basename(mappings_file) == "mappings.csv") + { + mappings_file_type <- "CSV" + } + + if (basename(mappings_file) != "mappings.xlsx" && basename(mappings_file) != "mappings.csv") + { + stop("The mappings file is not named correctly") + } + } + + + # Run the first part of the script - this creates kymographs from each CXD file + # MAYO Screen Script No.1 + # Imports CXD movies of tdtk hearts and writes TIFF files of Kymographs and overview images + + # Required libraries and functions + options(java.parameters = "-Xmx11g" ) + + library(EBImage) + library(matrixStats) + library(RBioFormats) + library(pracma) + library(zoo) + library(stringr) + library(reshape2) + library(plyr) + library(dplyr) + library(tibble) + + CXDs_process_question <- rstudioapi::showQuestion("Do you need to reprocess CXD files?", "This will check for new .cxd files and process them.", ok = "Yes", cancel = "No") + ## CXDs_process_question <- TRUE + if(CXDs_process_question == TRUE){ + + # New find peaks function from https://stats.stackexchange.com/questions/22974/how-to-find-local-peaks-valleys-in-a-series-of-data + find_peaks <- function (x, m = 3){ + shape <- diff(sign(diff(x, na.pad = FALSE))) + pks <- sapply(which(shape < 0), FUN = function(i){ + z <- i - m + 1 + z <- ifelse(z > 0, z, 1) + w <- i + m + 1 + w <- ifelse(w < length(x), w, length(x)) + if(all(x[c(z : i, (i + 2) : w)] <= x[i + 1])) return(i + 1) else return(numeric(0)) + }) + pks <- unlist(pks) + pks + } + + # And define a different find.max function + which.maxN <- function(x, N=2){ #N=2 means find the 2nd max + len <- length(x) + if(N>len){ + warning('N greater than length(x). 
Setting N=length(x)') + N <- length(x) + } + which(x == sort(x,partial=len-N+1)[len-N+1]) + } + + + # Switch directory containing CXD files + setwd(movie_dir) + + # Get file list, for all necessary file types (.cxd) + + filelist <- NULL + filelist$cxd <- list.files(".", pattern = "\\.cxd$", recursive = TRUE) + + total <- length(filelist$cxd) + pb <- txtProgressBar(max = total, style = 3) + + for (f in 1:length(filelist$cxd)) + { + setTxtProgressBar(pb, f) + + # Check if tiff files have already been generated - skip to next file + tiffs <- list.files(path = dirname(filelist$cxd[f]), pattern = paste0(basename(filelist$cxd[f]),'_peak_')) + if(length(tiffs) >= 1) + { + + if(file.exists(paste0(filelist$cxd[f],'_directionmarks','.csv'))) + { + next + } + + } + + + if(file.size(filelist$cxd[f]) < 150000000) # OS-independent version + { + next + } + + # Check if metadata file is present + + if(file.exists(paste0(substr(filelist$cxd[f],1,gregexpr('.cxd',filelist$cxd[f])[[1]][1]),"_new_meta_data.csv"))) + { + meta.data <- read.csv(paste0(substr(filelist$cxd[f],1,gregexpr('.cxd',filelist$cxd[f])[[1]][1]),"_new_meta_data.csv")) + } else + { + # Read metadata of cxd file and store separately for later use. Also, + # check if file can be imported or is defective and skip if so + + if(Sys.info()['sysname'] == "Windows"){ + meta.data <- as.data.frame(system(paste0('showinf.bat -nopix ', shQuote(filelist$cxd[f])), intern = TRUE), stringsAsFactors = FALSE) + } else { + meta.data <- as.data.frame(system(paste0("showinf -nopix ", shQuote(filelist$cxd[f])), intern = TRUE), stringsAsFactors = FALSE) + } + + meta.data[,1] <- gsub("\t", "", meta.data[,1]) + meta.data <- as.data.frame(str_split_fixed(meta.data[,1], "[:=]", 2), stringsAsFactors = FALSE) + + # Correct for missing date/time info in file + test <- tryCatch(meta.data[c(which(meta.data$V1 == "Capture Region") + 1):c(which(meta.data$V1 == "File Info Last Field Date Time")-1),], error=function(e) e) + if(inherits(test, "error")) + { + + meta.data[length(meta.data$V1) + 1,1] <- c("File Info Last Field Date Time") + meta.data[length(meta.data$V1),2] <- 11644444801 + + } + + + + # Get timecodes for all frames + g <- meta.data + names(g) <- c("L1", "value") + test <- tryCatch( + g$value <- as.numeric(g$value), error=function(e){}) + + + g$L1 <- paste0(g$L1," ") + g$Field <- unlist(lapply(g$L1, function(x) substring(x,gregexpr(' ',x)[[1]][1]+1,gregexpr(' ',x)[[1]][2]-1))) + g$Type <- unlist(lapply(g$L1, function(x) substring(x,gregexpr(' ',x)[[1]][[2]],gregexpr(' ',x)[[1]][[3]]))) + g$Type2 <- unlist(lapply(g$Type, function(x)gsub("[[:space:]]", "", x))) + + cxd_fields_timecodes <- g[which(g$Type2 == "Time_From_Start"),] + cxd_fields_timecodes <- cxd_fields_timecodes[order(cxd_fields_timecodes$Field),] + cxd_fields_intervals <- g[which(g$Type2 == "Time_From_Last"),] + cxd_fields_intervals <- cxd_fields_intervals[order(cxd_fields_intervals$Field),] + + # Get time interval + total_time <- cxd_fields_timecodes[which.max(cxd_fields_timecodes$Field),2] + + cxd_meta.data <- NULL + cxd_meta.data$sizeX <- as.numeric(meta.data$V2[which(meta.data$V1 == "Width ")]) + cxd_meta.data$sizeY <- as.numeric(meta.data$V2[which(meta.data$V1 == "Height ")]) + cxd_meta.data$sizeZ <- as.numeric(meta.data$V2[which(meta.data$V1 == "SizeZ ")]) + cxd_meta.data$sizeC <- 1 + cxd_meta.data$sizeT <- as.numeric(meta.data$V2[which(meta.data$V1 == "SizeT ")]) + + + # Check if files are very short (less than 200 frames) - skip to next file + + if(cxd_meta.data$sizeT < 200) + { + next + } + + 
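+        # For orientation, a sketch of the key/value table parsed from the
+        # 'showinf -nopix' output at this point (the values are illustrative only):
+        #   V1                        V2
+        #   "Width "                  "2048"
+        #   "Height "                 "2048"
+        #   "SizeT "                  "2000"
+        # The trailing spaces in V1 survive the "[:=]" split above, which is why
+        # the which(meta.data$V1 == "Width ") style lookups below include them.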
+          cxd_meta.data$pixelType <- gsub(" ", "", meta.data$V2[which(meta.data$V1 == "Pixel type ")])
+          cxd_meta.data$bitsPerPixel <- as.numeric(meta.data$V2[which(meta.data$V1 == "Valid bits per pixel ")])
+          cxd_meta.data$imageCount <- as.numeric(cxd_fields_timecodes[which.max(cxd_fields_timecodes$Field),3])
+          cxd_meta.data$dimensionOrder <- gsub(" ", "", meta.data$V2[which(meta.data$V1 == "Dimension order ")])
+          cxd_meta.data$orderCertain <- gsub(" ", "", meta.data$V2[which(meta.data$V1 == "Dimension order ")])
+          cxd_meta.data$rgb <- gsub(" ", "", meta.data$V2[which(meta.data$V1 == "RGB ")])
+          cxd_meta.data$littleEndian <- gsub(" ", "", meta.data$V2[which(meta.data$V1 == "Endianness ")])
+          cxd_meta.data$interleaved <- gsub(" ", "", meta.data$V2[which(meta.data$V1 == "Interleaved ")])
+          cxd_meta.data$falseColor <- 0
+          cxd_meta.data$metadataComplete <- 1
+          cxd_meta.data$thumbnail <- 0
+          cxd_meta.data$series <- 1
+          cxd_meta.data$resolutionLevel <- 1
+          cxd_meta.data$time_interval <- mean(as.numeric(cxd_fields_intervals$value), na.rm = TRUE)
+
+          # Read scale factor and binning from file - adjust if missing
+          scale_factor <- as.numeric(str_split_fixed(meta.data$V2[which(meta.data$V1 == "factor")], ";", 2)[1])
+          magnification <- as.numeric(str_split_fixed(meta.data$V2[which(meta.data$V1 == "magnification")], ";", 2)[1])
+
+          if(is.na(magnification))
+          {
+            magnification <- 1 # if no magnification is given
+          }
+
+          # Check for missing calibration first: with an NA scale_factor the
+          # comparison below would itself fail with an error
+          if(is.na(scale_factor))
+          {
+            stop(paste0("No calibration data found for file ", filelist$cxd[f],". Please move this file into a different folder and re-run script again without this file."))
+          }
+
+          if(scale_factor == 1)
+          {
+            scale_factor <- 0.65 # not calibrated (defaults to 1 pxl) - use the 10x setting for a 6.5um CMOS chip
+          }
+
+          cxd_meta.data$resolution <- scale_factor * magnification
+
+          if(magnification == 2 & scale_factor == 0.65) # To catch potentially wrongly set calibration in HCImage
+          {
+            cxd_meta.data$resolution <- 0.52
+          }
+
+          # CXD files from HCImage have a funky date of birth (Jan 1, 1601, 8:00am).
+          # We have to subtract these seconds to get the Unix epoch
+          cxd_meta.data$created_unix_from_file <- c(as.numeric(meta.data$V2[which(meta.data$V1 == "File Info Last Field Date Time")]) - 11644444800)
+          write.csv(melt(cxd_meta.data), file = paste0(substr(filelist$cxd[f],1,gregexpr('.cxd',filelist$cxd[f])[[1]][1]),"_new_meta_data.csv"), row.names=FALSE)
+          meta.data <- melt(cxd_meta.data)
+        }
+
+
+
+        X_ <- as.numeric(meta.data$value[1])
+        Y_ <- as.numeric(meta.data$value[2])
+        T_ <- as.numeric(meta.data$value[5])
+        timepoints <- 500
+
+        if (T_ < timepoints)
+        {
+          timepoints <- T_
+        }
+
+        # Skip low-FPS movies (frame interval above 10 ms); for high-FPS movies
+        # use all available timepoints instead of the 500-frame default
+        if(meta.data[which(meta.data$L1 == "time_interval"),1] > 0.010)
+        {
+          next
+        } else
+        {
+          timepoints <- T_
+        }
+
+
+        # Use maximum number of pixels x/y and default
+        # img2 <- read.image(filelist$cxd[f], normalize = TRUE, subset = list(x = 1:X_, y = 1:Y_, t=1:timepoints))
+        img2 <- read.image(filelist$cxd[f], read.metadata = FALSE, normalize = FALSE)
+        Xpos <- filelist$Xpos[f] # note: filelist$Xpos is never populated, so this stays NULL and is unused
+        image_ <- imageData(img2)
+        rm(img2)
+        # Normalize image 0 to 1
+        image_ <- image_ - min(image_)
+        image_ <- image_ / max(image_)
+
+        # Standard deviation of each pixel over time, to indicate which pixels change the most
+        y1 <- rowSds(matrix(image_, X_ * Y_, T_))
+        x <- matrix(y1, X_, Y_)
+
+        # x <- apply(image_, c(1,2), sd)
+        x <- x / max(x) # Normalize
+
+
+        # Reset all dark values to a minimum
+        x[which(x < quantile(x)[4])] <- 0
+
+        # Intensity profile along X-axis, then find the ones above threshold
+        v <- apply(x, 1, sum)
+
+        above_thresh <- which(v >= mean(v))
+        below_thresh <- which(v < mean(v))
+
+        # What are the largest gaps in the above_thresh indices
+        brights_ <- rle(diff(c(1,above_thresh)))
+        bright_borders <- data.frame(brights_$values)
+        bright_borders$lengths <- brights_$lengths
+        bright_borders$steps <- bright_borders$brights_.values * bright_borders$lengths
+        bright_borders$edge <- cumsum(bright_borders$steps)
+
+        # below_thresh identifies the dark stripes - find the edges of them:
+        stripes_ <- rle(diff(c(1,below_thresh)))
+        stripe_borders <- data.frame(stripes_$values)
+        stripe_borders$lengths <- stripes_$lengths
+        stripe_borders$steps <- stripe_borders$stripes_.values * stripe_borders$lengths
+        stripe_borders$edge <- cumsum(stripe_borders$steps)
+
+        # Dark / bright matrix
+        border_mask <- data.frame(Xpos = c(1:dim(image_)[1]))
+        border_mask$type[border_mask$Xpos %in% above_thresh] <- "bright"
+        border_mask$type[border_mask$Xpos %in% below_thresh] <- "dark"
+        border_mask$edge[border_mask$Xpos %in% above_thresh] <- 100
+        border_mask$edge[border_mask$Xpos %in% below_thresh] <- 1
+
+        # Duplicate the last n (30) rows to fill
+
+        border_mask[dim(image_)[1]:c(dim(image_)[1]+29),] <- border_mask[dim(image_)[1],]
+        borders_ <- rollapply(border_mask$edge, 30, median)
+        edges <- which(borders_ == 50.5)
+
+        if (length(unique(border_mask$type[1:edges[1]])) != 1)
+        {
+          border_mask$type[1:edges[1]] <- border_mask$type[edges[1]]
+          border_mask$edge[1:edges[1]] <- border_mask$edge[edges[1]]
+        }
+
+        if (border_mask$type[1] == "dark")
+        {
+          borders_ <- edges
+        } else {
+          borders_ <- c(1,edges)
+        }
+
+        if(length(borders_) < 2)
+        {
+          next
+        } else if(is.na(diff(borders_[1:2])))
+        {
+          next
+        }
+
+        while (diff(borders_[1:2]) < 60)
+        {
+
+          borders_ <- borders_[3:length(borders_)]
+          if(is.na(diff(borders_[1:2])))
+          {
+            break
+          }
+        }
+
+        if(length(borders_) < 2)
+        {
+          next
+        }
+
+        # Compute the intensities along X for each bordered area
+        bright_area <- apply(x, 1, trapz)
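+        # Illustrative note: trapz() from pracma integrates each X row of the SD
+        # image over Y, so bright_area[i] is the area under the intensity profile
+        # at X position i. A quick toy check with made-up numbers:
+        # pracma::trapz(c(0, 1, 2, 1, 0))  # == 4 for this triangular profile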
bright_area1 <- bright_area[borders_[1]:borders_[2]] + + if(length(borders_) <= 3) + { + bright_area2 <- 0 + } else { + bright_area2 <- bright_area[borders_[3]:borders_[4]] + } + + # Reset minimum and find top 4 peaks in the 2 anterior stripes + bright_area1[which(bright_area1 < quantile(bright_area1)[4])] <- quantile(bright_area1)[4] + bright_area2[which(bright_area2 < quantile(bright_area2)[4])] <- quantile(bright_area2)[4] + + + peaks1 <- unique(find_peaks(bright_area1)) + peaks2 <- unique(find_peaks(bright_area2)) + + # Check if any peaks in peaks1 - if not, peaks2 becomes peaks1; reset borders as well + if (length(peaks1) == 0) + { + peaks1 <- peaks2 + peaks2 <- peaks2[-c(1:length(peaks2))] + bright_area1 <- bright_area2 + + borders_[1] <- borders_[3] + borders_[2] <- borders_[4] + } + + # If no peaks are found - skip to the next file + if (length(peaks1) == 0) + { + next + } + + peaklist <- list() + peaklist[1] <- peaks1[which.max(bright_area1[peaks1])] + borders_[1] + + if(length(peaks1) > 1) + { + peaklist[2] <- peaks1[which.maxN(bright_area1[peaks1], N=2)] + borders_[1] + } + + if (length(peaks1) > 2) + { + peaklist[3] <- peaks1[which.maxN(bright_area1[peaks1], N=3)] + borders_[1] + } + + + + if(length(peaks2) != 0) + { + peaklist[4] <- peaks2[which.max(bright_area2[peaks2])] + borders_[3] + if(length(peaks2) > 1) + { + peaklist[5] <- peaks2[which.maxN(bright_area2[peaks2], N=2)] + borders_[3] + } + if (length(peaks2) > 2) + { + peaklist[6] <- peaks2[which.maxN(bright_area2[peaks2], N=3)] + borders_[3] + } + } + + peaklist <- unlist(peaklist) + + + # Check if any peak is set outside the X-range - if so, set to max.X + if(any(peaklist > dim(image_)[1], na.rm = TRUE)) + { + peaklist[which(peaklist > dim(image_)[1])] <- dim(image_)[1] + } + + # Highlight the Kymograph position in the image with a white line + + q <- x + q[peaklist,] <- 1 + writeImage(q, file=paste0(substr(filelist$cxd[f],1,gregexpr('.cxd',filelist$cxd[f])[[1]][1]),'_SD and peaklines.tiff')) + + peaklist <- peaklist[which(!is.na(peaklist))] + + if (length(peaklist) == 0) + { + next + } + + # Create kymographs for peaks + # Make sure to have unique peaks only ! 
+ peaklist <- unique(peaklist) + + kymograph <- matrix(nrow = Y_, ncol = timepoints) + graphs <- array(kymograph,dim = c(dim(image_)[2],dim(image_)[3],length(peaklist))) + + for (i in 1:length(peaklist)) # i-th peak-position { + { + for (j in 1:dim(image_)[3]) # all timepoints + { + kymograph[,j] <- image_[peaklist[i],,j] + } + graphs[,,i] <- kymograph + } + + + for (k in 1:length(peaklist)) + { + writeImage(t(graphs[,,k]), file=paste0(filelist$cxd[f],'_peak_',k,'_at Xpos_', peaklist[k],'.tiff')) + } + + ## Add directionality + # Check if directions are already present - skip if TRUE + if(file.exists(paste0(filelist$cxd[f],'_directionmarks','.csv'))) + { + next + } + # Peaks + + # Re-use the peaks already used for kymographs + k <- peaklist + + # Add 12 regularly spaced positions in addition + k <- c(k, seq(from= floor(dim(image_)[1] / 24), to=floor(dim(image_)[1] / 2), by=floor(dim(image_)[1] / 24))) + k <- sort(k) + + # Exclude positions outside the '20pxl stripe area' below + k[which(k < 21)] <- 21 + k[which(k > (dim(image_)[1]) - 21)] <- dim(image_)[1] - 21 + k <- unique(k) + + + # Create an n pixel-wide Stripe for each peak and determine the average intensity of the stripe; + # Only use the top- and bottom part of the stripe - this will track the edge as it enters or leaves + # this stripe (we can't really use changes in brightness since - this provides better contrast) + + stripe_pic <- function(x) { + n=20 + + stripe_coordinates <- c(x-n):c(x+n) + extent <- floor(dim(image_)[2] * 0.35) + apply(image_[stripe_coordinates, c(1:extent, c(dim(image_)[2] - extent) : dim(image_)[2]) ,], c(3), mean) + } + + # Determine average intensities at each k + Xpos_averages <- llply(k, stripe_pic) + + # Low-pass filter to intensities of each k (not used at the moment) + filter_peaks <- function(x){ + a <- x + b <- 1:length(x) + lowpass.spline <- smooth.spline(b,a, spar = 0.6) + lowpass.loess <- loess(a ~ b, data = data.frame(x = a, y = b)) + highpass <- a - predict(lowpass.loess, b) + } + + # Smooth the transients by applying rolling mean + x <- llply(Xpos_averages, function (x) rollapply(x, 10, mean)) + + # Normalize the data + norma <- function(data){ + data <- data - min(data) + data <- data / max(data) + } + + x <- llply(x, norma) + x <- llply(x, function (x) (x - 1) * -1) + + # Identify peaks function + findPeaks <- function (x, m = 10){ + shape <- diff(sign(diff(x, na.pad = FALSE))) + pks <- sapply(which(shape < 0), FUN = function(i){ + z <- i - m + 1 + z <- ifelse(z > 0, z, 1) + w <- i + m + 1 + w <- ifelse(w < length(x), w, length(x)) + if(all(x[c(z : i, (i + 2) : w)] <= x[i + 1])) return(i + 1) else return(numeric(0)) + }) + pks <- unlist(pks) + pks + } + + + peaks_position_list <- llply(x, findPeaks) + names(peaks_position_list) <- k + + # Aggregate all peaks into a single vector + peaks_positions <- sort(unlist(peaks_position_list)) + + # Define a time window that collects all detected peaks within. Identify all windows + # that show missing peaks. 
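+      # Rough intuition (illustrative numbers, not measured): if every Xpos sees
+      # a beat about every 30 frames, diff() of the pooled, sorted peak positions
+      # is small inside one beat's cluster and large between clusters, so
+      # sd(diff(peaks_positions)) lands on the order of the cluster spacing and
+      # makes a reasonable starting window.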
+
+      window_size <- floor(sd(diff(peaks_positions)))
+
+      peak_groups <- list()
+      for (i in 1:c(length(x[[1]]) - window_size))
+      {
+        peak_groups[[i]] <- sum(peaks_positions %in% i:c(i + window_size), na.rm = TRUE)
+      }
+
+      # Number of peaks captured in each frame
+      counted <- unlist(peak_groups)
+      binned_by_zero <- function(x){
+        bins_zero <- NULL
+        null_starts <- NULL
+        null_ends <- NULL
+
+        for (i in 2:c(length(x) - 1))
+        {
+          if(x[i-1] != 0 & x[i] == 0)
+          {
+            null_starts <- c(null_starts, i)
+          }
+          if(x[i] == 0 & x[i+1] != 0)
+          {
+            null_ends <- c(null_ends, i)
+          }
+        }
+
+        if(length(null_ends) == 0 | length(null_starts) == 0){
+          position_null <- NULL
+          return(position_null)
+        }
+
+        # Pair every start with an end; a missing trailing end is padded with NA
+        length(null_ends) <- length(null_starts)
+
+        position_null <- tibble(null_starts, null_ends)
+        if(is.na(position_null[lengths(position_null[,2]),2]))
+        {
+          position_null[lengths(position_null[,2]),2] <- position_null[lengths(position_null[,2]),1]
+        }
+
+        position_null <- position_null %>%
+          rowwise() %>%
+          mutate(c=mean(c(null_starts,null_ends)))
+
+        position_null[,3] <- round(position_null[,3])
+        return(position_null)
+
+      }
+      # Define bins by mid-positions of zero-peak tracks
+      bins_zero <- binned_by_zero(counted)
+      bins_final <- c(0, bins_zero$c, dim(image_)[3])
+
+      # Make sure that we have not (many) more bins than peaks (could happen if window size was too small)
+      target_bin <- floor(mean(lengths(peaks_position_list)))
+
+      if(length(bins_final) > 1.2 * target_bin)
+      {
+        n <- 1
+        # Optimize for a maximum of 10 rounds
+        while(length(bins_final) > 1.2 * target_bin & n < 11)
+        {
+          n <- n + 1
+
+          window_size <- window_size + 1
+
+          peak_groups <- list()
+          for (i in 1:c(length(x[[1]]) - window_size))
+          {
+            peak_groups[[i]] <- sum(peaks_positions %in% i:c(i + window_size), na.rm = TRUE)
+          }
+
+          # Number of peaks captured in each frame
+          counted <- unlist(peak_groups)
+
+          # Define bins by mid-positions of zero-peak tracks
+          bins_zero <- binned_by_zero(counted)
+          bins_final <- c(0, bins_zero$c, dim(image_)[3])
+
+        }
+      }
+
+      # Make sure that we have not fewer bins than peaks (could happen if window size was too large)
+
+      if(length(bins_final) < 0.8 * target_bin)
+      {
+        n <- 1
+        # Optimize for a maximum of 10 rounds and keep the window positive
+        while(length(bins_final) < 0.8 * target_bin & n < 11 & window_size > 1)
+        {
+          n <- n + 1
+
+          window_size <- window_size - 1
+
+          peak_groups <- list()
+          for (i in 1:c(length(x[[1]]) - window_size))
+          {
+            peak_groups[[i]] <- sum(peaks_positions %in% i:c(i + window_size), na.rm = TRUE)
+          }
+
+          # Number of peaks captured in each frame
+          counted <- unlist(peak_groups)
+
+          # Define bins by mid-positions of zero-peak tracks
+          bins_zero <- binned_by_zero(counted)
+          bins_final <- c(0, bins_zero$c, dim(image_)[3])
+
+        }
+      }
+
+
+      registered_peak_positions <- matrix(nrow = length(peaks_position_list), ncol = length(bins_final))
+
+      # Sort each peak into its corresponding bin
+      for (i in seq_along(lengths(peaks_position_list))) {
+        x <- peaks_position_list[i]
+        registered_peak_positions[i,.bincode(unlist(x), bins_final, FALSE)] <- unlist(x)
+      }
+
+      # Which bins have no supporting peaks at all (all NA)? Remove them from the matrix
+      NA_bins <- apply(registered_peak_positions, 2, function(x) any(!is.na(x)))
+      registered_peak_positions <- registered_peak_positions[,NA_bins]
+
+      # Determine the directionality using regression/slope (negative slopes = anterograde)
+      slopes_in_bin <- NULL
+      slopes_in_bin_Rsquared <- NULL
+      for (i in seq_along(registered_peak_positions[1,])){
+        z <- suppressWarnings(summary(lm(seq_along(registered_peak_positions[,i]) ~ registered_peak_positions[,i], na.action = na.exclude)))
+        slopes_in_bin <- c(slopes_in_bin, z$coefficients[2])
+        slopes_in_bin_Rsquared <- c(slopes_in_bin_Rsquared, z$adj.r.squared)
+
+
+      }
+
+      # Identify all bins that hold only a few supporting peaks (probably false positives)
+      good_bins <- apply(registered_peak_positions, 2, function(x) sum(!is.na(x)))
+      threshold_bin <- mean(unique(good_bins))
+
+      final_bin_peaks <- as.data.frame(t(registered_peak_positions[,which(good_bins >= threshold_bin)]))
+
+      names(final_bin_peaks) <- names(peaks_position_list)
+
+      slopes_in_bin_final <- slopes_in_bin[which(good_bins >= threshold_bin)]
+      slopes_in_bin_Rsquared <- slopes_in_bin_Rsquared[which(good_bins >= threshold_bin)]
+      # Find first Xpos with no NAs, and pair with last Xpos without NAs:
+      Xpos_NAs <- apply(final_bin_peaks, 2, function(x) sum(is.na(x)))
+
+      # Catch situations where all Xpos have an NA peak:
+      if(length(which(Xpos_NAs == 0)) == 0)
+      {
+        # Find rows with NAs
+        NA_rows <- apply(final_bin_peaks, 1, function(x) any(is.na(x)))
+        number_of_NAs <- apply(final_bin_peaks, 1, function(x) sum(is.na(x)))
+        # Remove all rows with many (more than 4) NAs
+        final_bin_peaks <- final_bin_peaks[-which(number_of_NAs > 4),]
+        slopes_in_bin_final <- slopes_in_bin_final[-which(number_of_NAs > 4)]
+        slopes_in_bin_Rsquared <- slopes_in_bin_Rsquared[-which(number_of_NAs > 4)]
+        # Re-calculate Xpos
+        Xpos_NAs <- apply(final_bin_peaks, 2, function(x) sum(is.na(x)))
+      }
+
+      if(length(which(Xpos_NAs == 0)) != 0){
+
+
+        first_Xpos <- min(which(Xpos_NAs < 2))
+        last_Xpos <- max(which(Xpos_NAs < 2))
+
+        all_coordinates <- final_bin_peaks[,c(first_Xpos, last_Xpos)]
+        all_coordinates$delta <- all_coordinates[,1] - all_coordinates[,2]
+
+        # Add direction and speed
+
+        all_coordinates$direction <- as.numeric(all_coordinates[,1] > all_coordinates[,2])
+        all_coordinates$slope <- slopes_in_bin_final
+
+        exposure_time <- as.numeric(meta.data[which(meta.data$L1 == "time_interval"), 1])
+        resolution_x <- as.numeric(meta.data[which(meta.data$L1 == "resolution"), 1])
+        distance_travelled <- c(as.numeric(names(all_coordinates)[2]) - as.numeric(names(all_coordinates)[1])) * resolution_x
+
+        all_coordinates$speed <- distance_travelled / c(all_coordinates$delta * exposure_time)
+        all_coordinates$rsq <- slopes_in_bin_Rsquared
+
+        # In case delta Xpos is zero (two kymographs very close) use the slope and rsquared to determine the directions.
+        # This might override an already determined value, but will adjust a direction that does not match the slope value.
+        # Negative slopes should be anterograde, i.e. the direction should be 1 and not 0.
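+        # Toy example (illustrative numbers): a beat whose peak sits at frames
+        # 100, 90, 80 across three increasing Xpos gives a negative slope for
+        # lm(Xpos_order ~ frame) - the wave arrives later at smaller Xpos, i.e.
+        # it runs from high to low X, which is scored as direction = 1.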
+
+        check_these_negative_slopes <- which(all_coordinates$delta == 0 & all_coordinates$slope < 0 & all_coordinates$rsq > 0.15)
+        check_these_positive_slopes <- which(all_coordinates$delta == 0 & all_coordinates$slope > 0 & all_coordinates$rsq > 0.15)
+
+        all_coordinates$direction[check_these_negative_slopes] <- 1
+        all_coordinates$direction[check_these_positive_slopes] <- 0
+
+
+      } else {
+
+        all_coordinates <- final_bin_peaks[,c(1, length(final_bin_peaks))]
+        all_coordinates$delta <- all_coordinates[,1] - all_coordinates[,2]
+
+        # Add direction and speed
+
+        all_coordinates$direction <- as.numeric(all_coordinates[,1] > all_coordinates[,2])
+        all_coordinates$slope <- slopes_in_bin_final
+
+        # Use slope to alter NA directions
+        NA_directions <- which(is.na(all_coordinates$direction))
+        all_coordinates$direction[NA_directions[all_coordinates[NA_directions,5] < 0]] <- 1
+        all_coordinates$direction[NA_directions[all_coordinates[NA_directions,5] > 0]] <- 0
+
+        exposure_time <- meta.data[which(meta.data$L1 == "time_interval"), 1]
+        resolution_x <- meta.data[which(meta.data$L1 == "resolution"), 1]
+        distance_travelled <- c(as.numeric(names(all_coordinates)[2]) - as.numeric(names(all_coordinates)[1])) * resolution_x
+
+        all_coordinates$speed <- distance_travelled / c(all_coordinates$delta * exposure_time)
+        all_coordinates$rsq <- slopes_in_bin_Rsquared
+
+        # In case delta Xpos is zero (two kymographs very close) use the slope and rsquared to determine the directions.
+        # This might override an already determined value, but will adjust a direction that does not match the slope value.
+        # Negative slopes should be anterograde, i.e. the direction should be 1 and not 0.
+
+        check_these_negative_slopes <- which(all_coordinates$delta == 0 & all_coordinates$slope < 0 & all_coordinates$rsq > 0.15)
+        check_these_positive_slopes <- which(all_coordinates$delta == 0 & all_coordinates$slope > 0 & all_coordinates$rsq > 0.15)
+
+        all_coordinates$direction[check_these_negative_slopes] <- 1
+        all_coordinates$direction[check_these_positive_slopes] <- 0
+
+      }
+
+
+
+
+
+      write.csv(all_coordinates, file=paste0(filelist$cxd[f],'_directionmarks','.csv'), row.names = FALSE)
+    }
+  }
+  ### Libraries necessary for the next batch of scripts
+
+
+  library(broom)
+  library(doParallel)
+  library(plyr)
+  library(dplyr)
+  library(e1071)
+  library(fifer)
+  library(foreach)
+  library(ggplot2)
+  library(parallel)
+  library(purrr)
+  library(quantmod)
+  library(signal)
+  library(tidyr)
+  library(tidyverse)
+  library(tools)
+  library(TTR)
+  library(xlsx)
+  library(baseline)
+
+  TIFFs_process_question <- rstudioapi::showQuestion("Do you need to reprocess TIFF files?", "This will reprocess all TIFFs with Fiji/ImageJ", ok = "Yes", cancel = "No")
+  ## TIFFs_process_question <- TRUE
+
+  if(TIFFs_process_question == TRUE){
+    # Move to target directory and create key folders
+    setwd(normalizePath(target_dir))
+
+    dir.create('TIFFs', showWarnings = FALSE, recursive = FALSE, mode = "0777")
+    dir.create('balled', showWarnings = FALSE, recursive = FALSE, mode = "0777")
+
+    # Copy all kymograph and metadata files from the movie directory to the target directories
+    flist <- list.files(movie_dir, "csv$", full.names = TRUE, recursive = TRUE)
+    if (length(flist) == 0)
+    {
+      stop("No CSV files found. Please select a different folder")
+    }
+    file.copy(flist, "balled")
+
+    flist <- list.files(movie_dir, "*Xpos*", full.names = TRUE, recursive = TRUE)
+
+    if (length(flist) == 0)
+    {
+      stop("No TIFF files found.
Please select a different folder") + } + file.copy(flist, "TIFFs") + + # Apply rolling ball algorithm via Fiji/ImageJ macro (OS-dependent) on kymographs + if(OS == "Darwin") + { + system(paste0("ImageJ-macosx --ij2 --headless --console --run \"", IJscript, "\" \'input=\"", normalizePath(target_dir), "/TIFFs/\",output=\"", normalizePath(target_dir), "/balled/\"\' ")) + } else if(OS == "windows") + { + system(paste0("ImageJ-win64.exe --ij2 --headless --run \"", IJscript, "\" \"input='", normalizePath(target_dir),"/TIFFs/', output='", normalizePath(target_dir),"/balled/'\"")) + } else if(OS == "unix") + { + system(paste0("ImageJ-linux64 --ij2 --headless --console --run \"", IJscript, "\" \'input=\"", normalizePath(target_dir), "/TIFFs/\",output=\"", normalizePath(target_dir), "/balled/\"\' ")) + } + + # Tracing script - all kymographs become traced + setwd(normalizePath(file.path(target_dir, "balled"))) + # MAYO Screen Script No.2 + # Imports TIFF Kymographs (post ImageJ/FIJI batch 'Background substraction') + # find . -iname *.tiff -exec /bin/cp "{}" /Volumes/HEART_DATA/ \; (for MacOSX) + # find . -iname '*.tiff' -exec cp -n {} /mnt/MAYO\ data/TIFFs/ \; (for Linux) + # find . -iname '*peaklines*' -exec mv {} peakline_tiffs/ \; + # and traces these. Exports JPG files with tracing overlay, and CSV table of + # transients + + + filelist <- NULL + filelist$file <- list.files(".", pattern = "\\.tiff$") + + # Populate the list with Genotypes, derived from the filename + filelist$Genotype <- factor(substr(filelist$file,1,regexpr("_",filelist$file[])-1)) + + # Set averaging value for smoothing step + avgx <- 4 + + total <- length(filelist$file) + pb <- txtProgressBar(max = total, style = 3) + + for (f in 1:length(filelist$file)) + + { + setTxtProgressBar(pb, f) + + if(file.exists(paste0(filelist$file[f],"_traced.jpg"))) + { + next + } + + img2 <- read.image(filelist$file[f]) + + if (length(dim(img2)) > 2) # Make sure we only use images that are x,y (not RGB) + next(f) + + image_ <- imageData(img2) + image_ <- t(image_) + + image_ <- image_ / max(image_) + image_ <- gblur(image_, sigma = 1.5) # improves edge tracing + + + # remove bright edges with sine half-wave + p <- seq(0,pi,pi/c(length(image_[,1])-1)) + sin_p <- sin(p) + + modifier_2 <- replicate(length(image_[1,]), sin_p) + + # Adjust image with this modifier + image_ <- image_ * modifier_2 + + # Rolling mean for each time slice + image_mean <- NULL + dd <- function(x) {unlist(rollmean(x, avgx))} + image_mean <- apply(image_,1,dd) + + # Normalize everything from to 0..1 + image_mean <- sweep(image_mean,1,apply(image_mean , 1, max),`/`) + + # Calculate the value range - substract the bottom quantile + background_ <- quantile(image_mean)[1] + image_mean_bg <- image_mean - background_ + + + # Transpose image + image_mean_bg <- t(image_mean_bg) + + + # Edge tracing + + up <- list() + down <- list() + + for (i in 1:length(image_mean_bg[1,])) # first run to get the outlines + { + x <- as.vector(mean(image_mean_bg[,i])) + test1 <- which(image_mean_bg[,i] > x) + up_ <- test1[1] # upper (left) boundary + + test2 <- which(rev(image_mean_bg[,i]) > x) + down_left <- test2[1] # lower (right) boundary + down_ <- c(length(image_mean_bg[,1])) - down_left + up[[i]] <- unlist(up_) + down[[i]] <- unlist(down_) + + } + + up <- unlist(up) + down <- unlist(down) + positions <- 1:length(image_mean_bg[1,]) + + + ## Sanity check - make sure that the values are not too different between sequential x-positions + limits <- mean(up) - sd(up) + + to_be_corrected <- which(up 
<= limits) + g = 1 + + while(length(to_be_corrected) > 1) + { + to_be_corrected <- which(up <= limits) + g = g + 1 + for (i in to_be_corrected) + { + + x <- as.vector(mean(image_mean_bg[,i], na.rm=TRUE)) + + test1 <- which(image_mean_bg[,i] > x) + + up_ <- test1[g] # second boundary + + up[i] <- unlist(up_) + + } + } + + limits_2 <- mean(down) + sd(down) + + to_be_corrected <- which(down >= limits_2) + g = 1 + + while(length(to_be_corrected) > 1) + { + to_be_corrected <- which(down >= limits_2) + g = g + 1 + for (i in to_be_corrected) + { + + x <- as.vector(mean(image_mean_bg[,i], na.rm=TRUE)) + test2 <- which(rev(image_mean_bg[,i]) > x) + down_left <- test2[g] # second boundary + down_ <- c(length(image_mean_bg[,1])) - down_left + down[i] <- unlist(down_) + + } + } + + + diameters <- data.frame(cbind(up, down)) + diameters$distance <- diameters[,2] - diameters[,1] + + # New data table that uses temporal and spatial resolution data to calculate real distances + meta.data <- read.csv(paste0(substr(filelist$file[f],1,gregexpr('.cxd',filelist$file[f])[[1]][1]),"_new_meta_data.csv")) + + time_interval <- meta.data[which(meta.data$L1 == "time_interval"),1] + resolution_meta <- meta.data[which(meta.data$L1 == "resolution"),1] + + diameters$time <- c(0, time_interval * 1:c(length(diameters$distance)-1)) + + for_export <- diameters[,c(4,3,1,2)] + for_export[,2] <- for_export[,2] * resolution_meta + + + # Define upper half of the movie + up_midline <- as.integer(max(up) - sd(up)) + Half_up_image <- image_mean_bg[1:up_midline,,drop = F] + + down_midline <- as.integer(max(down) - sd(down)) + Half_down_image <- image_mean_bg[length(image_[,1]):down_midline,,drop = F] + + + # Brightness changes as edge moves into the frame of the halfmovie + auc_function <- function(x) {unlist(trapz(x))} + up_brightness <- apply(Half_up_image,2,auc_function) + down_brightness <- apply(Half_down_image,2,auc_function) + + for_export$up_bright <- up_brightness + for_export$down_bright <- down_brightness + + for_export$down_norm <- c(for_export$down - min(for_export$down)) / c(max(for_export$down) - min(for_export$down))[-length(for_export$down)] + for_export$up_norm <- c(for_export$up - min(for_export$up)) / c(max(for_export$up) - min(for_export$up))[-length(for_export$up)] + for_export$distance_norm <- for_export$down_norm - for_export$up_norm + + write.table(for_export, file = paste0(filelist$file[f],".csv"), row.names = FALSE, col.names = TRUE, sep = ",") + + + image_diameters <- matrix(nrow=length(image_mean_bg[,1]) , ncol = length(image_mean_bg[1,])) + image_diameters[] <- 0 + image_diameters[cbind(up, positions)] <- 255 + image_diameters[cbind(down, positions)] <- 255 + + # x = image_diameters + # y = image_mean + # Since the image would turn upside-down we need to re-arrange the rows to be correct + x = t(image_diameters) #[nrow(image_diameters):1,, drop = FALSE] + y = image_mean + + x <- x[,ncol(x):1, drop = FALSE] + y <- y[,ncol(y):1, drop = FALSE] + + + red_green = rgbImage(green=x, red=y, blue = y) + #red_green <- transpose(red_green) + writeImage(red_green, file=paste0(filelist$file[f],"_traced.jpg"), type = "JPEG") + } + close(pb) + + + # Tracing quality control step - copies files into a 'good' and 'bad traces' folder + # MAYO Screen Script No.3 + # Imports transients stored in CSV files of traced Kymographs + # and performs quality control. Data will be split into ' + # Good traces' and 'Bad traces'. Exports summary statistics as well. 
+ + + + # Get file list, for all necessary file types + + filelist <- NULL + filelist$csv <- list.files(".", pattern = "\\.tiff.csv$") + filelist$smoothie_up <- 1:length(filelist$csv) + filelist$smoothie_up_norm <- filelist$smoothie_up + filelist$smoothie_down <- filelist$smoothie_up + filelist$smoothie_down_norm <- filelist$smoothie_up + filelist$sd <- filelist$smoothie_up + filelist$dist_cor <- filelist$smoothie_up + filelist$norm_dist_cor <- filelist$smoothie_up + + total <- length(filelist$csv) + pb <- txtProgressBar(max = total, style = 3) + + errorlist <- NULL + # https://stats.stackexchange.com/questions/24607/how-to-measure-smoothness-of-a-time-series-in-r + # Standard deviation of the time-to-time differences, should be minimal for continuous data + + for (f in 1:length(filelist$csv)) + { + setTxtProgressBar(pb, f) + x <- read.csv(filelist$csv[f]) + + if (length(colnames(x)) < 4) + { + next + } + + + #filelist$smoothie_up_norm[f] <- sd(diff(x$up))/abs(mean(diff(x$up), na.rm=TRUE)) + + if(sd(diff(x$down)) != 0 & sd(diff(x$up)) != 0) + { + filelist$smoothie_up[f] <- sd(diff(x$up)) + filelist$smoothie_down[f] <- sd(diff(x$down)) + filelist$autocorr_up[f] <- cor(diff(x$up)[-length(diff(x$up))],diff(x$up)[-1]) + filelist$autocorr_down[f] <- cor(diff(x$down)[-length(diff(x$down))],diff(x$down)[-1]) + filelist$autocorr_sd[f] <- sd(c(filelist$autocorr_up[f], filelist$autocorr_down[f])) + filelist$sd[f] <- sd(c(filelist$smoothie_down[f], filelist$smoothie_up[f])) + filelist$dist_cor[f] <- cor(diff(x$distance)[-length(diff(x$distance))],diff(x$distance)[-1]) + filelist$norm_dist_cor[f] <- cor(diff(x$distance_norm)[-length(diff(x$distance_norm))],diff(x$distance_norm)[-1]) + + + } else { + filelist$smoothie_up[f] <- 10 + filelist$smoothie_down[f] <- 10 + #filelist$smoothie_down_norm[f] <- sd(diff(x$down))/abs(mean(diff(x$down), na.rm=TRUE)) + filelist$autocorr_up[f] <- 0 + filelist$autocorr_down[f] <- 0 + filelist$autocorr_sd[f] <- 10 + filelist$sd[f] <- 100 + filelist$dist_cor[f] <- 0 + filelist$norm_dist_cor[f] <- 0 + + } + } + + close(pb) + + + # Add genotype info to allow for sorting by genotype + x <- data.frame(filelist) + x$CODE <- as.factor(unlist(lapply(filelist$csv, function(x) substring(x,1,gregexpr('_',x)[[1]]-1)[1]))) + x$ID <- unlist(lapply(filelist$csv, function(x) substring(x,gregexpr('_',x)[[1]][1]+1,gregexpr('_',x)[[1]][2]-1))) + x$Xpos <- as.numeric(unlist(lapply(filelist$csv, function(x) substring(x,gregexpr('Xpos_',x)[[1]]+5,gregexpr('.tiff',x)[[1]]-1)[1]))) + x$quality <- 'n.t.' + x$quality[which(x$autocorr_up > median(x$autocorr_up, na.rm = TRUE) * 0.9 & x$autocorr_down > median(x$autocorr_down, na.rm = TRUE) * 0.9 & x$sd < median(x$sd, na.rm = TRUE) * 1.5)] <- 'excellent' + x$quality[which(x$quality == 'n.t.' & x$dist_cor > 0.6 & x$sd < 4 & x$norm_dist_cor > 0.6)] <- 'good' + x$tiff <- unlist(lapply(filelist$csv, function(x) substring(x,1,gregexpr('.tiff',x)[[1]]+4)[1])) + x$jpeg <- unlist(lapply(filelist$csv, function(x) paste0(substring(x,1,gregexpr('.tiff',x)[[1]]+4)[1], "_traced.jpg"))) + + # Separate data into 3 folders - excellent, 'good and 'bad' traces. Copy csv files of excellent traces as well. 
+
+  mainDir <- getwd()
+  subDir <- paste0("bad traces", collapse =' ')
+  subDirPath <- file.path(mainDir, subDir)
+  dir.create(subDirPath, showWarnings = FALSE)
+  file.copy(as.character(x$csv[which(x$quality == "n.t.")]),subDirPath, overwrite = FALSE, recursive = FALSE, copy.mode = TRUE, copy.date=FALSE)
+  file.copy(x$jpeg[which(x$quality == "n.t.")],subDirPath, overwrite = FALSE, recursive = FALSE, copy.mode = TRUE, copy.date=FALSE)
+
+  subDir2 <- paste0("good traces", collapse =' ')
+  subDirPath2 <- file.path(mainDir, subDir2)
+  dir.create(subDirPath2, showWarnings = FALSE)
+  file.copy(as.character(x$csv[which(x$quality == "good")]),subDirPath2,overwrite = FALSE, recursive = FALSE, copy.mode = TRUE, copy.date=FALSE)
+  file.copy(x$jpeg[which(x$quality == "good")],subDirPath2, overwrite = FALSE, recursive = FALSE, copy.mode = TRUE, copy.date=FALSE)
+
+  subDir3 <- paste0("excellent traces", collapse =' ')
+  subDirPath3 <- file.path(mainDir, subDir3)
+  dir.create(subDirPath3, showWarnings = FALSE)
+  file.copy(as.character(x$csv[which(x$quality == "excellent")]),subDirPath3, overwrite = FALSE, recursive = FALSE, copy.mode = TRUE, copy.date=FALSE)
+  file.copy(x$jpeg[which(x$quality == "excellent")],subDirPath3, overwrite = FALSE, recursive = FALSE, copy.mode = TRUE, copy.date=FALSE)
+
+  write.table(x, file = "Quality_control.csv", row.names = FALSE, col.names = TRUE, sep = ",")
+
+
+  # Load QC data
+  # QC <- read.csv(file = "Quality_control.csv")
+  QC <- x
+  # Identify for each movie whether at least 1 trace is of excellent quality
+  QC$cxd <- unlist(lapply(QC$jpeg, function(x) substring(x, 1, regexpr("cxd", x) + 2)))
+
+  QC_files <- QC[,c(15,18)]
+
+  QC_files <- melt(table(QC_files))
+
+  QC_files <- split(QC_files, QC_files$quality)
+
+  # No excellent traces:
+  need_more_traces <- unique(QC_files$excellent$cxd[which(QC_files$excellent$value == 0)])
+
+  # Select these movies from good traces:
+  rescue_traces <- QC_files$good[QC_files$good$cxd %in% need_more_traces,]
+  rescue_traces <- rescue_traces[which(rescue_traces$value > 0), ]
+
+  QC_good <- QC[which(QC$quality == "good"),]
+  rescue_traces_QC <- merge(rescue_traces, QC_good, by.x = "cxd", by.y = "cxd", all.x = TRUE)
+
+  file.copy(as.character(rescue_traces_QC$csv),subDirPath3, overwrite = FALSE, recursive = FALSE, copy.mode = TRUE, copy.date=FALSE)
+  file.copy(rescue_traces_QC$jpeg,subDirPath3, overwrite = FALSE, recursive = FALSE, copy.mode = TRUE, copy.date=FALSE)
+
+}
+
+# Final run - several scripts that count beats and aggregate the data
+setwd(normalizePath(file.path(target_dir, "balled", "excellent traces")))
+# setwd("/home/geo/GIANT/balled_MAYO_only/excellent traces")
+file.copy(normalizePath(mappings_file), ".", overwrite = TRUE)
+
+
+# Transient analysis
+# MAYO Screen Script No.4
+# Imports transients stored in CSV files of good Kymographs
+# and identifies intervals.
+# A file that maps the Codes with genotypes should be provided,
+# named 'mappings.csv', with the following structure:
+#
+# CODE cross type fly human
+# MAYO0001 Hand4.2 tdtK x BL-41698 - CG4747 RNAi experiment CG4747 GLYR1
+# ...
+
+# This script will export .Rdata files that contain all raw data and will
+# be used subsequently for downstream analysis, i.e. graphs, statistics...
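+# Optional sanity check before the run (a sketch; the 8-column requirement
+# mirrors the length(names(crosses)) < 8 test further below):
+# crosses_check <- xlsx::read.xlsx2(file = "mappings.xlsx", sheetIndex = 1)
+# if (ncol(crosses_check) < 8) {
+#   warning("mappings file has fewer than 8 columns; Script No.4 will stop")
+# }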
+ + # Adjust findpeaks parameters for Peaks and Valleys to better match acutal peaks before rollmean + + findPeaksP <- function (x, thresh = 0) + { + pks <- which(diff(sign(diff(x, na.pad = FALSE)), na.pad = FALSE) < + 0) + 1 + if (!missing(thresh)) { + if (sign(thresh) < 0) + thresh <- -thresh + pks[x[pks - 1] - coredata(x[pks]) > thresh] + } + else pks + } + + findPeaksV <- function (x, thresh = 0) + { + pks <- which(diff(sign(diff(x, na.pad = FALSE)), na.pad = FALSE) < + 0) + 1 + if (!missing(thresh)) { + if (sign(thresh) < 0) + thresh <- -thresh + pks[x[pks - 1] - coredata(x[pks]) > thresh] + } + else pks + } + options(warn=2) + + { + filelist <- NULL + filelist$jpg <- list.files(".", pattern = "\\.jpg$") + filelist$file <- gsub("_traced.jpg", ".csv", filelist$jpg) + filelist$Genotype <- as.factor(unlist(lapply(filelist$jpg, function(x) substring(x,1,gregexpr('_',x)[[1]]-1)[1]))) + + # Extract more information from the filenames + # Genotype is: + filelist$Genotype_filename <- as.factor(unlist(lapply(filelist$file, function(x) substring(x,1,gregexpr('_',x)[[1]]-1)[1]))) + + # Age is: + filelist$Age_filename <- as.factor(unlist(lapply(filelist$file, function(x) substring(x,gregexpr('_',x)[[1]][2]+1,gregexpr('_',x)[[1]]+1)[2]))) + + # Sex is: + filelist$Sex_filename <- as.factor(unlist(lapply(filelist$file, function(x) substring(x,gregexpr('_',x)[[1]][3]-1,gregexpr('_',x)[[1]]-1)[3]))) + + # Comment is: + filelist$comment <- unlist(lapply(filelist$file, function(x) substring(x,gregexpr('_',x)[[1]][5]+1, gregexpr('\\.',x)[[1]]-1)[1]), use.names = TRUE) + filelist$comment[which(is.na(filelist$comment))] <- "none" + + # Populate the list with Genotypes, derived from the filename + filelist$Genotype <- factor(substr(filelist$file,1,regexpr("_",filelist$file[])-1)) + + # Export the Genotype list (can be used to create Instructions.csv file) + if(mappings_file_type == "CSV") + { + library('openxlsx') + write.xlsx(read.csv("mappings.csv"), "mappings.xlsx") + } + + crosses <- read.xlsx2(file="mappings.xlsx", sheetIndex = 1) + crosses$CODE <- gsub("_", "", crosses$CODE) + + if(length(names(crosses)) < 8){ + stop('The mappings.csv/xlsx file has missing columns') + } + + + + + z <- crosses[,c(1,2,3,6,7)] + names(z) <- c("CODE", "cross", "type", "fly", "human") + + # Is sorting and/or grouping present? 
If not, group by symbols and type + if (length(names(z)) == 5) + { + symbols_ <- list(as.factor(levels(z$human))) + symbols_[[2]] <- 1:length(levels(z$human)) + z$sorting <- symbols_[[2]][match(z$human, symbols_[[1]])] + + groups_ <- list(levels(z$type)) + groups_[[2]] <- 1:length(levels(z$type)) + z$group <- groups_[[2]][match(z$type, groups_[[1]])] + } + z$sorting <- as.factor(z$sorting) + z$group <- as.factor(z$group) + z$CODE <- as.factor(z$CODE) + z_original <- z + + + # filelist$Genotype <- factor(substr(filelist$file,1,regexpr("_",filelist$file[])-1)) + filelist$index <- c(1:length(filelist$file)) + + + # Reconstruct CODE/Genotype to include age and sex (adjust z) + filelist$CODED <- factor(paste0(filelist$Genotype,filelist$Age_filename, filelist$Sex_filename)) + filelist$CODE <- factor(filelist$Genotype) + filelist$cross <- z$cross[match(filelist$CODE,z$CODE)] + filelist$type <- z$type[match(filelist$CODE,z$CODE)] + filelist$symbol_fly <- z$fly[match(filelist$CODE,z$CODE)] + filelist$symbol_human <- z$human[match(filelist$CODE,z$CODE)] + filelist$sorting <- z$sorting[match(filelist$CODE,z$CODE)] + filelist$group <- z$group[match(filelist$CODE,z$CODE)] + filelist$Genotype <- factor(filelist$CODED) # New Genotype value + + z_new <- NULL + z_new$CODE <- filelist$Genotype + z_new$cross <- filelist$cross[match(z_new$CODE, filelist$Genotype)] + z_new$type <- filelist$type[match(z_new$CODE, filelist$Genotype)] + z_new$fly <- filelist$symbol_fly[match(z_new$CODE, filelist$Genotype)] + z_new$human <- filelist$symbol_human[match(z_new$CODE, filelist$Genotype)] + z_new$origCode <- filelist$CODE[match(z_new$CODE, filelist$Genotype)] + z_new$sorting <- filelist$sorting[match(z_new$CODE, filelist$Genotype)] + z_new$group <- filelist$group[match(z_new$CODE, filelist$Genotype)] + z_new <- data.frame(z_new) + z <- z_new + + save(z, file = "z.Rdata") + save(filelist, file="filelist.Rdata") + } + print("Codes assigned") + + { + # Create master table with overview of all files and indices + genotype_indices <- data.frame(aggregate(filelist$index, by= list(filelist$Genotype), function (x) x, simplify = FALSE)) + + names(genotype_indices) <- c("Code", "Index") + genotype_indices$n <- sapply(genotype_indices$Index, function (x) length(x)) + save(genotype_indices, file="genotype_indices.Rdata") + + filelist_with_index <- data.frame(filelist) + filelist_with_index$IDnumber <- as.numeric(unlist(lapply(filelist_with_index$file, function(x) as.numeric(substring(x,gregexpr('_',x)[[1]][1]+1,gregexpr('_',x)[[1]]-1)[2])))) + filelist_with_index$flyID <- as.factor(sapply(1:length(filelist_with_index$file), function(i) substring(filelist_with_index$file[i], gregexpr('_',filelist_with_index$file[i])[[1]] +1 , gregexpr('w[fm]_',filelist_with_index$file[i])[[1]]+1)[[1]])) + filelist_with_index$flyID <- as.factor(sapply(1:length(filelist_with_index$file), function(i) paste0(filelist_with_index$Genotype_filename[i], "_", filelist_with_index$flyID[i]))) + save(filelist_with_index, file="filelist_with_index.Rdata") + + + direction_files <- data.frame("direction" = list.files(path = "../", pattern = "_directionmarks.csv")) + direction_files$CXD <- unlist(llply(direction_files[,1], function(x) substr(x, 1, gregexpr('.cxd', x)[[1]][1]+3))) + direction_jpg_matched_files <- data.frame("traced_csv" = filelist_with_index$file) + direction_jpg_matched_files$CXD <- unlist(llply(direction_jpg_matched_files[,1], function(x) substr(x, 1, gregexpr('.cxd', x)[[1]][1]+3))) + directions <- merge(direction_files, 
direction_jpg_matched_files, by.x = "CXD", by.y = "CXD", all.y = TRUE )
+  }
+  print("Filelist done")
+
+  # Work through the transients now
+  # By genotype (Code, i), then by fly (j)
+
+  avgx = 5 # Steps to average in rolling mean for transient
+  avgt = 400 # Steps for rollmean in threshold transient
+  library('reshape2')
+
+  # Process each file and gather spatiotemporal data
+
+  number_of_codes <- length(genotype_indices$Code)
+
+  # To save memory we will split the codes into chunks
+  # and store these in a temporary sub-folder
+  split_codes <- split(1:number_of_codes, ceiling(seq_along(1:number_of_codes)/20))
+  mainDir <- getwd()
+  subDir <- paste0("temp", collapse =' ')
+  subDirPath <- file.path(mainDir, subDir)
+  dir.create(subDirPath, showWarnings = FALSE)
+
+  total_time <- vector("list", length(filelist_with_index$index))
+  anterior_beat_percent <- vector("list", length(filelist_with_index$index))
+
+  # Direction-switches per fly and speeds
+  reversals <- vector("list", length(filelist_with_index$index))
+  speeds_anterograde <- vector("list", length(filelist_with_index$index))
+  speeds_retrograde <- vector("list", length(filelist_with_index$index))
+
+
+
+  for(p in 1:length(split_codes))
+  {
+    message("Calculating Genotypes in chunks of 20, part ", p, " of ", length(split_codes))
+    intervals_final <- NULL
+
+    # Internal chunk counter:
+    cc <- 0
+
+    # Get current sets
+    current_code_range <- split_codes[[p]]
+
+    # Initialize List to store data
+    transients_experiment <- vector("list", length(current_code_range))
+    timescale <- vector("list", length(current_code_range))
+
+    intervals_final <- vector("list", length(current_code_range))
+    transients_final <- vector("list", length(current_code_range))
+    identicals_final <- vector("list", length(current_code_range))
+    time_used_final <- vector("list", length(current_code_range))
+    transient_metrics_final <- vector("list", length(current_code_range))
+    spline_all <- vector("list", length(current_code_range))
+    EDD_this_genotype <- vector("list", length(current_code_range))
+    ESD_this_genotype <- vector("list", length(current_code_range))
+    FS_this_genotype <- vector("list", length(current_code_range))
+    meanEDD <- vector("list", length(current_code_range))
+    meanESD <- vector("list", length(current_code_range))
+    directionality_this_genotype <- vector("list", length(current_code_range))
+
+
+
+    for (i in current_code_range[1]:current_code_range[length(current_code_range)])
+    {
+
+      cc <- cc + 1
+
+      # Set and reset variables to store loop data
+      mylist <- vector("list", length(genotype_indices$Index[[i]]))
+      interval_this_genotype <- vector("list", length(genotype_indices$Index[[i]]))
+      transients_this_genotype <- vector("list", length(genotype_indices$Index[[i]]))
+      identicals_this_genotype <- vector("list", length(genotype_indices$Index[[i]]))
+      time_used <- vector("list", length(genotype_indices$Index[[i]]))
+      transient_metrics_this_genotype <- vector("list", length(genotype_indices$Index[[i]]))
+      EDD_this_fly <- vector("list", length(genotype_indices$Index[[i]]))
+      ESD_this_fly <- vector("list", length(genotype_indices$Index[[i]]))
+      FS_this_fly <- vector("list", length(genotype_indices$Index[[i]]))
+      mean_EDD_this_fly <- vector("list", length(genotype_indices$Index[[i]]))
+      mean_ESD_this_fly <- vector("list", length(genotype_indices$Index[[i]]))
+      directionality_this_fly <- vector("list", length(genotype_indices$Index[[i]]))
+      spline_this_genotype <- vector("list", length(genotype_indices$Index[[i]]))
+
+      print(paste0(i, " of
",number_of_codes, " done (", round(c(i/number_of_codes * 100), digits = 2), "%)")) + for (j in 1:(genotype_indices$n[i])) + + { + v <- genotype_indices$Index[[i]][j] + transients_experiment <- read.csv(file = as.character(filelist_with_index$file[match(v, filelist_with_index$index)]), header=TRUE, sep = ",")[,1:2] + names(transients_experiment) <- c("time", "distance") + timescale[[v]] <- median(diff(transients_experiment[,1])) + total_time[[v]] <- length(transients_experiment[,1]) * timescale[[v]] + + # Extract data regarding beat direction and contraction speed + directions_this_file <- read.csv(file = paste0("../" , as.character(directions$direction[which(directions$traced_csv %in% as.character(filelist_with_index$file[match(v, filelist_with_index$index)]))])), header=TRUE, sep = ",", stringsAsFactors = FALSE) + + directions_check <- as.numeric(as.factor(directions_this_file$direction)) + + if(length(table(abs(diff(directions_check)))) == 0) + { + reversals[[v]] <- NA + } else if(length(table(abs(diff(directions_check)))) == 1) { + reversals[[v]] <- 0 + } else { + reversals[[v]] <- table(abs(diff(directions_check)))[[2]] + } + beat_directions <- reshape2::melt(table(directions_this_file$direction)) + + total_directions <- sum(beat_directions$value) + + anterograde_beats <- beat_directions$value[which(beat_directions$Var1 == 1)] + if(length(anterograde_beats) == 0){ + anterograde_beats <- 0 + } + anterior_beat_percent[[v]] <- anterograde_beats / total_directions + directionality_this_fly[[j]] <- anterograde_beats / total_directions + + directions_this_file[is.infinite(directions_this_file[,6]), 6] <- NA + + if(tally(directions_this_file %>% group_by(direction) %>% dplyr::summarize(mean(speed, na.rm = TRUE)) %>% dplyr::filter(direction == 1)) == 0){ + speeds_anterograde[[v]] <- NA + } else { + speeds_anterograde[[v]] <- unlist(directions_this_file %>% group_by(direction) %>% dplyr::summarize(mean(speed, na.rm = TRUE)) %>% dplyr::filter(direction == 1))[[2]] + } + if(tally(directions_this_file %>% group_by(direction) %>% dplyr::summarize(mean(speed, na.rm = TRUE)) %>% dplyr::filter(direction == 0)) == 0){ + speeds_retrograde[[v]] <- NA + } else { + speeds_retrograde[[v]] <- unlist(directions_this_file %>% group_by(direction) %>% dplyr::summarize(mean(speed, na.rm = TRUE)) %>% dplyr::filter(direction == 0))[[2]] + } + + x2 <- transients_experiment + x2[2] <- x2[2] * -1 # Current transient, inverted + x2[2] <- round(x2[2], digits = 2) + + + # Baseline correction + corr_x2 <- data.frame(distance = c(baseline(rbind(x2[,2]), wm=20, ws=20, method = 'rollingBall')@corrected)) + corr_x2$time <- x2[,1] + corr_x2 <- corr_x2[,2:1] + + # Generate smooth spline for original data (for best peaks) + + dd <- data.frame(corr_x2) + + k <- merge(x2, dd[,1:2], by.x='time', by.y = 'time', all.y = TRUE) + + # Identify peaks and valleys + + valleys <- findPeaksV(-dd$distance) + hills <- findPeaksP(dd$distance) + + # Check if Valley/Peak indices are assigned beyond length of transient + if(last(hills) > length(dd$time)) + { + hills[length(hills)] <- length(dd$time) + } + + if(last(valleys) > length(dd$time)) + { + valleys[length(valleys)] <- length(dd$time) + } + + dd$peak <- NA + dd$peak[hills] <- "Peak" + dd$peak[valleys] <- "Valley" + + k$peak <- NA + + # Correct for supershort + if(length(dd$time) < avgt) + { + avgt <- as.integer(length(dd$time) / 3) + } + + # define a threshold distance that separates valleys from peaks + threshold_dd <- data.frame(unlist(rollmeanr(dd[2], avgt))) + + # Reset avgt if it was 
changed for shor movies + avgt <- 400 + + # Add median distance to the trailing values of the threshold + extension_ <- c(length(dd$time) - length(threshold_dd[,1])) # positions to fill + threshold_dd[length(threshold_dd[,1]):c(length(threshold_dd[,1])+extension_),] <- median(tail(threshold_dd[,1])) # fill with median + + threshold_dd$time <- dd[1:c(length(dd[,1])),1] + threshold_dd <- threshold_dd[,c(1,2)] + threshold_dd <- threshold_dd[,2:1] # Order columns + + # Peak and Valley IDs + V_id <- which(dd$peak == "Valley") + P_id <- which(dd$peak == "Peak") + + # Find above-threshold valley and remove it + + h <- merge(dd[V_id,], threshold_dd, by.x = "time", by.y="time") + t <- h[which(h[,2] >= h[,4]),1] + dd$peak[dd$time %in% t] <- NA + + # Find below-threshold hill and remove it + + h <- merge(dd[P_id,], threshold_dd, by.x = "time", by.y="time") + t <- h[which(h[,2] <= h[,4]),1] + dd$peak[dd$time %in% t] <- NA + + # Determine peak/valley pairs (find a valley, followed by a peak, followed by a valley) + # Keep only those as real valley/peaks + + all_pos <- which(!is.na(dd$peak)) + all_pos_kinds <- dd$peak[all_pos] + + peaky <- function(x) + { + return(all(x == c("Peak", "Peak"))) + } + + + if(length(which(rollapply(all_pos_kinds, width = 2, by = 1, FUN = peaky, align = "left"))) == 0) + { + all_pos_cleaned <- all_pos + } else + { + all_pos_cleaned <- all_pos[-c(which(rollapply(all_pos_kinds, width = 2, by = 1, FUN = peaky, align = "left")) + 1)] + } + + + new_peaklist <- dd$peak[all_pos_cleaned] + + + correct_peak_pattern <- function(x) + { + return(all(x == c("Valley", "Peak", "Valley"))) + } + + start_positions <- all_pos_cleaned[c(which(rollapply(new_peaklist, width = 3, by = 1, FUN = correct_peak_pattern, align = "left")))] + peak_postions <- all_pos_cleaned[c(which(rollapply(new_peaklist, width = 3, by = 1, FUN = correct_peak_pattern, align = "left")) + 1)] + tail_positions <- all_pos_cleaned[c(which(rollapply(new_peaklist, width = 3, by = 1, FUN = correct_peak_pattern, align = "left")) + 2)] + + position_index <- data.frame(starts = start_positions) + position_index$peaks <- peak_postions + position_index$ends <- tail_positions + + time_index <- data.frame(matrix(x2$time[as.matrix(position_index)],nrow = length(position_index$ends),ncol = 3)) + names(time_index) <- names(position_index) + + # quartz() + # plot(x2, cex = 0.5, type = "l") + # abline(v=x2$time[position_index$starts], col = 'red') + # abline(v=x2$time[position_index$peaks], col = 'blue') + # abline(v=x2$time[position_index$ends], col = 'green') + + # Create working copy + transients_all <- k[,c(1,2,4)] + names(transients_all) <- names(dd) + + + # Diameter-pairs + EndDiameters <- data.frame(DD = transients_all[start_positions,2]) + EndDiameters$SD <- transients_all[peak_postions,2] + EndDiameters$FS <- c(EndDiameters$DD - EndDiameters$SD) / EndDiameters$DD + + EDD_this_fly[[j]] <- median(EndDiameters$DD) + ESD_this_fly[[j]] <- median(EndDiameters$SD) + FS_this_fly[[j]] <- median(EndDiameters$FS) + + mean_EDD_this_fly <- mean(EDD_this_fly[[j]] * -1) + mean_ESD_this_fly <- mean(ESD_this_fly[[j]] * -1) + + + # Set beginning with first valley and end with valley + transients_all <- transients_all[start_positions[1]:length(transients_all[,1]),] + transients_all <- transients_all[1:last(tail_positions),] + row.names(transients_all) <- NULL + + + # Get intervals lengths for rhythmicity: + intervals <- time_index + + intervals$SI <- intervals$ends - intervals$starts + intervals$deltaSI <- c(diff(intervals$SI), NA) + 
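+      # Interval vocabulary used here (hedged reading of the code): SI is the
+      # systolic interval (start-to-end of one contraction), HP the heart
+      # period (start-to-start of consecutive contractions), and DI the
+      # diastolic interval, computed below as HP - SI.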
intervals$deltaSIplus <- NA + intervals$deltaSIplus[1:c(length(intervals$SI)-1)] <- intervals$deltaSI[2:length(intervals$SI)] + intervals$HP <- c(diff(intervals$starts), NA) + intervals$DI <- intervals$HP - intervals$SI + intervals$i <- i + intervals$j <- j + intervals$Index <- v + interval_this_genotype[[j]] <- intervals + + intervals_final[[cc]] <- dplyr::bind_rows(interval_this_genotype) + + # Extract all transients + transients <- list() + interval_mark <- list() + + for (n in 1:length(position_index$starts)) + { + + transients[[n]] <- x2[position_index$starts[n]:position_index$ends[n],] + transients[[n]]$diameter <- transients[[n]]$distance * -1 + transients[[n]]$timestamp <- transients[[n]]$time + transients[[n]]$time <- transients[[n]]$time - transients[[n]]$time[1] # set t to t0 + transients[[n]]$distance <- transients[[n]]$distance - transients[[n]]$distance[1] # set distance start to 0 + transients[[n]]$distance <- transients[[n]]$distance / max(transients[[n]]$distance) # normalize distance + transients[[n]]$delta <- c(transients[[n]]$diameter - transients[[n]]$diameter[1]) * -1 + transients[[n]]$from_EDD <- mean_EDD_this_fly - transients[[n]]$delta + transients[[n]]$velocity <- c(0,diff(transients[[n]]$delta) / diff(transients[[n]]$time)) + + # Modify start index to skip leading periods of non-contraction + transients[[n]] <- transients[[n]][c(which(transients[[n]]$velocity != 0)[1] - 1):length(transients[[n]]$velocity),] + transients[[n]]$time <- transients[[n]]$time - transients[[n]]$time[1] + transients[[n]]$i <- i + transients[[n]]$j <- j + transients[[n]]$index <- v + transients[[n]]$beat <- n + } + + transients_this_genotype[[j]] <- bind_rows(transients) + transients_final[[cc]] <- bind_rows(transients_this_genotype) + + + AUC <- list() # Area under the curve - to identify super small transients + for (m in 1:(length(transients))) + { + AUC[[m]] <- trapz(transients[[m]][,2]) + } + AUC <- unlist(AUC) + + # Select all transients (place to select transients of certain size) + upper <- quantile(AUC, na.rm = TRUE)[5] + lower <- quantile(AUC, na.rm = TRUE)[1] + + identicals <- transients[which(AUC >= lower & AUC <= upper)] + + # Remove very short transients + transient_lengths <- sapply(identicals, nrow) + + identicals <- identicals[which(transient_lengths > 10)] + + identicals_this_genotype[[j]] <- identicals + names(identicals_this_genotype) <- rep(genotype_indices$Code[i], length(identicals_this_genotype)) + + identicals_final[[cc]] <- identicals_this_genotype + + + # Length of each transient + used_time <- list() + + for (n in 1:(length(identicals))) + { + used_time[n] <- max(identicals[[n]][,1]) + } + + time_used[[j]] <- sum(unlist(used_time)) + time_used_final[[cc]] <- unlist(time_used) + + + + # Analysis + transient_metrics <- list() + velocities <- list() + for (q in 1:length(identicals)) + { + # Create smooth spline + transient_metrics$spl[[q]] <- smooth.spline(identicals[[q]][,1:2]) + + # Determine the timepoints with max positive and negative slopes + velocities$max_velocity[[q]] <- identicals[[q]]$velocity[which.max(identicals[[q]]$velocity)] + velocities$max_neg_velocity[[q]] <- identicals[[q]]$velocity[which.min(identicals[[q]]$velocity)] + + } + + transient_metrics_this_genotype[[j]] <- velocities + transient_metrics_this_genotype[[j]]$genotype <- genotype_indices$Code[i] + transient_metrics_final[[cc]] <- transient_metrics_this_genotype + + spline_this_genotype[[j]] <- transient_metrics$spl + spline_all[[cc]] <- spline_this_genotype + + + EDD_this_genotype[[cc]] 
<- EDD_this_fly + ESD_this_genotype[[cc]] <- ESD_this_fly + FS_this_genotype[[cc]] <- unlist(FS_this_fly) + mean_EDD_this_fly <- mean(EDD_this_fly[[j]] * -1) + mean_ESD_this_fly <- mean(ESD_this_fly[[j]] * -1) + meanEDD[[cc]] <- mean_EDD_this_fly + meanESD[[cc]] <- mean_ESD_this_fly + directionality_this_genotype[[cc]] <- directionality_this_fly + + + } + + } + + intervals_final <- bind_rows(intervals_final) + + # Store each per-chunk object individually; all files follow the temp/Genotype_chunks_<object>_part_<p>.Rds naming scheme + chunk_objects <- c("intervals_final", "timescale", "transients_final", "identicals_final", "time_used_final", "transient_metrics_final", "spline_all", "EDD_this_genotype", "ESD_this_genotype", "FS_this_genotype", "meanEDD", "meanESD", "directionality_this_genotype") + for(obj_ in chunk_objects) + { + saveRDS(get(obj_), file = paste0("temp/Genotype_chunks_", obj_, "_part_", p, ".Rds")) + } + + } + + save(anterior_beat_percent, file="anterior_beat_percent.Rdata") + + # Combine spline_all chunks to compute more temporal data: + { + + spline_all <- list() + for(p in 1:length(split_codes)) + { + fileName <- paste0("temp/Genotype_chunks_spline_all_part_", p,".Rds") + data_read <- readRDS(fileName) + spline_all <- c(spline_all, data_read) + } + + } + + # Store complete data of splines + { + save(spline_all, file="spline_all.Rdata") + } + + # Transient analysis pt.1 + metrics <- function(y){ + + max.value <- which.max(y$y) + + # Contraction range + Contraction_range <- y$y[1:max.value] + + # Relaxation range + Relaxation_range <- y$y[c(max.value + 1): length(y$y)] + + # Time to 10%/25%... peak + tt10p <- y$x[which(Contraction_range > 0.1)[1]] + tt25p <- y$x[which(Contraction_range > 0.25)[1]] + tt50p <- y$x[which(Contraction_range > 0.5)[1]] + tt75p <- y$x[which(Contraction_range > 0.75)[1]] + tt90p <- y$x[which(Contraction_range > 0.9)[1]] + + # Time to 90%/75%... 
return + tt90r <- y$x[which(Relaxation_range < 0.9)[1] + max.value] + tt75r <- y$x[which(Relaxation_range < 0.75)[1] + max.value] + tt50r <- y$x[which(Relaxation_range < 0.5)[1] + max.value] + tt25r <- y$x[which(Relaxation_range < 0.25)[1] + max.value] + tt10r <- y$x[which(Relaxation_range < 0.10)[1] + max.value] + + list(y, flatten(list(peaked = y$x[max.value], tt10p = tt10p, tt25p = tt25p, tt50p = tt50p, tt75p = tt75p, + tt90p = tt90p, tt90r = tt90r, tt75r = tt75r, tt50r = tt50r, tt25r = tt25r, tt10r = tt10r))) + + } + + # Split spline numerical data evenly into chunks of 20 and save individually + split_spline_indices <- split(1:length(spline_all), ceiling(seq_along(1:length(spline_all))/20)) + for(i in 1:length(split_spline_indices)) + { + metrics_all <- list() + message("Calculating Splines part ", i, " of ", length(split_spline_indices)) + + y <- spline_all[split_spline_indices[[i]]] + + for (k in 1:length(y)) + { + u <- list() + values <- list() + for (j in 1:length(y[[k]])) + { + u[[j]] <- lapply(y[[k]][[j]], function(x) predict(x, x=seq(0,max(x$x), by=0.0002))) + + values[[j]] <- lapply(u[[j]], function(x) metrics(x)) + values[[j]]$name <- genotype_indices$Code[split_spline_indices[[i]][k]] + values[[j]]$id <- j + + } + metrics_all[[k]] <- values + + } + + fileName <- paste0("temp/Splines_part_", i,".Rds") + saveRDS(metrics_all, file=fileName) + + } + + rm(spline_all) + + # Combine chunks back into single object + metrics_read <- list() + metrics_all <- list() + for(i in 1:length(split_spline_indices)) + { + + fileName <- paste0("temp/Splines_part_", i,".Rds") + metrics_read <- readRDS(fileName) + metrics_all <- c(metrics_all, metrics_read) + } + + save(metrics_all, file = "metrics_all_splined.Rdata") + rm(metrics_all) + + + # Load remaining data chunks: + # Combine chunks back into single object + { + timescale <- list() + intervals_final <- NULL + transients_final <- list() + identicals_final <- list() + time_used_final <- list() + transient_metrics_final <- list() + EDD_this_genotype <- list() + ESD_this_genotype <- list() + FS_this_genotype <- list() + meanEDD <- list() + meanESD <- list() + directionality_this_genotype <- list() + + for(p in 1:length(split_codes)) + { + fileName <- paste0("temp/Genotype_chunks_intervals_final_part_", p,".Rds") + data_read <- readRDS(fileName) + + if(p==1) + { + intervals_final <- data_read + } else + { + intervals_final <- rbind(intervals_final, data_read) + } + } + + for(p in 1:length(split_codes)) + { + fileName <- paste0("temp/Genotype_chunks_FS_this_genotype_part_", p,".Rds") + data_read <- readRDS(fileName) + FS_this_genotype <- c(FS_this_genotype, data_read) + } + + for(p in 1:length(split_codes)) + { + fileName <- paste0("temp/Genotype_chunks_identicals_final_part_", p,".Rds") + data_read <- readRDS(fileName) + identicals_final <- c(identicals_final, data_read) + } + + for(p in 1:length(split_codes)) + { + fileName <- paste0("temp/Genotype_chunks_time_used_final_part_", p,".Rds") + data_read <- readRDS(fileName) + time_used_final <- c(time_used_final, data_read) + } + + for(p in 1:length(split_codes)) + { + fileName <- paste0("temp/Genotype_chunks_EDD_this_genotype_part_", p,".Rds") + data_read <- readRDS(fileName) + EDD_this_genotype <- c(EDD_this_genotype, data_read) + } + + for(p in 1:length(split_codes)) + { + fileName <- paste0("temp/Genotype_chunks_transient_metrics_final_part_", p,".Rds") + data_read <- readRDS(fileName) + transient_metrics_final <- c(transient_metrics_final, data_read) + } + + for(p in 1:length(split_codes)) + 
{ + fileName <- paste0("temp/Genotype_chunks_timescale_part_", p,".Rds") + data_read <- readRDS(fileName) + timescale <- c(timescale, data_read) + } + + for(p in 1:length(split_codes)) + { + fileName <- paste0("temp/Genotype_chunks_meanEDD_part_", p,".Rds") + data_read <- readRDS(fileName) + meanEDD <- c(meanEDD, data_read) + } + + for(p in 1:length(split_codes)) + { + fileName <- paste0("temp/Genotype_chunks_ESD_this_genotype_part_", p,".Rds") + data_read <- readRDS(fileName) + ESD_this_genotype <- c(ESD_this_genotype, data_read) + } + + for(p in 1:length(split_codes)) + { + fileName <- paste0("temp/Genotype_chunks_transients_final_part_", p,".Rds") + data_read <- readRDS(fileName) + transients_final <- c(transients_final, data_read) + } + + for(p in 1:length(split_codes)) + { + fileName <- paste0("temp/Genotype_chunks_directionality_this_genotype_part_", p,".Rds") + data_read <- readRDS(fileName) + directionality_this_genotype <- c(directionality_this_genotype, data_read) + } + + + + } + + # Store all data + { + save(timescale, file="timescale.Rdata") + save(total_time, file="total_time.Rdata") + save(reversals, file="reversals.Rdata") + save(speeds_anterograde, file="speeds_anterograde.Rdata") + save(speeds_retrograde, file="speeds_retrograde.Rdata") + save(intervals_final, file="intervals_final.Rdata") + save(transients_final, file="transients_final.Rdata") + save(identicals_final, file="identicals_final.Rdata") + save(time_used_final, file="time_used_final.Rdata") + save(transient_metrics_final, file="transient_metrics_final.Rdata") + save(EDD_this_genotype, file="EDD_this_genotype.Rdata") + save(ESD_this_genotype, file="ESD_this_genotype.Rdata") + save(FS_this_genotype, file="FS_this_genotype.Rdata") + save(meanEDD, file="meanEDD.Rdata") + save(meanESD, file="meanESD.Rdata") + save(directionality_this_genotype, file="directionality_this_genotype.Rdata") + } + rm(transients_final) + rm(intervals_final) + rm(identicals_final) + + # Transient analysis pt.2 + # load(file="genotype_indices.Rdata") + load(file = "metrics_all_splined.Rdata") + + split_metrics <- split(1:length(metrics_all), ceiling(seq_along(1:length(metrics_all))/50)) + + # The 11 per-transient measurements, named as returned by metrics() + metric_names <- c("peaked", "tt10p", "tt25p", "tt50p", "tt75p", "tt90p", "tt90r", "tt75r", "tt50r", "tt25r", "tt10r") + + for(m in 1:length(split_metrics)) + { + message("Generating Transients ", m, " of ", length(split_metrics)) + + # Generate dataframe to contain values + final_transients <- matrix(nrow = 1, ncol = 14) # 11 columns for 11 measurements + 3 indices + final_transients <- data.frame(final_transients) + final_transients <- final_transients[0,] + colnames(final_transients) <- c("i", "j", "index", metric_names) + + # Get current metrics + current_metrics_range <- split_metrics[[m]] + + for (i in current_metrics_range[1]:current_metrics_range[length(current_metrics_range)]) + { + # Select current metrics + a <- metrics_all[[i]] + + for (j in 1:length(a)) + { + b <- a[[j]] + index_ <- genotype_indices$Index[[i]][j] + data <- unlist(lapply(lapply(b[1], '[', 2), '[[', 1)) + + for (k in 2:c(length(b)-2)) + { + data <- rbind(data,unlist(lapply(lapply(b[k], '[', 2), '[[', 1))) + } + + + final_transients <- rbind(final_transients, data.frame(cbind(i = i, j = j, index = index_, data))) + } + } + + row.names(final_transients) <- NULL + fileName_final_transients <- paste0("temp/Final_transients_part_", m,".Rds") + saveRDS(final_transients, file=fileName_final_transients) + rm(final_transients) + } + rm(metrics_all) + + ## Re-assemble final_transients + final_transients <- matrix(nrow = 1, ncol = 14) # 11 columns for 11 measurements + 3 indices + final_transients <- data.frame(final_transients) + final_transients <- final_transients[0,] + colnames(final_transients) <- c("i", "j", "index", metric_names) + + for(m in 1:length(split_metrics)) + { + fileName <- paste0("temp/Final_transients_part_", m,".Rds") + data_read <- readRDS(fileName) + final_transients <- rbind(final_transients, data_read) + } + + message("Saving Final Transients") + save(final_transients, file = "final_transients.Rdata") + rm(final_transients) + + # Metadata extraction and final aggregation + + filelist <- NULL + filelist$csv <- list.files("../", pattern = "\\_new_meta_data.csv$", recursive = TRUE, full.names = TRUE) + + total <- length(filelist$csv) + + + # Create empty matrix to be filled with metadata + baseline <- read.csv(filelist$csv[1], sep = ",") + meta_data <- matrix(nrow = length(filelist$csv), ncol = 21) + meta_data <- data.frame(meta_data) + names(meta_data) <- baseline[,2] + meta_data$filename <- NA + + message("Adding metadata...") + pb <- txtProgressBar(max = total, style = 3) + + + for (f in 1:length(filelist$csv)) + { + setTxtProgressBar(pb, f) + + baseline <- read.csv(filelist$csv[f], sep = ",", stringsAsFactors = F) + + + meta_data[f,] <- c(t(as.numeric(baseline[1:21,1])) , filelist$csv[f]) + } + + meta_data$filename <- gsub("..//", "", meta_data$filename) + meta_data$CODE <- NA + meta_data$CODE <- factor(substr(meta_data$filename,1,regexpr("_",meta_data$filename[])-1)) + + meta_data$flyID <- NA + meta_data$flyID <- as.factor(unlist(lapply(meta_data$filename, function(x) substring(x,1,gregexpr('_',x)[[1]][3]-1)))) + + meta_data$recordingday <- as.Date(as.POSIXct(as.numeric(meta_data$created_unix_from_file), origin = "1970-01-01")) + meta_data$week_year <- format(meta_data$recordingday, format="%Y-%U") + + # Fix dates that are '1' - use the date from the creation date + # MAYO data/Screen Data$ find . -iname '*.cxd' -exec stat -f "%m%t%Sm %N" "{}" > unix_data.csv \; + + # In R, load this file into a data frame - if present: + if(file.exists("test_unix_data.csv")){ + + + date_created_raw <- read.table("test_unix_data.csv", fill = TRUE, as.is=TRUE, sep="\t") + # The first field is the unix timestamp, i.e. the time the file was last modified (effectively, when it was created) + # The problem is to extract the filename from the path field + # + # Solution: define a function that splits a string according to a character, then unlist this string + # and determine its length (i.e. how many substrings = depth of the path). 
+ # Then take the last string from the split string list (which is the cxd file); the optional .cxd-to-.xml + # replacement (for comparison with an xml filelist) is left commented out inside the function + + cxdfile <- function(x) { + y <- length(unlist(strsplit(x, "/"))) + x <- unlist(strsplit(x, "/"))[y] + #gsub("cxd", "xml", x) + } + + # Apply this function to all lines of the file list, then add the filename as extra column + date_created_raw$file <- lapply(date_created_raw[,1], cxdfile) + date_created_raw <- date_created_raw[!duplicated(date_created_raw$V1),] + date_created_raw$flyID <- as.factor(unlist(lapply(date_created_raw$file, function(x) substring(x,1,gregexpr('_',x)[[1]][3]-1)))) + date_created_raw$V1 <- unlist(lapply(date_created_raw$V1, function(x) substring(x,1,gregexpr(' ',x)[[1]][1]-1))) + date_created_raw$file <- unlist(date_created_raw$file ) + + meta_data$cxd_date <- date_created_raw$V1[match(meta_data$flyID, date_created_raw$flyID)] + meta_data$cxd_file <- date_created_raw$file[match(meta_data$flyID, date_created_raw$flyID)] + + } else + { + meta_data$cxd_date <- NA + meta_data$cxd_file <- meta_data$filename + } + + + # Fill missing cxd dates with the creation date taken from the file itself + missing_cxd_date <- is.na(meta_data$cxd_date) + meta_data$cxd_date[missing_cxd_date] <- meta_data$created_unix_from_file[missing_cxd_date] + + + meta_data$cxd_recordingday <- as.Date(as.POSIXct(as.integer(meta_data$cxd_date), origin = "1970-01-01")) + meta_data$cxd_week_year <- format(meta_data$cxd_recordingday, format="%U %Y") + + # Fix filenames + meta_data$flyID <- gsub("..//", "", meta_data$flyID) + meta_data$filename <- gsub("..//", "", meta_data$filename) + meta_data$CODE <- gsub("..//", "", meta_data$CODE) + + meta_data$cxd_file <- gsub("_new_meta_data.csv", "cxd", meta_data$cxd_file) + write.csv(meta_data, file="meta_data_all.csv", row.names = F) + + + # MAYO Screen Script No.5 + # Imports .Rdata files and performs downstream analysis + # to determine phenotypes and statistical differences. 
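+ # Quick illustration of the helpers defined below (hypothetical code "ABC7f", assuming genotype codes end in <age-week><sex> as used elsewhere in this script): + # substrRight("ABC7f", 2) # -> "7f" (AgeSex) + # substrRight("ABC7f", 1) # -> "f" (Sex) + # substr(substrRight("ABC7f", 2), 1, 1) # -> "7" (Age)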
+ + + # Define functions + row.compare <- function (x) { + x[order(x$ID, -x$Length),] + } + + substrRight <- function(x, n){ + substr(x, nchar(x)-n+1, nchar(x)) + } + + # Import Master cross list + load(file = "z.Rdata") + + crosses <- z + + crosses <- crosses[,c(6,2,3,4,5)] + colnames(crosses) <- c("CODE", "cross", "type", "gene", "human.ortholog") + + load(file="genotype_indices.Rdata") + genotype_indices$n <- sapply(genotype_indices$Index, function (x) length(x)) + genotype_indices$cross <- z$cross[match(genotype_indices$Code, z$CODE)] + genotype_indices$human <- z$human[match(genotype_indices$Code, z$CODE)] + + + genotypes <- z$CODE + + number_of_codes <- length(genotype_indices$Code) + + # Data quality + coverage <- list() + + load("time_used_final.Rdata") + load("total_time.Rdata") + for (i in 1:number_of_codes) + { + for (j in 1:(genotype_indices$n[i])) + { + # Determine how many seconds of the whole transient have been used + coverage[unlist(genotype_indices$Index[[i]][j])] <- unlist(time_used_final[[i]][[j]]) / unlist(total_time[unlist(genotype_indices$Index[[i]][j])]) + }} + coverage <- unlist(coverage) + + # Compute coverage and beat statistics per genotype + master_table <- genotype_indices[,c(1,3)] + flies <- function (x) {genotype_indices$Index[[x]] } + + + master_table$Cross <- z$cross[match(master_table$Code, z$CODE)] + master_table$type <- z$type[match(master_table$Code, z$CODE)] + master_table$human <- z$human[match(master_table$Code, z$CODE)] + master_table$fly <- z$fly[match(master_table$Code, z$CODE)] + + master_table$AgeSex <- as.factor(unlist(lapply(as.character(master_table$Code), function (x) substrRight(x,2)))) + master_table$Sex <- as.factor(unlist(lapply(as.character(master_table$Code), function (x) substrRight(x,1)))) + master_table$Age <- as.factor(unlist(lapply(as.character(master_table$AgeSex), function (x) substr(x,1,1)))) + master_table$coverage <- sapply(1:length(master_table$Code), function (x) mean(coverage[flies(x)])) + master_table$SDcoverage <- sapply(1:length(master_table$Code), function (x) sd(coverage[flies(x)])) + + + + # Data to extract + # Heart Period = average transient length + # Heart Rate = beats per minute + # Analysis + + load(file="identicals_final.Rdata") + load(file="filelist_with_index.Rdata") + load(file="transient_metrics_final.Rdata") + load(file="EDD_this_genotype.Rdata") + load(file="ESD_this_genotype.Rdata") + load(file="FS_this_genotype.Rdata") + + HR_all <- list() + EDD <- list() + ESD <- list() + FS <- list() + HR_this_fly <- list() + SIs <- list() + DIs <- list() + FS_SD <- list() + files_ <- list() + min_velocity <- list() + max_velocity <- list() + + for (i in 1:number_of_codes) # loop through genotypes + { + + HR_this_fly <- vector("list", genotype_indices$n[i]) + EDD_this_fly <- list() + ESD_this_fly <- list() + min_velocity_ <- list() + max_velocity_ <- list() + files_this_genotype <- list() + + for (j in 1:(genotype_indices$n[i])) # loop through flies + { + + HR_this_fly[[j]] <- length(identicals_final[[i]][[j]]) / round(unlist(total_time[unlist(genotype_indices$Index[[i]][j])]), digits = 2) + + EDD_this_fly[[j]] <- mean(EDD_this_genotype[[i]][[j]], na.rm = TRUE) * -1 + ESD_this_fly[[j]] <- mean(ESD_this_genotype[[i]][[j]], na.rm = TRUE) * -1 + + files_this_genotype[[j]] <- filelist_with_index[genotype_indices$Index[[i]][j],1] + min_velocity_[[j]] <- mean(unlist(transient_metrics_final[[i]][[j]]$max_neg_velocity), na.rm = TRUE) + max_velocity_[[j]] <- mean(unlist(transient_metrics_final[[i]][[j]]$max_velocity), na.rm 
= TRUE) + + + } + + files_[[i]] <- unlist(files_this_genotype) + + EDD[[i]] <- unlist(EDD_this_fly) + names(EDD)[[i]] <- levels(genotype_indices$Code)[i] + + ESD[[i]] <- unlist(ESD_this_fly) + names(ESD)[[i]] <- levels(genotype_indices$Code)[i] + + FS[[i]] <- FS_this_genotype[[i]] + names(FS)[[i]] <- levels(genotype_indices$Code)[i] + + FS_SD[[i]] <- sd(FS_this_genotype[[i]], na.rm = TRUE) + names(FS_SD)[[i]] <- levels(genotype_indices$Code)[i] + + names(FS_this_genotype)[[i]] <- levels(genotype_indices$Code)[i] + + min_velocity[[i]] <- unlist(min_velocity_) + names(min_velocity)[[i]] <- levels(genotype_indices$Code)[i] + + max_velocity[[i]] <- unlist(max_velocity_) + names(max_velocity)[[i]] <- levels(genotype_indices$Code)[i] + + + HR_all[[i]] <- unlist(HR_this_fly) + names(HR_all)[[i]] <- levels(genotype_indices$Code)[i] + + } + + master_table$HR <- sapply(1:length(master_table$Code), function (x) mean(HR_all[[x]])) + master_table$SDHR <- sapply(1:length(master_table$Code), function (x) sd(HR_all[[x]])) + + master_table$EDD <- sapply(1:length(master_table$Code), function (x) mean(EDD[[x]])) + master_table$SDEDD <- sapply(1:length(master_table$Code), function (x) sd(EDD[[x]])) + + master_table$ESD <- sapply(1:length(master_table$Code), function (x) mean(ESD[[x]])) + master_table$SDESD <- sapply(1:length(master_table$Code), function (x) sd(ESD[[x]])) + + master_table$FS <- sapply(1:length(master_table$Code), function (x) mean(FS[[x]])) + master_table$SDFS <- sapply(1:length(master_table$Code), function (x) sd(FS[[x]])) + + master_table$max_velocity <- sapply(1:length(master_table$Code), function (x) mean(max_velocity[[x]])) + master_table$SDMV <- sapply(1:length(master_table$Code), function (x) sd(max_velocity[[x]])) + + master_table$min_velocity <- sapply(1:length(master_table$Code), function (x) mean(min_velocity[[x]])) + master_table$SDMinV <- sapply(1:length(master_table$Code), function (x) sd(min_velocity[[x]])) + + write.table(master_table, file = "master_table.csv", sep = ",", row.names = FALSE) + + + + # Files + files_used <- reshape2::melt(files_) + rm(files_) + + # EDD + EDD_data <- reshape2::melt(EDD) + EDD_data$CODE <- as.factor(unlist(lapply(EDD_data$L1, function(x) substring(x,1,gregexpr('[mf]$',x)[[1]]-2)[1]))) + EDD_data$human <- crosses$human.ortholog[match(EDD_data$CODE, crosses$CODE)] + EDD_data$fly <- crosses$gene[match(EDD_data$CODE, crosses$CODE)] + EDD_data$type <- crosses$type[match(EDD_data$CODE, crosses$CODE)] + names(EDD_data)[1] <- "EDD" + + + # ESD + ESD_data <- reshape2::melt(ESD) + ESD_data$CODE <- as.factor(unlist(lapply(ESD_data$L1, function(x) substring(x,1,gregexpr('[mf]$',x)[[1]]-2)[1]))) + ESD_data$human <- crosses$human.ortholog[match(ESD_data$CODE, crosses$CODE)] + ESD_data$fly <- crosses$gene[match(ESD_data$CODE, crosses$CODE)] + ESD_data$type <- crosses$type[match(ESD_data$CODE, crosses$CODE)] + names(ESD_data)[1] <- "ESD" + + + # FS + FS_data <- reshape2::melt(FS) + FS_data$CODE <- as.factor(unlist(lapply(FS_data$L1, function(x) substring(x,1,gregexpr('[mf]$',x)[[1]]-2)[1]))) + FS_data$human <- crosses$human.ortholog[match(FS_data$CODE, crosses$CODE)] + FS_data$fly <- crosses$gene[match(FS_data$CODE, crosses$CODE)] + FS_data$type <- crosses$type[match(FS_data$CODE, crosses$CODE)] + names(FS_data)[1] <- "FS" + + + # HR + HR_data <- reshape2::melt(HR_all) + HR_data$CODE <- as.factor(unlist(lapply(HR_data$L1, function(x) substring(x,1,gregexpr('[fm]$',x)[[1]]-2)[1]))) + HR_data$human <- crosses$human.ortholog[match(HR_data$CODE, crosses$CODE)] + 
HR_data$fly <- crosses$gene[match(HR_data$CODE, crosses$CODE)] + HR_data$type <- crosses$type[match(HR_data$CODE, crosses$CODE)] + names(HR_data)[1] <- "HR" + + + + # neg velocity + min_velocity_data <- reshape2::melt(min_velocity) + min_velocity_data$CODE <- as.factor(unlist(lapply(min_velocity_data$L1, function(x) substring(x,1,gregexpr('[mf]$',x)[[1]]-2)[1]))) + min_velocity_data$human <- crosses$human.ortholog[match(min_velocity_data$CODE, crosses$CODE)] + min_velocity_data$fly <- crosses$gene[match(min_velocity_data$CODE, crosses$CODE)] + min_velocity_data$type <- crosses$type[match(min_velocity_data$CODE, crosses$CODE)] + names(min_velocity_data)[1] <- "min_velocity" + + # max_velocity + max_velocity_data <- reshape2::melt(max_velocity) + max_velocity_data$CODE <- as.factor(unlist(lapply(max_velocity_data$L1, function(x) substring(x,1,gregexpr('[fm]$',x)[[1]]-2)[1]))) + max_velocity_data$human <- crosses$human.ortholog[match(max_velocity_data$CODE, crosses$CODE)] + max_velocity_data$fly <- crosses$gene[match(max_velocity_data$CODE, crosses$CODE)] + max_velocity_data$type <- crosses$type[match(max_velocity_data$CODE, crosses$CODE)] + names(max_velocity_data)[1] <- "max.velocity" + + + write.table(EDD_data, file = "EDD_table.csv", sep = ",", row.names = FALSE) + write.table(ESD_data, file = "ESD_table.csv", sep = ",", row.names = FALSE) + write.table(FS_data, file = "FS_table.csv", sep = ",", row.names = FALSE) + write.table(HR_data, file = "HR_table.csv", sep = ",", row.names = FALSE) + write.table(min_velocity_data, file = "min_velocity_data.csv", sep = ",", row.names = FALSE) + write.table(max_velocity_data, file = "max_velocity_table.csv", sep = ",", row.names = FALSE) + + + all_data <- cbind(files_used[,1], EDD_data[,1], ESD_data[,1], FS_data[,1], HR_data[,1], min_velocity_data[,1], max_velocity_data) + + names(all_data) <- c("file", "EDD", "ESD", "FS", "HR","min.velocity", "max.velocity", "CODE_long", "CODE", "human_gene", "fly_gene", "type") + all_data$cross <- crosses$cross[match(all_data$CODE, crosses$CODE)] + + # Create flyID - first, take number, age and sex and combine, THEN add CODE as prefix + all_data$flyID <- as.factor(sapply(1:length(all_data$file), function(i) substring(all_data$file[i], gregexpr('_',all_data$file[i])[[1]] +1 , gregexpr('w[fm]_',all_data$file[i])[[1]]+1)[[1]])) + all_data$flyID <- as.factor(sapply(1:length(all_data$file), function(i) paste0(all_data$CODE[i], "_", all_data$flyID[i]))) + + all_data$ID_number <- as.factor(unlist(lapply(all_data$file, function(x) as.numeric(substring(x,gregexpr('_',x)[[1]][1]+1,gregexpr('_',x)[[1]]-1)[2])))) + all_data$Xpos <- as.numeric(sapply(1:length(all_data$file), function(i) substring(all_data$file[i], gregexpr('Xpos_',all_data$file[i])[[1]] + 5 , gregexpr('.tiff',all_data$file[i])[[1]]-1)[[1]])) + + + + meta_data <- read.csv(file="meta_data_all.csv", sep = ",") + meta_data <- meta_data[,-c(23:24)] + # Add CXD filename and match with metadata + all_data$cxd_file <- as.factor(unlist(lapply(all_data$file, function(x) substring(x, 1, gregexpr('.cxd',x)[[1]][1]+3)))) + all_data <- merge(all_data, meta_data, by.x = "cxd_file", by.y = "cxd_file", all.x = TRUE) + + all_data <- all_data %>% dplyr::select("flyID", "file", everything(), "cxd_file") # Re-order columns + all_data$CODE.y <- all_data$CODE + all_data$CODE.x <- all_data$CODE + all_data <- all_data[,c(1:2, 4:10, 46, 12:39, 45, 40:42, 3, 43:44)] + + + # MAD - based on final_transients calculate MAD_tt10r + load(file = "final_transients.Rdata") + + 
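+ # Note on the two robustness measures computed next (illustration only, hypothetical numbers): + # MAD_tt10r is the median absolute deviation of the tt10r relaxation time per fly - a robust spread estimate, + # e.g. mad(c(0.30, 0.32, 0.31, 0.90)) is far less sensitive to the 0.90 outlier than sd() would be. + # The Arrhythmia Index (AI) is sd(HP)/median(HP), i.e. beat-to-beat period variability normalized to the typical heart period.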
final_transients_grouped <- group_by(final_transients, i, j, index) + MAD_final_transients <- dplyr::select(final_transients_grouped, 1:3, 14) + MAD_tt10r <- MAD_final_transients %>% summarise_all(c("mad"), na.rm = TRUE) + + # Arrhythmia Index (AI) + load(file="intervals_final.Rdata") + + intervals_grouped_AI <- group_by(intervals_final, i, j, Index) + intervals_grouped_AI <- dplyr::select(intervals_grouped_AI, 7, 9:11) + AI_index <- intervals_grouped_AI %>% summarise_all(c("sd", "median"), na.rm = TRUE) + AI_index$AI <- AI_index$sd / AI_index$median + + # Combine MAD_tt10r and AI + merged_AI_MADtt10r <- left_join(AI_index, MAD_tt10r, by = c("i" = "i", "j" = "j", "Index" = "index")) + merged_AI_MADtt10r_files <- left_join(filelist_with_index, merged_AI_MADtt10r, by = c("index" = "Index")) + names(merged_AI_MADtt10r_files)[24] <- "MAD_tt10r" + + # Stroke volume - calculated from the all_data table as SV = pi/4 * (EDD^2 - ESD^2); cardiac output CO = SV * HR + Volume_SV <- function(x,y){ + pi / 4 * (x^2 - y^2) + } + + all_data_grouped_SV <- group_by(all_data, flyID, file) + all_data_grouped_SV <- dplyr::select(all_data_grouped_SV, 1:4, 6, 16) + all_data_grouped_SV$SV <- Volume_SV(all_data_grouped_SV$EDD, all_data_grouped_SV$ESD) + all_data_grouped_SV$CO <- all_data_grouped_SV$SV * all_data_grouped_SV$HR + + # Identify the abdominal segment based on the X position (Xpos) of the trace + + # Typical value is about 330 px for a 1000 px-wide movie. + threshold_Xpos <- floor(mean(unique(all_data_grouped_SV$Xpos))) + + abdominal_segment <- function(x) { + if(x > threshold_Xpos) { + return("posterior") + } else { + return("anterior") + }} + + load(file="anterior_beat_percent.Rdata") + load(file="speeds_anterograde.Rdata") + load(file="speeds_retrograde.Rdata") + + all_data_grouped_SV$segment <- sapply(all_data_grouped_SV$Xpos, abdominal_segment) + all_data_grouped_SV$direction_anterior <- unlist(anterior_beat_percent) + new_parameters <- left_join(merged_AI_MADtt10r_files, all_data_grouped_SV, by = c("jpg" = "file")) + new_parameters <- new_parameters[,c(1:4,19:20,23:24,26:32)] + new_parameters$jpg_noX <- new_parameters$jpg + new_parameters$anterograde_percent <- unlist(anterior_beat_percent) + new_parameters <- new_parameters[,-c(2,5,6,9:12)] + new_parameters$anterograde_speed <- unlist(speeds_anterograde) + new_parameters$retrograde_speed <- unlist(speeds_retrograde) + + # Remove Peak-Number from filename + new_parameters$jpg_noX <- as.factor(unlist(lapply(new_parameters$jpg_noX, function(x) paste0(substring(x, 1, gregexpr('peak_',x)[[1]][1]+4), "X",substring(x,gregexpr('_at',x)[[1]][1]))))) + + # Merge all data with final tables + + all_data_added <- left_join(all_data, new_parameters, by = c("file" = "jpg")) + write.table(all_data_added, file = "all_data_table.csv", sep = ",", row.names = FALSE) + + + + + + # Transient analysis + meta_data <- as_tibble(read.csv(file="meta_data_all.csv", sep = ",")) + crosses <- read.xlsx2(file="mappings.xlsx", sheetIndex = 1) + crosses$CODE <- gsub("_", "", crosses$CODE) + crosses <- crosses[,c(1,4,5,9)] + + + # Organize by genotype, flyID, and index + grouped <- group_by(final_transients, i, j, index) + grouped_per_fly <- grouped %>% summarise_all(c("median","sd"), na.rm = TRUE) + + # Add the actual genotype name via index + grouped_per_fly$CODE <- filelist_with_index$Genotype_filename[match(grouped_per_fly$index, filelist_with_index$index)] + grouped_per_fly$jpg <- filelist_with_index$jpg[match(grouped_per_fly$index, filelist_with_index$index)] + grouped_per_fly <- grouped_per_fly %>% dplyr::select("CODE", "jpg", everything()) # Re-order columns + 
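+ # Reminder on naming (illustration): summarise_all() with c("median","sd") suffixes every summarized column, + # e.g. HP becomes HP_median and HP_sd; the joins and the relaxtime calculation below rely on these suffixed names.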
intervals_grouped <- group_by(intervals_final, i, j, Index) + #intervals_grouped_per_fly <- summarise_all(intervals_grouped, funs(median,sd), na.rm = TRUE) + intervals_grouped_per_fly <- intervals_grouped %>% summarise_all(c("median","sd"), na.rm = TRUE) + + + + + MAD_intervals <- dplyr::select(intervals_grouped, 4,7:11) + # MADs <- summarise_all(MAD_intervals, funs(mad), na.rm = TRUE) + MADs <- MAD_intervals %>% summarise_all(c("mad"), na.rm = TRUE) + + + names(MADs) <- c("i", "j", "Index", "MAD_SI", "MAD_HP", "MAD_DI") + + + + + merged_transients <- left_join(grouped_per_fly, intervals_grouped_per_fly, by = c("i" = "i", "j" = "j", "index" = "Index")) + merged_transients$relaxtime <- merged_transients$HP_median - merged_transients$peaked_median + + merged_transients <- left_join(merged_transients, MADs, by = c("i" = "i", "j" = "j", "index" = "Index")) + + # Summarize per kymograph + # grouped <- group_by(grouped, CODE, jpg, i) # Add Code as grouping variable + + + # Collapse all transients into a per fly average + #grouped_per_fly <- summarise_all(grouped, funs(mean,sd), na.rm = TRUE) + #grouped_per_fly <- grouped_per_fly[,c(1:4,6:28)] + + + + # Read all data from previous analysis + all_data <- as_tibble(read.csv(file = "all_data_table.csv", sep = ",")) + all_data <- group_by(all_data, file) + # all_data <- all_data[-which(is.na(all_data$Xpos)),] # Bugfix + + + # Combine all data and timestamps + final_all_data <- left_join(all_data, merged_transients, by = c("file" = "jpg")) + final_all_data <- unique(final_all_data) + + # add Age: + final_all_data$Age <- as.factor(unlist(lapply(final_all_data$file, function(x) substring(x,gregexpr('_',x)[[1]][2]+1,gregexpr('_',x)[[1]]+1)[2]))) + # add Sex: + final_all_data$Sex <- as.factor(unlist(lapply(final_all_data$file, function(x) substring(x,gregexpr('_',x)[[1]][3]-1,gregexpr('_',x)[[1]]-1)[3]))) + + # Find all cxd files with duplicated peaks (bug from previous runs) and keep only the first + final_all_data_distinct <- final_all_data %>% dplyr::select(cxd_file, Xpos, file) %>% distinct() + final_all_data <- final_all_data[match(final_all_data_distinct$file, final_all_data$file),] + + # Remove Peak-Number from filename + final_all_data$file <- as.factor(unlist(lapply(final_all_data$file, function(x) paste0(substring(x, 1, gregexpr('peak_',x)[[1]][1]+4), "X",substring(x,gregexpr('_at',x)[[1]][1]))))) + + # Column clean-up + final_all_data <- dplyr::select(final_all_data, -c("CODE.x", "sizeX", "sizeY", "sizeZ", "sizeC", "sizeT", "pixelType", "bitsPerPixel" , "imageCount" , "dimensionOrder", + "orderCertain", "rgb", "littleEndian", "interleaved", "falseColor", "metadataComplete", "thumbnail", "series" , "resolutionLevel", + "created_unix_from_file", "CODE.y", "recordingday", "week_year" , "i", "j", "index", "peaked_sd", "tt10p_sd", "tt25p_sd", + "tt50p_sd", "tt75p_sd", "tt90p_sd", "tt90r_sd", "tt75r_sd", "tt50r_sd", "tt25r_sd", "tt10r_sd", "starts_median" , "peaks_median", + "ends_median", "starts_sd", "peaks_sd","ends_sd" , "SI_sd", "deltaSI_sd", "deltaSIplus_sd", "HP_sd", "DI_sd" )) # remove superfluous columns + + final_all_data <- dplyr::select(final_all_data, 34, 9, 1, 12, 13, 20:22, 10, 11, 15, 55, 56, 3:6, 49:51, 46, 35, 7:8, 52:54, 36:45, 47:48, 25, 26, 27, 28, 31, 29, 2, 14, 16:19, everything()) + + # final_all_data <- final_all_data[,1:42] + final_all_data$stocks <- crosses$Stock.collection[match(final_all_data$CODE, crosses$CODE)] + final_all_data <- unique(final_all_data) + final_all_data <- group_by(final_all_data, flyID) + + + 
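+ # Optional sanity check before writing (commented out; assumes these columns are present at this point): + # str(final_all_data[, c("flyID", "CODE", "FS", "HR", "segment")]) + # table(final_all_data$segment, useNA = "ifany")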
write.csv(final_all_data, file = "final_all_data.csv", row.names = F) + + + # Collapse all measurements into a per-fly median + data_per_fly <- final_all_data[,c(3,14:44, 55:56)] + data_per_fly <- group_by(data_per_fly, flyID) + + final_all_data_per_fly <- data_per_fly %>% summarise_all(c("median"), na.rm = TRUE) + final_all_data_per_fly <- left_join(final_all_data_per_fly, dplyr::select(final_all_data, c(1:10, 12:13, 48:53, 57)), by = c("flyID" = "flyID")) + final_all_data_per_fly <- unique(final_all_data_per_fly) + + # Add reversals + load(file="reversals.Rdata") + + filelist_with_index$reversals <- unlist(reversals) + files_reversals <- unique(filelist_with_index[,c(18,19)]) + + files_reversals <- group_by(files_reversals, flyID) + + files_reversals_median <- files_reversals %>% summarise_all(c("median", "sd"), na.rm = TRUE) + names(files_reversals_median)[2:3] <- c("reversals_median", "reversals_SD") + + final_all_data_per_fly <- left_join(final_all_data_per_fly, files_reversals_median, by = c("flyID" = "flyID")) + + + write.csv(final_all_data_per_fly, file = "final_all_data_per_fly.csv", row.names = F) + + # Split into anterior vs posterior segment + final_all_data_anterior <- final_all_data[which(final_all_data$segment == 'anterior'),] + final_all_data_posterior <- final_all_data[which(final_all_data$segment == 'posterior'),] + write.csv(final_all_data_anterior, file = "final_all_data_anterior.csv", row.names = F) + write.csv(final_all_data_posterior, file = "final_all_data_posterior.csv", row.names = F) + + ## Anterior-only averages + { + + # Collapse all measurements into a per-fly median + data_per_fly <- final_all_data_anterior[,c(3,14:44, 55:56)] + data_per_fly <- group_by(data_per_fly, flyID) + + final_all_data_per_fly_anterior <- data_per_fly %>% summarise_all(c("median"), na.rm = TRUE) + final_all_data_per_fly_anterior <- left_join(final_all_data_per_fly_anterior, dplyr::select(final_all_data_anterior, c(1:10, 12:13, 48:53, 55)), by = c("flyID" = "flyID")) + final_all_data_per_fly_anterior <- unique(final_all_data_per_fly_anterior) + + write.csv(final_all_data_per_fly_anterior, file = "final_all_data_per_fly_anterior.csv", row.names = F) + } + + ## Posterior-only averages + { + + # Collapse all measurements into a per-fly median + data_per_fly <- final_all_data_posterior[,c(3,14:44, 55:56)] + data_per_fly <- group_by(data_per_fly, flyID) + + final_all_data_per_fly_posterior <- data_per_fly %>% summarise_all(c("median"), na.rm = TRUE) + final_all_data_per_fly_posterior <- left_join(final_all_data_per_fly_posterior, dplyr::select(final_all_data_posterior, c(1:10, 12:13, 48:53, 55)), by = c("flyID" = "flyID")) + final_all_data_per_fly_posterior <- unique(final_all_data_per_fly_posterior) + + write.csv(final_all_data_per_fly_posterior, file = "final_all_data_per_fly_posterior.csv", row.names = F) + } + +} + + + + +
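+ # Example downstream use (not run; column and genotype codes are placeholders): compare fractional shortening + # of a test genotype against a control using the per-fly table written above. + # per_fly <- read.csv("final_all_data_per_fly.csv") + # ctrl <- per_fly$FS[per_fly$CODE == "CTRL7f"] # hypothetical control code + # test <- per_fly$FS[per_fly$CODE == "GENE7f"] # hypothetical test code + # wilcox.test(ctrl, test)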