-
Notifications
You must be signed in to change notification settings - Fork 11
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #54 from carynwillis/master
LCMS_file_formatting V1.1
- Loading branch information
Showing
3 changed files
with
126 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,24 @@ | ||
# Base image
FROM rocker/tidyverse:4.4.1

# Maintainer and author
LABEL maintainer="Caryn Willis <[email protected]>" \
      description="An R script to format LCMS xlsx files." \
      base-image="rocker/tidyverse:4.4.1"

# Working directory. WORKDIR creates the directory when it does not exist,
# so no separate `RUN mkdir` layer is needed.
WORKDIR /scratch

# Install necessary R packages.
# - "tools" is not installed here: it is a base package shipped with R itself,
#   not a CRAN package.
# - install.packages() only warns when a package fails to install, so the
#   explicit check below turns a failed install into a failed build.
# - The mirror is fetched over HTTPS rather than plain HTTP.
RUN Rscript -e 'pkgs <- c("optparse", "openxlsx", "readxl"); install.packages(pkgs, repos = "https://cloud.r-project.org"); missing <- pkgs[!vapply(pkgs, requireNamespace, logical(1), quietly = TRUE)]; if (length(missing) > 0) stop("Failed to install R packages: ", paste(missing, collapse = ", "))'

# Add to environment
ENV PATH="${PATH}:/opt/"

# Copy script (done after the package layer so script edits do not
# invalidate the cached package installation)
COPY LCMS_file_formatter.R /opt/LCMS_file_formatter.R

# Set default command to display help message
CMD ["Rscript", "/opt/LCMS_file_formatter.R", "-h"]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,72 @@ | ||
library(optparse) | ||
library(openxlsx) | ||
library(readxl) | ||
library(tools) | ||
# Command-line interface: -f and -c are required (enforced after parsing),
# -d is optional. All three take a file path as a character string.
option_list <- list(
  make_option(
    opt_str = c("-f", "--file"),
    type    = "character",
    default = NULL,
    metavar = "character",
    help    = "LCMS file in .xlsx format"
  ),
  make_option(
    opt_str = c("-c", "--column_converter"),
    type    = "character",
    default = NULL,
    metavar = "character",
    help    = "LCMS file column converter in .csv format"
  ),
  make_option(
    opt_str = c("-d", "--columns_to_drop"),
    type    = "character",
    default = NULL,
    metavar = "character",
    help    = "Column names to drop other than the defaults .csv format"
  )
)

# Build the parser from the option list and parse the command-line arguments.
opt_parser <- OptionParser(option_list = option_list)
opt <- parse_args(opt_parser)
|
||
# Validate the parsed command-line options and expose the input paths as
# lcms_file, column_converter_file and (only when -d was given)
# columns_to_drop_file. Any failed check aborts the script via stop().

# Both the LCMS workbook and the column converter are mandatory.
# `||` is used because this is a scalar condition.
if (is.null(opt$file) || is.null(opt$column_converter)) {
  stop("LCMS file and column converter are required to run this workflow.")
}
lcms_file <- opt$file
column_converter_file <- opt$column_converter

# Optional list of columns to drop. columns_to_drop_file is deliberately left
# undefined when -d is absent, because the drop step tests for its existence.
if (!is.null(opt$columns_to_drop)) {
  # Extension checks are case-insensitive so that e.g. ".CSV" is accepted.
  if (tolower(file_ext(opt$columns_to_drop)) != "csv") {
    stop("Columns to drop file must be in .csv format")
  }
  if (!file.exists(opt$columns_to_drop)) {
    stop("Columns to drop file not found: ", opt$columns_to_drop)
  }
  columns_to_drop_file <- opt$columns_to_drop
}

if (tolower(file_ext(lcms_file)) != "xlsx") {
  stop("LCMS file must be in .xlsx format")
}

if (tolower(file_ext(column_converter_file)) != "csv") {
  stop("Column converter file must be in .csv format")
}

# Fail with a clear message before any reader is asked to open a missing file.
if (!file.exists(lcms_file)) {
  stop("LCMS file not found: ", lcms_file)
}
if (!file.exists(column_converter_file)) {
  stop("Column converter file not found: ", column_converter_file)
}

# The formatting step only operates on the sheet named "combined".
lcms_file_sheets <- excel_sheets(lcms_file)
if (!("combined" %in% lcms_file_sheets)) {
  stop("LCMS file must contain sheet named combined for formatting")
}
|
||
# Read the "combined" sheet, remove repeated header rows, rename columns using
# the converter file, drop unwanted columns, and write the result next to the
# working directory as <name>_formatted.csv and <name>_formatted.xlsx.

#read in files
LCMS <- as.data.frame(read_xlsx(lcms_file, sheet = "combined"))
# comment.char = "" stops read.table from treating "#" as a comment marker
# (column names such as "# Usable QC" contain it); colClasses keeps names that
# look numeric (e.g. "001") as the exact text written in the file.
column_converter <- read.table(column_converter_file, sep = ",",
                               comment.char = "", colClasses = "character")
if (ncol(column_converter) < 2) {
  stop("Column converter file must have two columns: current name, new name")
}

#remove duplicate header rows
# The filter is evaluated against a data column literally named "column";
# rows whose value there is "Column"/"column" (or NA) are removed.
if (!("column" %in% names(LCMS))) {
  stop("LCMS combined sheet must contain a column named 'column'")
}
LCMS <- subset(LCMS, column != "Column" & column != "column")

#correct column names
# match() returns NA for converter entries that are not present in the sheet;
# an NA index can make the subscripted assignment fail, so only the entries
# that were actually found are renamed and the rest are reported.
rename_idx <- match(column_converter$V1, names(LCMS))
found <- !is.na(rename_idx)
if (any(!found)) {
  warning("Column converter entries not found in LCMS file: ",
          paste(column_converter$V1[!found], collapse = ", "))
}
names(LCMS)[rename_idx[found]] <- column_converter$V2[found]

if (!exists("columns_to_drop_file")) {
  drops <- c("# Usable QC", "RSD QC Areas [%]", "RT [min]", "Name")
} else {
  # Flatten the table to a plain character vector so the names are matched
  # whether the file lists them on one row or one per row.
  drops <- unlist(read.table(columns_to_drop_file, sep = ",",
                             comment.char = "", colClasses = "character"),
                  use.names = FALSE)
  drops <- drops[!is.na(drops) & nzchar(drops)]
  print(drops)
}
#remove unnecessary columns
# Dropping is done after renaming, so names are compared in their new form.
# drop = FALSE keeps a data frame even if a single column remains.
LCMS <- LCMS[, !(names(LCMS) %in% drops), drop = FALSE]

#write out files
filename <- file_path_sans_ext(basename(lcms_file))
new_filename_csv <- paste0(filename, "_formatted.csv")
new_filename_xlsx <- paste0(filename, "_formatted.xlsx")
# row.names = FALSE keeps the header aligned with the data rows and makes the
# CSV agree with the xlsx output, which is written without row names.
write.table(LCMS, new_filename_csv, sep = ",", quote = FALSE, row.names = FALSE)
write.xlsx(LCMS, new_filename_xlsx)
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,30 @@ | ||
# Description | ||
|
||
This Docker image contains a script to format LCMS xlsx files. | ||
|
||
### Inputs | ||
- LCMS xlsx file (must contain a sheet named `combined`)
- Column name converter file (.csv, no header): the 1st column holds the current column names and the 2nd column holds the new column names. Include one row for every column that needs to be renamed.
- Columns to drop file (.csv, optional) with list of column names to drop if different from the default: "# Usable QC","RSD QC Areas [%]","RT [min]", "Name" | ||
|
||
### Run | ||
``` | ||
docker run -it -v $PWD:/scratch lcms_data_formatting:v1.0 Rscript /opt/LCMS_file_formatter.R \
-f <lcms_file_here> \ | ||
-c <column_converter_here> \ | ||
-d <columns_to_drop_here> | ||
``` | ||
|
||
### Files included | ||
|
||
- `Dockerfile`: the Docker file used to build this image | ||
- `LCMS_file_formatter.R`: R script that serves as the main executable when the Docker container is run. It formats the LCMS file by removing unnecessary rows and columns and renaming columns to match the correct sample prefix.
|
||
### Output | ||
- This script writes the formatted LCMS data as two files in the working directory: *lcms_file_name*_formatted.xlsx and *lcms_file_name*_formatted.csv.
|
||
### Contact | ||
|
||
If you have any questions or feedback, please feel free to contact the maintainers of this project: | ||
|
||
- Caryn Willis, email: [email protected] |