Skip to content

Commit

Permalink
Merge pull request #54 from carynwillis/master
Browse files Browse the repository at this point in the history
LCMS_file_formatting V1.1
  • Loading branch information
jaamarks authored Feb 6, 2025
2 parents c99b2bc + b008224 commit c6444cd
Show file tree
Hide file tree
Showing 3 changed files with 126 additions and 0 deletions.
24 changes: 24 additions & 0 deletions lcms_data_formatting/v1.1/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
# Base image: version-pinned rocker image that ships R and the tidyverse.
FROM rocker/tidyverse:4.4.1

# Image metadata
LABEL maintainer="Caryn Willis <[email protected]>" \
      description="An R script to format LCMS xlsx files." \
      base-image="rocker/tidyverse:4.4.1"

# Working directory; input/output files are expected to be bind-mounted here.
# WORKDIR creates the directory when it does not exist, so no mkdir is needed.
WORKDIR /scratch

# Install the R packages loaded by the script.
# - `tools` is intentionally not listed: it is part of base R and is not
#   installable from CRAN.
# - `--error` makes the build fail if a package cannot be installed
#   (install.packages() alone only emits a warning).
# - `--skipinstalled` avoids reinstalling packages the base image already has.
# - No repository is passed, so the mirror configured by the base image is
#   used instead of an unencrypted http:// URL.
RUN install2.r --error --skipinstalled \
      openxlsx \
      optparse \
      readxl

# Add the script directory to PATH
ENV PATH="${PATH}:/opt/"

# Copy script (done after the package layer so script edits do not
# invalidate the cached package installation)
COPY LCMS_file_formatter.R /opt/LCMS_file_formatter.R

# Default command: display the script's help message
CMD ["Rscript", "/opt/LCMS_file_formatter.R", "-h"]
72 changes: 72 additions & 0 deletions lcms_data_formatting/v1.1/LCMS_file_formatter.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
library(optparse)
library(openxlsx)
library(readxl)
library(tools)
# Command-line interface.
# Every flag takes a character value and defaults to NULL, which lets the
# validation code below distinguish "not supplied" from a real value.
character_flag <- function(flags, help_text) {
  make_option(
    flags,
    type = "character",
    default = NULL,
    help = help_text,
    metavar = "character"
  )
}

option_list <- list(
  character_flag(c("-f", "--file"),
                 "LCMS file in .xlsx format"),
  character_flag(c("-c", "--column_converter"),
                 "LCMS file column converter in .csv format"),
  character_flag(c("-d", "--columns_to_drop"),
                 "Column names to drop other than the defaults .csv format")
)

# Build the parser and parse the arguments passed to the script.
opt_parser <- OptionParser(option_list = option_list)
opt <- parse_args(opt_parser)

# ---- Validate command-line inputs -------------------------------------------
# Sets `lcms_file` and `column_converter_file`; `columns_to_drop_file` is
# defined ONLY when -d/--columns_to_drop was supplied, because later code
# tests for its existence to decide whether to use the default drop list.

# Both the LCMS workbook and the column converter are mandatory.
# `||` is used because these are scalar conditions.
if (is.null(opt$file) || is.null(opt$column_converter)) {
  stop("LCMS file and column converter are required to run this workflow.")
}
lcms_file <- opt$file
column_converter_file <- opt$column_converter

# Optional file listing the columns to drop.
if (!is.null(opt$columns_to_drop)) {
  # Extension comparison is case-insensitive so e.g. "drops.CSV" is accepted.
  if (tolower(file_ext(opt$columns_to_drop)) != "csv") {
    stop("Columns to drop file must be in .csv format")
  }
  if (!file.exists(opt$columns_to_drop)) {
    stop("Columns to drop file not found: ", opt$columns_to_drop)
  }
  columns_to_drop_file <- opt$columns_to_drop
}

# LCMS workbook: must be an existing .xlsx file.
if (tolower(file_ext(lcms_file)) != "xlsx") {
  stop("LCMS file must be in .xlsx format")
}
if (!file.exists(lcms_file)) {
  stop("LCMS file not found: ", lcms_file)
}

# Column converter: must be an existing .csv file.
if (tolower(file_ext(column_converter_file)) != "csv") {
  stop("Column converter file must be in .csv format")
}
if (!file.exists(column_converter_file)) {
  stop("Column converter file not found: ", column_converter_file)
}

# The formatting step reads the sheet named "combined", so require it up front.
lcms_file_sheets <- excel_sheets(lcms_file)
if (!("combined" %in% lcms_file_sheets)) {
  stop("LCMS file must contain sheet named combined for formatting")
}

# ---- Read inputs -------------------------------------------------------------
# Load the "combined" sheet as a plain data.frame.
LCMS <- as.data.frame(read_xlsx(lcms_file, sheet = "combined"))

# Load the converter table: column 1 (V1) = current name, column 2 (V2) = new
# name, no header row.
# - comment.char = "" : read.table treats '#' as a comment marker by default,
#   which would truncate names such as "# Usable QC".
# - quote = "\""      : only double quotes delimit fields, so an apostrophe in
#   a name is kept as data.
# - colClasses        : keep names as text (e.g. "001" must not become 1).
column_converter <- read.table(
  column_converter_file,
  sep = ",",
  header = FALSE,
  quote = "\"",
  comment.char = "",
  colClasses = "character"
)
if (ncol(column_converter) < 2) {
  stop("Column converter file must contain two columns: current name, new name")
}

# ---- Remove duplicate header rows -------------------------------------------
# Drops rows whose `column` field holds the literal header text.
# NOTE(review): presumably the combined sheet repeats its header row between
# stacked tables and has a field literally named `column` - confirm.
LCMS <- subset(LCMS, column != "Column" & column != "column")

# ---- Correct column names ---------------------------------------------------
# Look up each "current name" in the sheet. Entries that are not present are
# skipped with a warning; assigning through an NA index would otherwise abort
# with "NAs are not allowed in subscripted assignments".
rename_idx <- match(column_converter$V1, names(LCMS))
rename_found <- !is.na(rename_idx)
if (any(!rename_found)) {
  warning(
    "Column converter entries not found in LCMS file (skipped): ",
    paste(column_converter$V1[!rename_found], collapse = ", ")
  )
}
names(LCMS)[rename_idx[rename_found]] <- column_converter$V2[rename_found]

# ---- Choose the columns to drop ---------------------------------------------
# `columns_to_drop_file` only exists when the user supplied -d/--columns_to_drop.
if (!exists("columns_to_drop_file")) {
  # Default drop list.
  drops <- c("# Usable QC", "RSD QC Areas [%]", "RT [min]", "Name")
} else {
  # Flatten the table into one character vector so that names are matched
  # whether the file lists them one per line or comma-separated on one line.
  # (Wrapping the data.frame in c() gives a list, and a multi-row column then
  # matches nothing in %in%.)
  # comment.char = "" keeps names that start with '#', e.g. "# Usable QC".
  drops <- unlist(
    read.table(
      columns_to_drop_file,
      sep = ",",
      header = FALSE,
      quote = "\"",
      comment.char = "",
      colClasses = "character"
    ),
    use.names = FALSE
  )
  # Ignore empty cells.
  drops <- drops[!is.na(drops) & nzchar(drops)]
  print(drops)
}

# ---- Remove unnecessary columns ---------------------------------------------
# drop = FALSE keeps LCMS a data.frame even when a single column remains.
LCMS <- LCMS[, !(names(LCMS) %in% drops), drop = FALSE]

# ---- Write outputs ----------------------------------------------------------
# Output names are derived from the input file's base name (directory and
# extension removed) plus "_formatted"; the paths are relative, so both files
# are written to the current working directory.
output_stem <- paste0(file_path_sans_ext(basename(lcms_file)), "_formatted")
csv_path <- paste0(output_stem, ".csv")
xlsx_path <- paste0(output_stem, ".xlsx")

# NOTE(review): write.table() writes row names by default, so the CSV gets a
# leading row-name column and a header line with one field fewer than the data
# rows. Confirm whether downstream consumers rely on this before changing it.
write.table(LCMS, csv_path, sep = ",", quote = FALSE)
write.xlsx(LCMS, xlsx_path)

30 changes: 30 additions & 0 deletions lcms_data_formatting/v1.1/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
# Description

This Docker image contains a script to format LCMS xlsx files.

### Inputs
- LCMS xlsx file containing a sheet named `combined`
- Column name converter file (.csv, no header) with the current column names in the 1st column and the new column names in the 2nd column. Every column whose name needs to be changed should be included.
- Columns to drop file (.csv, optional) with the list of column names to drop. When supplied, this list is used instead of the default: "# Usable QC","RSD QC Areas [%]","RT [min]", "Name"

### Run
```
docker run -it -v $PWD:/scratch lcms_data_formatting:v1.1 Rscript /opt/LCMS_file_formatter.R \
-f <lcms_file_here> \
-c <column_converter_here> \
-d <columns_to_drop_here>
```

### Files included

- `Dockerfile`: the Docker file used to build this image
- `LCMS_file_formatter.R`: R script that serves as the main executable when the Docker container is run. Expected behavior is to format the LCMS file by removing unnecessary rows and columns and renaming columns to match the correct sample prefix.

### Output
- This script writes the formatted LCMS data to both a .xlsx file and a .csv file, named *lcms_file_name*_formatted.xlsx and *lcms_file_name*_formatted.csv.

### Contact

If you have any questions or feedback, please feel free to contact the maintainers of this project:

- Caryn Willis, email: [email protected]

0 comments on commit c6444cd

Please sign in to comment.