Merge pull request #2 from SumedhSankhe/dev

Dev
SumedhSankhe · Mar 17, 2020 · e38e134 · e38e134
2 parents ccf0139 + 4b5b069
commit e38e134
Show file tree

Hide file tree

Showing 30 changed files with 1,619 additions and 1,621 deletions.
diff --git a/.gitignore b/.gitignore
@@ -3,6 +3,10 @@
 .RData
 .Ruserdata
 *.log
-.DS_Store
-*.Rproj
-sessionInfo.txt
+*.txt
+*.dcf
+*.rds
+*.pdf
+logs/
+rsconnect/
+MSstatsSampleSize-data/
diff --git a/MSstatsSampleSize-Shiny.Rproj b/MSstatsSampleSize-Shiny.Rproj
@@ -0,0 +1,13 @@
+Version: 1.0
+
+RestoreWorkspace: Default
+SaveWorkspace: Default
+AlwaysSaveHistory: Default
+
+EnableCodeIndexing: Yes
+UseSpacesForTab: Yes
+NumSpacesForTab: 2
+Encoding: UTF-8
+
+RnwWeave: Sweave
+LaTeX: pdfLaTeX
diff --git a/functions.R b/functions.R
diff --git a/global.R b/global.R
@@ -1,30 +1,47 @@
-
+suppressMessages({
 library(shiny)
 library(shinydashboard)
 library(shinyWidgets)
-library(markdown)
-library(shinyjs)
 library(ggplot2)
-library(reshape2)
-library(RColorBrewer)
-library(viridis)
-library(MSstatsBioData)
-library(MSstats)
-library(MSstatsSampleSize)
-library(caret)
-library(plotly)
-library(future)
-library(promises)
-library(doParallel)
-
-MSstatsPackages <- c("lme4", "marray", "limma", "gplots", "ggplot2", "methods", "grid", "ggrepel",
-                   "preprocessCore", "reshape2", "survival", "statmod", "minpack.lm", "utils",
-                   "grDevices", "graphics", "stats", "doSNOW", "snow", "foreach", "data.table",
-                   "MASS", "dplyr", "tidyr", "broom", "purrr", "stringr", "randomForest")
-lapply(MSstatsPackages, library, character.only = TRUE)
-#source("scripts/convert_to_MSstats.R")
-#source("scripts/ParSampleSizeCalculationClassification.R")
-#source("scripts/GroupComparison.R")
-#source("scripts/methods.R")
-
-library(lime)
+library(data.table)
+library(dplyr)
+library(naivebayes)
+library(randomForest)
+library(kernlab)
+library(e1071)
+})
+source('functions.R')
+
+FORMATS_LIST <- list("Protein-level quantification" = "standard", 
+                     "Example from MSstatsSampleSize" = "examples")
+FORMATS <- c("examples", "standard")
+EXTENSTIONS <- c("text/csv",
+                 "text/comma-separated-values,text/plain",
+                 ".csv", "text/tab-separated-values", ".tsv")
+
+MODELS <- c('','rf','nnet','svmLinear','logreg','naive_bayes')
+names(MODELS) <- c('',"Random Forest", "Neural Network",
+                   "Support Vector Machines with Linear Kernel",
+                   "Logistic Regression", "Naive Bayes")
+
+STOPPING_METRIC <- c("AUTO", "deviance", "logloss", "MSE", "RMSE", "MAE", "RMSLE",
+                     "AUC", "lift_top_group", "misclassification", "AUCPR",
+                     "mean_per_class_error", "custom", "custom_increasing")
+
+FOLD_ASSIGNMENT <- c("AUTO", "Random", "Modulo", "Stratified")
+
+
+FAMILY <-  c("gaussian", "binomial", "quasibinomial", "ordinal", "multinomial",
+           "poisson", "gamma", "tweedie", "negativebinomial")
+
+SOLVER <- c("AUTO", "IRLSM", "L_BFGS", "COORDINATE_DESCENT_NAIVE", 
+            "COORDINATE_DESCENT", "GRADIENT_DESCENT_LH", "GRADIENT_DESCENT_SQERR")
+
+LINK <- c("family_default", "identity", "logit", "log", "inverse", "tweedie",
+          "ologit")
+
+B_GROUP <- ""
+TUNING <- c("Use h2o Package", "Parameter Tuning")
+# config <- h2o_config()
+# h2o::h2o.init(nthreads = 12, max_mem_size = "2g",
+#               log_dir = config$log_dir, log_level = config$log_level)
diff --git a/help_mds/b_group.md b/help_mds/b_group.md
@@ -0,0 +1 @@
+One group must be selected as a baseline.
diff --git a/help_mds/diff_prot.md b/help_mds/diff_prot.md
@@ -0,0 +1,3 @@
+The user should provide a list of the proteins that are expected to have fold
+changes greater than 1 in the simulation. Proteins that are not in the list are
+assumed to have a fold change of 1.
diff --git a/help_mds/exp_fc.md b/help_mds/exp_fc.md
@@ -0,0 +1,3 @@
+If Yes is selected, the fold changes and variances used for simulation are
+estimated directly from the input data. Otherwise, the fold changes have to be
+set manually below.
diff --git a/help_mds/fc_values.md b/help_mds/fc_values.md
@@ -0,0 +1,4 @@
+The fold changes between each of the other groups and the baseline group have to
+be entered in the following table.
+
+*Add more images with examples*
diff --git a/help_mds/n_samp_grp.md b/help_mds/n_samp_grp.md
@@ -0,0 +1 @@
+Number of samples per group to simulate. Default is 50.
diff --git a/help_mds/n_sim.md b/help_mds/n_sim.md
@@ -0,0 +1,7 @@
+---
+title: "Untitled"
+author: "Sumedh Sankhe"
+date: "2/7/2020"
+output: pdf_document
+---
+Number of times to repeat simulation experiments (Number of simulated datasets). Default is 10.
diff --git a/help_mds/n_val_samp_grp.md b/help_mds/n_val_samp_grp.md
@@ -0,0 +1,2 @@
+Number of validation samples per group to simulate. This option applies only when
+the user sets **Simulate Validation Set** = TRUE. Default is 50.
diff --git a/help_mds/prot_prop.md b/help_mds/prot_prop.md
@@ -0,0 +1 @@
+Number of proteins to simulate. Proteins are ranked in decreasing order of their mean abundance across all samples, and the top `protein_number` proteins are selected for simulation. Default is the maximum number of available proteins.
diff --git a/help_mds/sel_sim_prot.md b/help_mds/sel_sim_prot.md
@@ -0,0 +1,3 @@
+The criterion used to select which proteins from the input data are simulated. It can be  
+1) **proportion** of total number of proteins in the input data  
+2) **number** to specify the number of proteins.
diff --git a/help_mds/sim_val.md b/help_mds/sim_val.md
@@ -0,0 +1 @@
+Default is FALSE. If TRUE, a validation set is simulated; otherwise, the input data will be used as the validation set.
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1 @@
		Number of samples per group to simulate. Default is 50.
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,2 @@
		Number of validation samples per group to simulate. This option applies only when
		the user sets Simulate Validation Set = TRUE. Default is 50.
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1 @@
		Number of proteins to simulate. Proteins are ranked in decreasing order of their mean abundance across all samples, and the top protein_number proteins are selected for simulation. Default is the maximum number of available proteins.
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1 @@
		Default is FALSE. If TRUE, a validation set is simulated; otherwise, the input data will be used as the validation set.