Automates statistical tests from a single, identically formatted data input, reducing the arduous work of searching for packages and reformatting the data for each one.
The package includes
-
Simple statistics: u-test, t-test, post hoc tests of ANOVA and Kruskal-Wallis with FDR-adjusted values
-
Bar, box, dot, and violin plots with significance (u-test, t-test, post hoc tests of ANOVA and Kruskal-Wallis)
-
Scaling & Transformation
-
Normality check (Shapiro Wilk test)
-
Scheirer–Ray–Hare Test
-
Volcano plot
-
Heatmap
-
PERMANOVA
-
NMDS
-
PCA
-
PCoA
https://cran.r-project.org/bin/windows/base/
https://www.rstudio.com/products/rstudio/download/
https://cran.r-project.org/bin/windows/Rtools/
install.packages("devtools")
devtools::install_github("daehwankim12/LMSstat")
devtools::install_github("daehwankim12/LMSstat", ref = "develop")
library(LMSstat)
- Simple statistics
- Barplot, Boxplot, Dotplot
- Volcano plot
- Scheirer–Ray–Hare Test
- PERMANOVA
- NMDS
- PCA
- Scaling & Transformation
- Normality check (Shapiro Wilk test)
- Heatmap
#Sample Data provided within the package
data("Data")
# Uploading your own Data
setwd("~")
Data <- read.csv("statT.csv", check.names = FALSE)
# When uploading an Excel file
Data <- as.data.frame(readxl::read_excel("statT.xlsx", sheet = 2))
# When your data is large
Data <- as.data.frame(data.table::fread("statT.csv", check.names = FALSE, header = TRUE, integer64 = "double"))
statT.csv
- PERMANOVA
# Sample Data provided within the package
data("Classification")
# Uploading your own Data
Classification <- read.csv("statT_G.csv", header = F)
statT_G.csv
# Use `parallel = TRUE` for datasets with more than 1000 metabolites.
Statfile <- All_stats(Data,Adjust_p_value = TRUE, Adjust_method = "BH", parallel = FALSE)
Statfile <- Allstats(Data,Adjust_p_value = TRUE, Adjust_method = "BH") # Previous version using for-loop
-
Adjust_p_value = T # Set True if adjustment is needed
-
Adjust_method = "BH" # Adjustment methods frequently used. c("holm", "hochberg", "hommel", "bonferroni", "BH", "BY","fdr", "none")
head(Statfile[["Result"]]) # includes all statistical results
write.csv(Statfile[["Result"]], "p_value_result.csv") # Write csv with all the p-value included
install.packages("writexl")
writexl::write_xlsx(list(mysheet = as.data.frame(Statfile[["Result"]])), "p_value_result.xlsx") # Write xlsx with all the p-values included (note: write_xlsx does not write row names)
data.table::fwrite(as.data.frame(Statfile[["Result"]]), "p_value_result.csv", row.names = TRUE)
# Makes a subdirectory and saves box plots for all the variables
Boxplot(Statfile,asterisk = "u_test")
# Makes a subdirectory and saves dot plots for all the variables
Dotplot(Statfile,asterisk = "t_test")
# Makes a subdirectory and saves bar plots for all the variables
Barplot(Statfile,asterisk = "Scheffe")
# Makes a subdirectory and saves violin plots for all the variables
Violinplot(Statfile,asterisk = "Scheffe")
Boxplot(Statfile) Dotplot(Statfile)
Barplot(Statfile) Violinplot(Statfile)
- asterisk = "t_test" #c("Dunn","Scheffe","u_test","t_test")
- significant_variable_only = F # If set to TRUE, insignificant results will not be plotted
- color = c("#FF3300", "#FF6600", "#FFCC00", "#99CC00", "#0066CC", "#660099") # Colors for the plots
- legend_position = "none" # "none","left","right","bottom","top"
- order = NULL # Order of the groups c("LAC","LUE","WEI","SDF","HGH","ASH")
- tip_length = 0.01 # significance tip length
- label_size = 2.88 # significance label size
- step_increase = 0.05 #significance step increase
- width = 0.3 # box width ; size = 3 # dot size
- fig_width = NA #figure size
- fig_height = NA #figure size
- Y_text = 12 # Y title size
- X_text = 10 # X text size
- Y_lab = 10 #y axis text size
- T_size = 15 # Title size
- sig_int = c(0.1,0.05) # significance interval
scaled_data <- D_tran(Data, param = "Auto")
Raw_Data Scaled_Data
-
param = "None" # "None","Auto","log10","Pareto"
-
save = F #Set true if datafile is to be saved
#Shapiro Wilk test
Result <- Norm_test(Data)
write.csv(Result, "Normality_test_Result.csv")
# csv files including significant variables (Multilevel, Group, interaction) and a Venn diagram are downloaded
SRH(Data)
- Adjust_p_value = T # Set True if adjustment is needed
- Adjust_method = "BH" # Adjustment methods frequently used. c("holm", "hochberg", "hommel", "bonferroni", "BH", "BY","fdr", "none")
# Makes a subdirectory and saves Volcano plots for different combination of groups
Test <- All_stats(Data)
Volcano(Test, asterisk = "t-test")
- asterisk = "t-test" #statistics inheriting from Allstats "Scheffe", "t-test", "u-test", "Dunn"
- reverse = T # T, F reverse the direction of fold change
- fig_width = NA #figure size
- fig_height = NA #figure size
- FC_log = 2 # Fold change log transformation value
- pval_log = 10 #p_value log transformation value
- dotsize = 3 #dotsize
- x_limit = c(-2,2) #x axis limit
- y_limit =c(0,6) #y axis limit
- pval_intercept = 0.05 # intercept for identification
- sig_label = T # T,F label significant variables
- color=c("#FF3300","#FF6600","#FFCC00") #colors used for ggplots.
- fixed_limit = F #whether the limit should be fixed or not T, F
- max_overlap = 20 #maximum overlap for labels
- FC_range = c(-1.5,1.5) #significant fold change range
# Makes a subdirectory and saves Heatmap
scaled_data <- D_tran(Data, param = "Auto")
Heatmap(scaled_data) #data inheriting from D_tran
- col =c("green", "white", "red") # colors for heatmap
- col_lim = c(-3, 0, 3) # color boundaries
- reverse = T # T,F Reverse column and rows
- distance = "pearson" # Distance matrix for HCA "pearson", "manhattan", "euclidean", "spearman", "kendall"
- rownames = T # T,F
- colnames = T # T,F
- Hsize = c(3,6) # Width & Height c(a,b)
- g_legend = "Group" # Annotation legend title
- h_legend = "Color Key" # Heatmap legend title
- Title ="Title" # Title
- T_size = 10 # Title text size
- R_size = 3 # row text size
- C_size = 3 # column text size
- Gcol =c("ASD" = "black","HGH"="red","LAC"="blue","LUE" ="grey","SDF" = "yellow","WEI"="green") # Color for top_annotation bar
- dend_h = 0.5 # dendrogram height
- a_h = 0.2 # top annotation height
data("Data")
data("Classification")
PERMANOVA done with the Group column
Indiv_Perm(Data) # The group information is treated as a factor
Loops PERMANOVA over different classes provided by Classification
Result <- Multi_Perm(Data, Classification) # The group information is treated as factors
- method = Dissimilarity index c("manhattan", "euclidean", "canberra", "clark", "bray", "kulczynski", "jaccard", "gower", "altGower", "morisita", "horn", "mountford", "raup", "binomial", "chao", "cao", "mahalanobis", "chisq", "chord")
# Makes a subdirectory and saves NMDS plots for all of the distance metrics
NMDS(Data, methods = c("manhattan", "bray", "euclidean"))
NMDS plot with bray distance and p-value from PERMANOVA
-
methods = Dissimilarity index c("manhattan", "euclidean", "canberra", "clark", "bray", "kulczynski", "jaccard", "gower", "altGower", "morisita", "horn", "mountford", "raup", "binomial", "chao", "cao", "mahalanobis", "chisq", "chord")
-
color = c("#FF3300", "#FF6600", "#FFCC00", "#99CC00", "#0066CC", "#660099") # Colors for the plots
-
legend_position = "none" # "none","left","right","bottom","top"
-
fig_width = NA #figure size
-
fig_height = NA #figure size
-
names = F # used to indicate sample names
-
dotsize = 3 # dotsize
-
labsize = 3 # label size
# Makes a subdirectory and saves PCA plot
PCA(Data, components = c(1, 2), legend_position = "none")
PCA plot with selected components
- color = c("#FF3300", "#FF6600", "#FFCC00", "#99CC00", "#0066CC", "#660099") # Colors for the plots
- legend_position = "none" # "none","left","right","bottom","top"
- fig_width = NA #figure size
- fig_height = NA #figure size
- components = c(1,2) # selected components
- names = F # used to indicate sample names
- dotsize = 3 # dotsize
- labsize = 3 # label size
- ellipse = T # T or F to show ellipse
# Makes a subdirectory and saves PCoA plot
PCoA(Data, components = c(1, 2), methods = c("bray", "manhattan"))
PCoA plot with selected components
- color = c("#FF3300", "#FF6600", "#FFCC00", "#99CC00", "#0066CC", "#660099") # Colors for the plots
- legend_position = "none" # "none","left","right","bottom","top"
- fig_width = NA #figure size
- fig_height = NA #figure size
- components = c(1,2) # selected components
- names = F # used to indicate sample names
- dotsize = 3 # dotsize
- labsize = 3 # label size
- ellipse = T # T or F to show ellipse
- methods = Dissimilarity index c("manhattan", "euclidean", "canberra", "clark", "bray", "kulczynski", "jaccard", "gower", "altGower", "morisita", "horn", "mountford", "raup", "binomial", "chao", "cao", "mahalanobis", "chisq", "chord")