Skip to content

Commit

Permalink
Merge pull request #95 from alan-jarmusch/master
Browse files Browse the repository at this point in the history
update to example code and ReDU logo
  • Loading branch information
mwang87 committed Aug 26, 2019
2 parents 07ed037 + 6e4a02e commit eb3d13b
Show file tree
Hide file tree
Showing 7 changed files with 635 additions and 1 deletion.
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ plot <- ggplot(df_plot,aes(x=as.factor(sample), as.numeric(proportion*100)))+
legend.title=element_text(colour="black", size=6),
legend.text=element_text(colour="black", size=6) ) +
# labs(x="", y="Annotated in Proportion of Files", title = chemical)
labs(x="", y="Annotated in Human Files (%)")
labs(x="", y="Annotated in Files (%)")
print(plot)
ggsave(plot, file=paste0("Manuscript/Figures/Source_Material/ReDU_ChemicalEnrichment_",chemical,".pdf"),
width = 1, height = 1.15, units = "in", useDingbats=FALSE)
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
# Bar chart of how often one annotated chemical is observed in the files of
# each bacterial group, written out as a small PDF panel for the manuscript.
library(data.table)
library(dplyr)
library(tidyr)
library(ggplot2)
# NOTE(review): hard-coded, machine-specific project root. Every relative path
# below is resolved against it, so it is kept as-is.
setwd("~/Research/Projects/ReDU_MS2")

df <- fread("Manuscript/Data/ReDU_chemical_enrichment_bacteria.txt", sep="\t", header=TRUE)

# Quick look at the raw table before the columns are renamed.
df[1:5, 1:7]

# Columns come in pairs per group. Judging by the commented-out placeholders
# ("# of Gx" / "Proportion of Gx"), the "# ..." column of each pair is a count
# and the unprefixed one is the proportion.
colnames(df) <- c(
  "Chemical",
  "# 1423|Bacillus subtilis (n=89)", "1423|Bacillus subtilis (n=89)",
  "# 1280|Staphylococcus aureus (n=49)", "1280|Staphylococcus aureus (n=49)",
  "# 1883|Streptomyces (n=7)", "1883|Streptomyces (n=7)"
  #"# of G4","Proportion of G4",
  #"# of G5","Proportion of G5",
  #"# of G6","Proportion of G6",
)

# Keep the chemical name plus the second (unprefixed) column of each pair.
df <- df[, c(1, 3, 5, 7)]

# Long format: one row per chemical and group. Every column except `Chemical`
# is gathered by name, so the selection cannot collapse to `2:1` the way
# `2:length(df)` would if only the first column were left.
df_gather <- gather(df, key = "sample", value = "proportion", -Chemical)

#chemical <- "Surfactin_C14"
chemical <- "Spectral Match to Cholic acid from NIST14"

df_plot <- subset(df_gather, Chemical == chemical)

# Fail loudly instead of silently writing an empty figure when the requested
# chemical is not present in the table (e.g. because of a typo).
if (nrow(df_plot) == 0) {
  stop("No rows found for chemical '", chemical, "'; nothing to plot.",
       call. = FALSE)
}

# `proportion` is multiplied by 100 so the y axis reads as a percentage,
# matching the "(%)" axis label. Three fill colours, one per group.
plot <- ggplot(df_plot, aes(x = as.factor(sample), y = as.numeric(proportion * 100))) +
  geom_bar(aes(fill = sample), stat = "identity", width = 0.75) +
  #facet_wrap(~Comparison, ncol=3)+
  scale_fill_manual(values = c("#e41a1c", "#377eb8", "#fdbf6f")) +
  theme_minimal() +
  theme(panel.grid.major.x = element_blank(),
        panel.grid.major.y = element_line(colour = "grey90", size = 0.5, linetype = "dashed"),
        panel.grid.minor = element_blank(),
        axis.ticks = element_line(colour = "black", size = 0.5, linetype = "solid"),
        axis.text = element_text(colour = "black", size = 6),
        axis.text.x = element_text(colour = "black", size = 6),
        axis.line = element_line(colour = "black", size = 0.5, linetype = "solid"),
        axis.title = element_text(colour = "black", size = 6),
        strip.text.x = element_text(colour = "black", size = 6),
        aspect.ratio = 2,
        title = element_text(colour = "black", size = 6),
        legend.position = "none",
        legend.title = element_text(colour = "black", size = 6),
        legend.text = element_text(colour = "black", size = 6)) +
  # labs(x="", y="Annotated in Proportion of Files", title = chemical)
  labs(x = "", y = "Annotated in Files (%)")
print(plot)

# Name `filename` and `plot` explicitly rather than relying on partial
# matching of `file` and on positional order. The output file is named after
# the selected chemical.
ggsave(filename = paste0("Manuscript/Figures/Source_Material/ReDU_ChemicalEnrichment_", chemical, ".pdf"),
       plot = plot,
       width = 1, height = 1.15, units = "in", useDingbats = FALSE)

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# Summarise, per spectral library, the number of annotation hits relative to
# the number of spectra the library contains, and write the table to CSV.
library(data.table)
library(dplyr)
library(ggplot2)
setwd("~/Research/Projects/ReDU_MS2")

# All identifications; the columns used below are Organism, Compound_Name and
# full_CCMS_path.
annotations <- fread("Manuscript/Data/ReDU_all_identifications.tsv", sep="\t", header=TRUE)

# For every (Organism, Compound_Name) pair, count the distinct file paths in
# which it was reported.
hits_per_compound <- summarise(
  group_by(annotations, Organism, Compound_Name),
  Unique = n_distinct(full_CCMS_path)
)
hits_per_compound[1:10, 1:3]

# Add the per-compound counts up per Organism. The Organism column is then
# renamed to library_name so it can serve as the join key against the
# library-size table.
hits_per_library <- hits_per_compound %>%
  group_by(Organism) %>%
  summarise(total_hits = sum(Unique)) %>%
  rename(library_name = Organism)

# Number of spectra contained in each library.
library_sizes <- fread("Manuscript/Data/GNPS_library_annotation_info/LIVING-DATA-SEARCH-ba6a5b6a-production_library_sizes-main.tsv", sep="\t", header=TRUE)

# Left join keeps every library that has hits; libraries without a size entry
# get NA. `proportion` is hits divided by library size and is not capped at 1.
library_summary <- left_join(hits_per_library, library_sizes, by="library_name")
library_summary <- mutate(library_summary, proportion = total_hits/number_spectra)

write.csv(library_summary, "Manuscript/Figures/Source_Material/ReDUSummary_libraryinfo.csv", row.names=FALSE)
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
library_name number_spectra
GNPS-COLLECTIONS-MISC 46
MMV_NEGATIVE 47
GNPS-COLLECTIONS-PESTICIDES-NEGATIVE 76
LDB_POSITIVE 83
MMV_POSITIVE 110
GNPS-FAULKNERLEGACY 127
GNPS-PRESTWICKPHYTOCHEM 143
MIADB 172
BILELIB19 177
GNPS-NIH-CLINICALCOLLECTION2 195
LDB_NEGATIVE 226
GNPS-NIH-CLINICALCOLLECTION1 377
DEREPLICATOR_IDENTIFIED_LIBRARY 379
CASMI 568
GNPS-EMBL-MCF 585
GNPS-COLLECTIONS-PESTICIDES-POSITIVE 653
GNPS-SELLECKCHEM-FDA-PART2 656
SUMNER 904
GNPS-NIH-NATURALPRODUCTSLIBRARY 1267
GNPS-NIH-SMALLMOLECULEPHARMACOLOGICALLYACTIVE 1460
MASSBANKEU 1492
GNPS-NIH-NATURALPRODUCTSLIBRARY_ROUND2_NEGATIVE 1863
HMDB 2235
GNPS-SELLECKCHEM-FDA-PART1 2388
GNPS-LIBRARY 4697
GNPS-NIST14-MATCHES 5763
GNPS-NIH-NATURALPRODUCTSLIBRARY_ROUND2_POSITIVE 5796
RESPECT 7112
MASSBANK 11999
PNNL-LIPIDS-NEGATIVE 16142
PNNL-LIPIDS-POSITIVE 30582
MONA 49241
32 changes: 32 additions & 0 deletions examples/MS2_library_information/ReDUSummary_libraryinfo.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
"library_name","total_hits","number_spectra","proportion"
"BILELIB19",25392,177,143.457627118644
"CASMI",9164,568,16.1338028169014
"DEREPLICATOR_IDENTIFIED_LIBRARY",4840,379,12.7704485488127
"GNPS-COLLECTIONS-MISC",5757,46,125.152173913043
"GNPS-COLLECTIONS-PESTICIDES-NEGATIVE",9,76,0.118421052631579
"GNPS-COLLECTIONS-PESTICIDES-POSITIVE",2707,653,4.14548238897397
"GNPS-EMBL-MCF",31082,585,53.1316239316239
"GNPS-FAULKNERLEGACY",1322,127,10.4094488188976
"GNPS-LIBRARY",150457,4697,32.0325739833937
"GNPS-NIH-CLINICALCOLLECTION1",3512,377,9.31564986737401
"GNPS-NIH-CLINICALCOLLECTION2",671,195,3.44102564102564
"GNPS-NIH-NATURALPRODUCTSLIBRARY",4474,1267,3.53117600631413
"GNPS-NIH-NATURALPRODUCTSLIBRARY_ROUND2_NEGATIVE",1667,1863,0.894793344068706
"GNPS-NIH-NATURALPRODUCTSLIBRARY_ROUND2_POSITIVE",194438,5796,33.5469289164941
"GNPS-NIH-SMALLMOLECULEPHARMACOLOGICALLYACTIVE",4625,1460,3.16780821917808
"GNPS-NIST14-MATCHES",944273,5763,163.85094568801
"GNPS-PRESTWICKPHYTOCHEM",689,143,4.81818181818182
"GNPS-SELLECKCHEM-FDA-PART1",12760,2388,5.34338358458962
"GNPS-SELLECKCHEM-FDA-PART2",5050,656,7.69817073170732
"HMDB",16827,2235,7.52885906040268
"LDB_NEGATIVE",2632,226,11.646017699115
"LDB_POSITIVE",745,83,8.97590361445783
"MASSBANK",110935,11999,9.24535377948162
"MASSBANKEU",9239,1492,6.19235924932976
"MIADB",571,172,3.31976744186047
"MMV_POSITIVE",585,110,5.31818181818182
"MONA",278184,49241,5.64943847606669
"PNNL-LIPIDS-NEGATIVE",3126,16142,0.193656300334531
"PNNL-LIPIDS-POSITIVE",298000,30582,9.74429402916748
"RESPECT",33684,7112,4.73622047244094
"SUMNER",2742,904,3.03318584070796
18 changes: 18 additions & 0 deletions examples/MS2_library_information/params.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<!--
  Parameters of a LIVING-DATA-SEARCH workflow run (workflow_version 1.2.5).
  Only the RUN_LIBRARY_FILES_CREATION step is set to YES; every other RUN_*
  step is set to NO.
  NOTE(review): the first eight characters of the task id (ba6a5b6a) match the
  prefix in the library sizes TSV file name read by the library summary R
  script. Presumably this is the run that produced that table; confirm.
-->
<parameters>
<parameter name="RUN_CLUSTER">NO</parameter>
<parameter name="RUN_CONVERT">NO</parameter>
<parameter name="RUN_DEREPLICATOR">NO</parameter>
<parameter name="RUN_GLOBALNETWORK">NO</parameter>
<parameter name="RUN_LIBRARY_FILES_CREATION">YES</parameter>
<parameter name="RUN_MOLECULEEXPLORER">NO</parameter>
<parameter name="RUN_REPORTER">NO</parameter>
<parameter name="RUN_SEARCH">NO</parameter>
<parameter name="desc">Test Library Creation [9]</parameter>
<parameter name="email">[email protected]</parameter>
<parameter name="task">ba6a5b6a1c0946b3a641c67ad59fb2df</parameter>
<parameter name="user">mwang87</parameter>
<parameter name="uuid">1E14A8E2-5075-0001-3D8A-6940F0501B9D</parameter>
<parameter name="workflow">LIVING-DATA-SEARCH</parameter>
<parameter name="workflow_version">1.2.5</parameter>
</parameters>

0 comments on commit eb3d13b

Please sign in to comment.