Skip to content

Commit

Permalink
Merge branch 'master' of github.com:mwang87/ReDU-MS2-GNPS
Browse files Browse the repository at this point in the history
  • Loading branch information
mwang87 committed Sep 9, 2019
2 parents 62ee407 + 2101c67 commit 23c6297
Show file tree
Hide file tree
Showing 15 changed files with 855 additions and 139 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Welcome to ReDU :)
# Welcome to ReDU
## Reanalysis of Data User Interface

[ReDU](https://redu.ucsd.edu/) is a community- and data-driven approach to find and reuse public data containing tandem MS data at the repository scale. ReDU is a launchpad for co- or re-analysis of public data via the Global Natural Product Social Molecular Networking Platform [(GNPS)](https://gnps.ucsd.edu/ProteoSAFe/static/gnps-splash.jsp). Our aim is to empower researchers to put their data in the context of public data as well as explore questions using public data at the repository scale.
Expand Down
4 changes: 4 additions & 0 deletions code/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -28,5 +28,9 @@ RUN pip3 install seaborn
#RUN export DEBIAN_FRONTEND=noninteractive
#RUN apt-get install -y r-base r-base-dev

RUN apt-get update -y
RUN apt-get install -y git-core
RUN pip3 install git+https://github.com/mwang87/CCMS_ProteoSAFe_pythonAPI.git

COPY . /app
WORKDIR /app
37 changes: 29 additions & 8 deletions code/redu_pca.py
Original file line number Diff line number Diff line change
Expand Up @@ -102,15 +102,36 @@ def project_new_data(new_file_occurrence_table, output_file):

#reformat the occurance table for the new data being fed in
new_data = pd.read_csv(new_file_occurrence_table, sep = "\t")
all_compound_occurances = new_data["Compound_Name"]
all_file_occurances = new_data["full_CCMS_path"]

new_compound_list = new_data["LibraryID"].tolist()
new_data.drop(labels=["LibraryID", "TotalFiles"], axis=1, inplace=True)
just_matrix = new_data.values

new_sample_list = list(new_data.columns.values)

new_sparse_occ_matrix = pd.DataFrame(data = just_matrix, index = new_compound_list, columns = new_sample_list)

#create a new dataframe with only the information needed to reconstruct, redundant but easier to see
compounds_filname_df = pd.DataFrame({"Compound_Name" : all_compound_occurances, "full_CCMS_path" : all_file_occurances})

#sorting dataframe by sample in order to help? speed up things
compounds_filname_df.sort_values(by = "Compound_Name", axis = 0, inplace = True)

#determine the header for the new table
unique_compounds, compound_index = np.unique(compounds_filname_df["Compound_Name"], return_inverse = True)

#determine the unique samples for the new table
unique_sample, file_index = np.unique(compounds_filname_df["full_CCMS_path"], return_inverse = True)

all_compounds = list(compounds_filname_df["Compound_Name"])
all_samples = list(compounds_filname_df["full_CCMS_path"])

#create a matrix from the coordinates given
data = [1] * len(compound_index)

matrix = sps.coo_matrix((data, (compound_index, file_index)), shape = None).todok().toarray()
#handling duplicates within the array
matrix[matrix > 0] = 1

#convert it into the correct format for the return
new_sparse_occ_matrix = pd.DataFrame(index = list(unique_compounds), columns = list(unique_sample), data = matrix)

new_compound_list = list(unique_compounds)
new_sample_list = list(unique_sample)
#determine which compounds are common between the original and new datasets
find_common_compounds = [item for item in new_compound_list if item in old_compound_list]

Expand Down
Binary file modified code/static/img/redulogo.gif
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
3 changes: 3 additions & 0 deletions code/templates/layout.html
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,9 @@
<li class="nav-item">
<a class="nav-link" href="/datalookup">File Query - Sample Information</a>
</li>
<li class="nav-item">
<a class="nav-link" href="ftp://massive.ucsd.edu/MSV000084206/other/ReDU_all_identifications.tsv">Download Annotations</a>
</li>
</ul>

</nav>
Expand Down
114 changes: 0 additions & 114 deletions code/test/integration_tests.py

This file was deleted.

117 changes: 117 additions & 0 deletions code/test/test_redu_integration.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,117 @@
import os
import sys
import requests
import json

# Root URL of the live ReDU deployment these integration tests hit.
BASE_URL = "https://redu.ucsd.edu/"
# Task ID passed to the processcomparemultivariate endpoint in test_your_pca.
SAMPLE_TASK_ID = "ffa003f6c4d844188f1f751d34c649b0"
# Compound name used by the enrichment/lookup tests; presumably known to
# exist in the ReDU library — TODO confirm it stays present on the server.
TEST_COMPOUND = "2,5-Dimethoxyphenethylamine"

def test_pca_library_search():
    """PCA comparison endpoint responds OK for a library-search task.

    raise_for_status() raises requests.HTTPError on a 4xx/5xx response,
    which is what fails the test; a bare `return 0` is ignored by pytest,
    so it has been dropped.
    """
    query_url = BASE_URL + "processcomparemultivariate?task={}".format("f39c94cb7afe4568950bf61cdb8fee0d")
    r = requests.get(query_url)
    r.raise_for_status()

def test_pca_metabolomics_snets():
    """PCA comparison endpoint responds OK for a metabolomics-SNETS task.

    Failure is signalled by requests.HTTPError from raise_for_status();
    pytest ignores return values, so the old `return 0` was dead code.
    """
    query_url = BASE_URL + "processcomparemultivariate?task={}".format("1ad7bc366aef45ce81d2dfcca0a9a5e7")
    r = requests.get(query_url)
    r.raise_for_status()

def test_pca_feature_based():
    """PCA comparison endpoint responds OK for a feature-based task.

    Failure is signalled by requests.HTTPError from raise_for_status();
    pytest ignores return values, so the old `return 0` was dead code.
    """
    query_url = BASE_URL + "processcomparemultivariate?task={}".format("bb49a839face44cbb5ec3e6f855e7285")
    r = requests.get(query_url)
    r.raise_for_status()


def test_data_dump():
    """The full data dump downloads and is at least the expected size.

    BUG FIX: the original returned 1 on failure, but pytest ignores return
    values, so an undersized dump could never fail the test. An assert is
    the only thing pytest actually reports.
    """
    response = requests.get(BASE_URL + "dump")
    response.raise_for_status()
    # len() measures the payload itself; sys.getsizeof also counts Python
    # object overhead and is not a size check on the downloaded data.
    assert len(response.content) >= 17762000, "data dump smaller than expected"

def test_attribute_filtration():
    """Attribute-term filtering returns records with the expected fields.

    BUG FIX: the original returned 1 on a key mismatch, which pytest
    silently ignores; an assert actually fails the test.
    """
    query_url = BASE_URL + "attribute/MassSpectrometer/attributeterms?filters=%5B%5D"
    response = requests.get(query_url)
    response.raise_for_status()
    # response.json() replaces json.loads(response.content) — same result,
    # and it honors the response encoding.
    data = response.json()
    expected_keys = ["attributename", "attributeterm", "ontologyterm", "countfiles"]
    assert list(data[0].keys()) == expected_keys


def test_attribute_terms_display():
    """The attributes listing returns records with the expected fields.

    BUG FIX: the original returned 1 on a key mismatch, which pytest
    silently ignores; an assert actually fails the test.
    """
    # BASE_URL already ends with "/", so the path must not start with one
    # (the original produced ".../​/attributes" with a double slash).
    query_url = BASE_URL + "attributes"
    response = requests.get(query_url)
    response.raise_for_status()
    data = response.json()
    expected_keys = ["attributename", "attributedisplay", "countterms"]
    assert list(data[0].keys()) == expected_keys

def test_file_enrichment():
    """Compound filename lookup returns records keyed by 'filepath'.

    BUG FIX: the original returned 1 on a mismatch, which pytest silently
    ignores; an assert actually fails the test.
    """
    query_url = BASE_URL + "compoundfilename"
    params = {'compoundname': TEST_COMPOUND}
    response = requests.get(query_url, params=params)
    response.raise_for_status()
    data = response.json()
    # Check the first key of the first record.
    assert next(iter(data[0])) == 'filepath'

def test_compound_enrichment():
    """Compound enrichment returns records with the expected fields.

    BUG FIX: the original returned 1 on a key mismatch, which pytest
    silently ignores; an assert actually fails the test.
    """
    query_url = BASE_URL + "compoundenrichment"
    params = {'compoundname': TEST_COMPOUND}
    # The original passed `params` positionally to requests.post, which
    # binds it to `data=` (a form-encoded body, not query parameters).
    # Made explicit to preserve that behavior deliberately.
    response = requests.post(query_url, data=params)
    response.raise_for_status()
    data = response.json()
    expected_keys = ["attribute_name", "attribute_term", "totalfiles", "compoundfiles", "percentage"]
    assert list(data[0].keys()) == expected_keys

def test_your_pca():
    """Per-task PCA projection downloads and is at least the expected size.

    BUG FIX: the original returned 1 on failure, but pytest ignores return
    values, so the size check could never fail the test.
    """
    params = {'task': SAMPLE_TASK_ID}
    query_url = BASE_URL + "processcomparemultivariate"
    response = requests.get(query_url, params=params)
    response.raise_for_status()
    # len() measures the payload itself, unlike sys.getsizeof which adds
    # Python object overhead.
    assert len(response.content) >= 28000000, "per-task PCA response smaller than expected"


def test_global_pca():
    """Global multivariate view downloads and is at least the expected size.

    BUG FIX: the original returned 1 on failure, but pytest ignores return
    values, so the size check could never fail the test.
    """
    response = requests.get(BASE_URL + "displayglobalmultivariate")
    response.raise_for_status()
    # len() measures the payload itself, unlike sys.getsizeof which adds
    # Python object overhead.
    assert len(response.content) >= 27760000, "global PCA response smaller than expected"
Loading

0 comments on commit 23c6297

Please sign in to comment.