Mimetypes used #20


Open · wants to merge 13 commits into main
2 changes: 1 addition & 1 deletion app/config.py
@@ -31,7 +31,7 @@ class Config(object):
    KNOWLEDGEBASE_KEY = os.environ.get("KNOWLEDGEBASE_KEY", "secret-key")
    DEPLOY_ENV = os.environ.get("DEPLOY_ENV", "development")
    SPARC_APP_HOST = os.environ.get("SPARC_APP_HOST", "https://sparc-app.herokuapp.com")
-   SCI_CRUNCH_HOST = os.environ.get("SCICRUNCH_HOST", "https://scicrunch.org/api/1/elastic/SPARC_Datasets_pr")
+   SCI_CRUNCH_HOST = os.environ.get("SCICRUNCH_HOST", "https://scicrunch.org/api/1/elastic/SPARC_PortalDatasets_dev")
    MAPSTATE_TABLENAME = os.environ.get("MAPSTATE_TABLENAME", "mapstates")
    SCAFFOLDSTATE_TABLENAME = os.environ.get("SCAFFOLDSTATE_TABLENAME", "scaffoldstates")
    WRIKE_TOKEN = os.environ.get("WRIKE_TOKEN")
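Since the default Elasticsearch index changes here, deployments that still need the old `SPARC_Datasets_pr` index can pin it through the `SCICRUNCH_HOST` environment variable. A minimal sketch, assuming the `app.config` module path shown in this diff:

```python
import os

# Pin the previous index; this must happen before app.config is imported,
# because Config reads the environment at import time.
os.environ["SCICRUNCH_HOST"] = "https://scicrunch.org/api/1/elastic/SPARC_Datasets_pr"

from app.config import Config

print(Config.SCI_CRUNCH_HOST)  # -> the pinned SPARC_Datasets_pr URL
```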
57 changes: 57 additions & 0 deletions app/main.py
@@ -210,6 +210,63 @@ def direct_download_url(path):
    resource = response["Body"].read()
    return resource

# /scicrunch-dataset/: Returns the raw scicrunch result for a given DOI
@app.route("/scicrunch-dataset/<doi1>/<doi2>")
def sci_doi(doi1, doi2):
    doi = doi1 + '/' + doi2
    print(doi)
    data = create_doi_request(doi)
    try:
        response = requests.post(
            f'{Config.SCI_CRUNCH_HOST}/_search?api_key={Config.KNOWLEDGEBASE_KEY}',
            json=data)
        return response.json()
    except requests.exceptions.HTTPError as err:
        logging.error(err)
        return json.dumps({'error': str(err)})
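A usage sketch for this route with Flask's test client (assuming the Flask instance lives in `app.main`, as the test suite below suggests; the DOI is the one from the new test):

```python
import json
from app.main import app  # assumption: the Flask instance from this diff

with app.test_client() as client:
    # <doi1>/<doi2> are rejoined into "10.26275/pzek-91wx" inside sci_doi()
    r = client.get('/scicrunch-dataset/10.26275/pzek-91wx')
    hits = json.loads(r.data)['hits']['hits']
    print(hits[0]['_id'])  # expected: "DOI:10.26275/pzek-91wx"
```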

# /scicrunch-dataset-processed/: Returns processed scicrunch results for a given DOI
@app.route("/scicrunch-dataset-processed/<doi1>/<doi2>")
def sci_doi_processed(doi1, doi2):
    doi = doi1 + '/' + doi2
    print(doi)
    data = create_doi_request(doi)
    try:
        response = requests.post(
            f'{Config.SCI_CRUNCH_HOST}/_search?api_key={Config.KNOWLEDGEBASE_KEY}',
            json=data)
        return process_kb_results(response.json())
    except requests.exceptions.HTTPError as err:
        logging.error(err)
        return json.dumps({'error': str(err)})

# /scicrunch-query-string/: Returns results for a given organ curie. These can be processed by the sidebar
@app.route("/scicrunch-query-string/")
def sci_organ():
    fields = request.args.getlist('field')
    curie = request.args.get('curie')
    # field example: "*organ.curie"
    data = {
        "size": 20,
        "from": 0,
        "query": {
            "query_string": {
                "fields": fields,
                "query": curie
            }
        }
    }

    try:
        response = requests.post(
            f'{Config.SCI_CRUNCH_HOST}/_search?api_key={Config.KNOWLEDGEBASE_KEY}',
            json=data)
        return process_kb_results(response.json())
    except requests.exceptions.HTTPError as err:
        logging.error(err)
        return json.dumps({'error': str(err)})
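And a matching sketch for the query-string route; `*organ.curie` is the field example from the comment above, and `UBERON:0000948` (heart) is an illustrative curie:

```python
import json
from app.main import app  # assumption: the Flask instance from this diff

with app.test_client() as client:
    # request.args.getlist('field') collects every `field` parameter
    r = client.get('/scicrunch-query-string/?field=*organ.curie&curie=UBERON:0000948')
    print(json.loads(r.data)['numberOfHits'])
```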



# /search/: Returns scicrunch results for a given <search> query
@app.route("/search/", defaults={'query': ''})
61 changes: 54 additions & 7 deletions app/process_kb_results.py
@@ -17,6 +17,23 @@
    'csvFiles': ['objects']
}

def create_doi_request(doi):
    query = {
        "query": {
            "bool": {
                "must": [{"match_all": {}}],
                "should": [],
                "filter": {
                    "term": {
                        "_id": f'DOI:{doi}'
                    }
                }
            }
        }
    }

    return query
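Concretely, the request body generated for the DOI used in the new test looks like this (a sketch; the output comment is written out by hand from the structure above):

```python
from app.process_kb_results import create_doi_request

print(create_doi_request('10.26275/pzek-91wx'))
# {'query': {'bool': {'must': [{'match_all': {}}],
#                     'should': [],
#                     'filter': {'term': {'_id': 'DOI:10.26275/pzek-91wx'}}}}}
```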

# create_facet_query(type): Generates facet search request data for scicrunch given a 'type'; where
# 'type' is 'species', 'gender', 'genotype', or 'organ' at this stage.
@@ -25,7 +42,8 @@ def create_facet_query(type):
    type_map = {
        'species': ['organisms.primary.species.name.aggregate', 'organisms.sample.species.name.aggregate'],
        'gender': ['attributes.subject.sex.value'],
-        'genotype': ['anatomy.organ.name.aggregate']
+        'genotype': ['anatomy.organ.name.aggregate'],
+        'organ': ['anatomy.organ.name.aggregate']
    }

    data = {
@@ -69,7 +87,8 @@ def create_filter_request(query, terms, facets, size, start):
    type_map = {
        'species': ['organisms.primary.species.name.aggregate', 'organisms.sample.species.name'],
        'gender': ['attributes.subject.sex.value', 'attributes.sample.sex.value'],
-        'genotype': ['anatomy.organ.name.aggregate']
+        'genotype': ['anatomy.organ.name.aggregate'],
+        'organ': ['anatomy.organ.name.aggregate']
    }

    # Data structure of a scicrunch search
@@ -136,7 +155,9 @@ def process_kb_results(results):
    for i, hit in enumerate(hits):
        attr = get_attributes(attributes, hit)
        attr['doi'] = convert_doi_to_url(attr['doi'])
-        attr['csvFiles'] = find_csv_files(attr['csvFiles'])
+        objects = attr['csvFiles']  # Have to do this as not all datasets return objects
+        attr['csvFiles'] = find_csv_files(objects)
+        attr['scaffolds'] = find_scaffold_json_files(objects)
        output.append(attr)
    return json.dumps({'numberOfHits': results['hits']['total'], 'results': output})
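With this change the processed payload carries a `scaffolds` list alongside `csvFiles`; roughly (a sketch, values illustrative and keys abbreviated):

```python
# Shape of json.loads(process_kb_results(results)) after this change:
example_output = {
    'numberOfHits': 1,
    'results': [{
        'doi': 'https://doi.org/10.26275/pzek-91wx',  # illustrative
        'csvFiles': [],   # objects with mimetype.name == 'text/csv'
        'scaffolds': [],  # objects with additional_mimetype.name == 'inode/vnd.abi.scaffold+file'
        # ...plus the other keys from the `attributes` map below...
    }]
}
```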

Expand All @@ -146,11 +167,36 @@ def convert_doi_to_url(doi):
return doi
return doi.replace('DOI:', 'https://doi.org/')

def convert_url_to_doi(doi):
    if not doi:
        return doi
    return doi.replace('https://doi.org/', 'DOI:')


def find_csv_files(obj_list):
    if not obj_list:
        return obj_list
-    return [obj for obj in obj_list if obj.get('mimetype', 'none') == 'text/csv']
+    return [obj for obj in obj_list if obj.get('mimetype', {}).get('name', 'none') == 'text/csv']


def find_scaffold_json_files(obj_list):
    if not obj_list:
        return obj_list
    return [obj for obj in obj_list if obj.get('additional_mimetype', {}).get('name', 'none') == 'inode/vnd.abi.scaffold+file']

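Both filters assume the updated object shape in which `mimetype` and `additional_mimetype` are nested dicts carrying a `name` key rather than plain strings. A self-contained sketch with illustrative objects:

```python
from app.process_kb_results import find_csv_files, find_scaffold_json_files

objects = [  # illustrative shapes matching what the filters expect
    {'mimetype': {'name': 'text/csv'}},
    {'mimetype': {'name': 'image/png'},
     'additional_mimetype': {'name': 'inode/vnd.abi.scaffold+file'}},
]

print(len(find_csv_files(objects)))            # 1 - only the text/csv object
print(len(find_scaffold_json_files(objects)))  # 1 - only the scaffold object
```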

attributes = {
    'scaffolds': ['scaffolds'],
    'samples': ['attributes', 'sample', 'subject'],
    'name': ['item', 'name'],
    'identifier': ['item', 'identifier'],
    'uri': ['distributions', 'current', 'uri'],
    'updated': ['dates', 'updated'],
    'organs': ['anatomy', 'organ'],
    'contributors': ['contributors'],
    'doi': ['item', 'curie'],
    'csvFiles': ['objects']
}


# get_attributes: Use the 'attributes' map (defined above) to step through the large scicrunch result dict
@@ -160,11 +206,12 @@ def get_attributes(attributes, dataset):
    for k, attr in attributes.items():
        subset = dataset['_source']  # set our subset to the full dataset result
        key_attr = False
-        for key in attr:
+        for n, key in enumerate(attr):  # step through attributes
            if isinstance(subset, dict):
-                if key in subset.keys():
+                if key in subset.keys():  # continue if keys are found
                    subset = subset[key]
-                    key_attr = subset
+                    if n + 1 == len(attr):  # if we made it to the end, save this subset
+                        key_attr = subset
        found_attr[k] = key_attr
    return found_attr

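The enumerate change means a value is kept only when the full key path is walked to its last element; a partially matched path now leaves the attribute as False instead of a half-resolved subtree. A small sketch (illustrative hit, real `attributes` map):

```python
from app.process_kb_results import get_attributes, attributes

dataset = {'_source': {  # minimal illustrative hit
    'item': {'name': 'Example dataset', 'curie': 'DOI:10.26275/pzek-91wx'},
}}

attr = get_attributes(attributes, dataset)
print(attr['name'])  # 'Example dataset' - full path ['item', 'name'] resolved
print(attr['uri'])   # False - the ['distributions', 'current', 'uri'] path is absent
```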
5 changes: 5 additions & 0 deletions tests/test_scicrunch.py
@@ -13,6 +13,11 @@ def test_scicrunch_keys(client):
    assert r.status_code == 200
    assert 'numberOfHits' in json.loads(r.data).keys()

def test_scicrunch_dataset_doi(client):
    r = client.get('/scicrunch-dataset/10.26275/pzek-91wx')
    assert json.loads(r.data)['hits']['hits'][0]['_id'] == "DOI:10.26275/pzek-91wx"


def test_scicrunch_search(client):
    r = client.get('/search/heart')
    assert r.status_code == 200