Skip to content

Commit

Permalink
Merge pull request #243 from SmartAPI/refactor-tests
Browse files Browse the repository at this point in the history
model and structure update
  • Loading branch information
NikkiBytes authored May 17, 2024
2 parents 5b6ccbc + a2be26b commit ad2e330
Show file tree
Hide file tree
Showing 9 changed files with 948 additions and 1,015 deletions.
11 changes: 6 additions & 5 deletions src/controller/smartapi.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,8 @@
from datetime import datetime, timezone
from warnings import warn

import pprint

from model import ConsolidatedMetaKGDoc, MetaKGDoc, SmartAPIDoc
from utils import decoder, monitor
from utils.downloader import download
Expand Down Expand Up @@ -192,19 +194,18 @@ def edge_consolidation_build(cls):
key = f'{edge["_source"]["subject"]}-{edge["_source"]["predicate"]}-{edge["_source"]["object"]}'
# Set edge details
edge_api = edge["_source"]["api"]
edge_bte = edge["_source"]["bte"]
edge_details_dict = {"api":edge_api , "bte":edge_bte}

# Add edge to the dictionary, merging API details if the edge already exists
if key in edge_dict:
if edge_details_dict not in edge_dict[key]["apis"]:
edge_dict[key]["apis"].append(edge_details_dict)
if edge_api not in edge_dict[key]["api"]:
edge_dict[key]["api"].append(edge_api)
else:
edge_dict[key] = {
"_id": key,
"subject": edge["_source"]["subject"],
"object": edge["_source"]["object"],
"predicate": edge["_source"]["predicate"],
"apis": [edge_details_dict]
"api": [edge_api]
}

processed_edges += 1
Expand Down
52 changes: 35 additions & 17 deletions src/handlers/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
import logging
from typing import List, Union
import os

import bmt
from biothings.utils import serializer
from biothings.web.auth.authn import BioThingsAuthnMixin
Expand Down Expand Up @@ -460,25 +459,44 @@ async def get(self, *args, **kwargs):

await super().get(*args, **kwargs)

def get_filtered_api(self, api_dict):
    """Return the view of one API record selected by the request flags.

    The result depends on ``self.args.api_details`` and ``self.args.bte``:

    * ``api_details`` unset: a new summary dict holding only ``name`` and
      ``smartapi.id`` (with placeholder defaults when they are missing),
      plus ``bte`` when ``self.args.bte`` is set.
    * ``api_details`` set: ``api_dict`` itself; its ``bte`` entry is
      removed in place unless ``self.args.bte`` is set.

    Args:
        api_dict: mapping describing a single API.

    Returns:
        The filtered API mapping.
    """
    include_bte = self.args.bte

    if self.args.api_details:
        # Full record requested. Previously the "bte and api_details" case
        # matched no branch and the return hit an unbound local.
        if not include_bte:
            api_dict.pop('bte', None)
        return api_dict

    summary = {
        'name': api_dict.get('name', 'Default Name'),
        'smartapi': {
            'id': api_dict.get('smartapi', {}).get('id', 'Default ID')
        },
    }
    if include_bte:
        summary['bte'] = api_dict.get('bte', {})
    return summary

def process_apis(self, apis):
    """Filter API records in place according to the request flags.

    Args:
        apis: either a list of API mappings (as passed for a consolidated
            hit's ``api`` field) or a single hit mapping whose API record
            sits under its ``api`` key.

    A list has every element replaced by ``self.get_filtered_api(element)``.
    A hit mapping has its ``api`` entry replaced the same way; a top-level
    ``bte`` entry is first moved under ``api``. A hit without an ``api``
    mapping is left unchanged instead of raising ``KeyError``. Any other
    input type is ignored.
    """
    if isinstance(apis, list):
        apis[:] = [self.get_filtered_api(api_doc) for api_doc in apis]
        return

    if not isinstance(apis, dict):
        return

    api_doc = apis.get('api')
    if not isinstance(api_doc, dict):
        # Nothing to filter, e.g. the hit was returned without its api field.
        return

    if 'bte' in apis:
        # update dict for new format: bte lives under api
        api_doc['bte'] = apis.pop('bte')
    apis['api'] = self.get_filtered_api(api_doc)

def write(self, chunk):
"""
Overwrite the biothings query handler to ...
Expand All @@ -491,10 +509,10 @@ def write(self, chunk):
try:
if self.args.consolidated:
for data_hit in chunk['hits']:
self.process_apis(data_hit['apis'])
self.process_apis(data_hit['api'])
else:
for hit_dict in chunk['hits']:
self.process_apis([hit_dict])
self.process_apis(hit_dict)

if self.format == "graphml":
chunk = edges2graphml(
Expand Down
63 changes: 5 additions & 58 deletions src/model/metakg.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
"""
Elasticsearch Document Object Model for MetaKG
"""
from elasticsearch_dsl import InnerDoc, Keyword, Object, Text, analysis, mapping
from elasticsearch_dsl import InnerDoc, Keyword, Object, Nested, Text, analysis, mapping

from config import METAKG_ES_INDEX, METAKG_ES_INDEX_CONSOLIDATED

Expand Down Expand Up @@ -42,42 +42,6 @@
"mapping": {"type": "object", "enabled": False},
}
},
# {
# "ignore_api_params_field": {
# "path_match": "*bte.query_operation.params",
# "mapping": {"type": "object", "enabled": False},
# }
# },
# {
# "ignore_api_request_body_field": {
# "path_match": "apis.bte.query_operation.request_body",
# "mapping": {"type": "object", "enabled": False},
# }
# },
# {
# "ignore_api_response_mapping_field": {
# "path_match": "apis.bte.response_mapping",
# "mapping": {"type": "object", "enabled": False},
# }
# },
# {
# "ignore_api_params_field": {
# "path_match": "api.bte.query_operation.params",
# "mapping": {"type": "object", "enabled": False},
# }
# },
# {
# "ignore_api_request_body_field": {
# "path_match": "api.bte.query_operation.request_body",
# "mapping": {"type": "object", "enabled": False},
# }
# # },
# {
# "ignore_api_response_mapping_field": {
# "path_match": "api.bte.response_mapping",
# "mapping": {"type": "object", "enabled": False},
# }
# },
{
"default_string": {
"match_mapping_type": "string",
Expand All @@ -91,6 +55,7 @@
],
)


# # add two copy_to fields
metakg_mapping.field("all", "text") # the default all field for unfielded queries
metakg_mapping.field("node", lowercase_keyword) # a field combines both subject and object fields
Expand All @@ -111,20 +76,16 @@ class APIInnerDoc(InnerDoc):
name = default_text
smartapi = Object(SmartAPIInnerDoc)
tags = lowercase_keyword_copy_to_all
provided_by = default_text
# We cannot define the "x-translator" field here due to the "-" in its name,
# so we will have it indexed via the dynamic templates

class ConsolidatedAPIInnerDoc(APIInnerDoc):
provided_by = default_text
tags = lowercase_keyword_copy_to_all



class MetaKGDoc(BaseDoc):
subject = lowercase_keyword_node
object = lowercase_keyword_node
predicate = lowercase_keyword_copy_to_all
provided_by = default_text
# provided_by = default_text
api = Object(APIInnerDoc)

class Index:
Expand All @@ -147,17 +108,10 @@ def get_url(self):
return self.api.smartapi.metadata


class ConsolidatedMetaKGDoc(BaseDoc):
class ConsolidatedMetaKGDoc(MetaKGDoc):
"""MetaKG ES index for edges consolidated on their subject/predicate/object
Multiple APIs providing the same edge, grouped as a list under the 'api' field.
"""

# Existing fields
subject = lowercase_keyword_node
object = lowercase_keyword_node
predicate = lowercase_keyword_copy_to_all
apis = Object(ConsolidatedAPIInnerDoc)

class Index:
"""
Index Settings
Expand All @@ -170,10 +124,3 @@ class Index:
"mapping.ignore_malformed": True,
"mapping.total_fields.limit": 2500,
}

class Meta:
mapping = metakg_mapping

def get_url(self):
return self.api.smartapi.metadata

4 changes: 2 additions & 2 deletions src/pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -213,9 +213,9 @@ def apply_extras(self, search, options):
"""
apply extra filters
"""
if not options._source:
# if not options._source:
# by default exclude api.bte or bte field, but can be included by specifying in the fields parameter
options._source = ["-api.bte", "-bte"]
# options._source = ["-api.bte", "-bte"]

search = super().apply_extras(search, options)
# apply extra filters from query parameters
Expand Down
Loading

0 comments on commit ad2e330

Please sign in to comment.