From df6f005e0bef66e7780f1cfa8a531b0ca72dfb86 Mon Sep 17 00:00:00 2001 From: Sundareswar Pullela <110754869+sundareswarpullela@users.noreply.github.com> Date: Mon, 30 Sep 2024 10:17:05 -0700 Subject: [PATCH 1/7] update KG2c version to 2.10.1 #2380 --- app/config_kg2c.json | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/app/config_kg2c.json b/app/config_kg2c.json index be9f52f..cdda26b 100644 --- a/app/config_kg2c.json +++ b/app/config_kg2c.json @@ -1,6 +1,6 @@ { - "nodes_file": "https://kg2webhost.rtx.ai/kg2c-2.10.0-v1.0-nodes.jsonl.gz", - "edges_file": "https://kg2webhost.rtx.ai/kg2c-2.10.0-v1.0-edges.jsonl.gz", + "nodes_file": "https://kg2webhost.rtx.ai/kg2c-2.10.1-v1.0-nodes.jsonl.gz", + "edges_file": "https://kg2webhost.rtx.ai/kg2c-2.10.1-v1.0-edges.jsonl.gz", "biolink_version": "4.2.1", "kp_infores_curie": "infores:rtx-kg2", "endpoint_name": "kg2c", From ca30ee82847395ed3f97e2a93c495207fc92c9fb Mon Sep 17 00:00:00 2001 From: amykglen Date: Tue, 1 Oct 2024 15:43:24 -0700 Subject: [PATCH 2/7] Minor fixes to go into main branch with kg2.10.1 --- app/app/main.py | 5 +++-- app/app/plover.py | 10 +++++----- test/plover_tester.py | 2 +- 3 files changed, 9 insertions(+), 8 deletions(-) diff --git a/app/app/main.py b/app/app/main.py index 7b414e8..c4f1626 100644 --- a/app/app/main.py +++ b/app/app/main.py @@ -138,8 +138,9 @@ def run_get_logs(num_lines: int = 100): log_data_plover = f.readlines() with open('/var/log/uwsgi.log', 'r') as f: log_data_uwsgi = f.readlines() - response = {"description": f"The last {num_lines} lines from each of three logs (Plover, Gunicorn error, and " - "Gunicorn access) are included below.", + response = {"description": f"The last {num_lines} lines from two logs (Plover and uwsgi) " + f"are included below. 
You can control the number of lines shown with the " + f"num_lines parameter (e.g., ?num_lines=500).", "plover": log_data_plover[-num_lines:], "uwsgi": log_data_uwsgi[-num_lines:]} return flask.jsonify(response) diff --git a/app/app/plover.py b/app/app/plover.py index cdb8411..6a0e4dd 100644 --- a/app/app/plover.py +++ b/app/app/plover.py @@ -286,11 +286,6 @@ def build_indexes(self): deduplicated_edge["supporting_studies"] = list(study_objs_by_nctids.values()) self.edge_lookup_map = deduplicated_edges_map - # Save the preferred ID map now that we're done using it - self._save_to_pickle_file(self.preferred_id_map, f"{self.indexes_dir_path}/preferred_id_map.pkl") - del self.preferred_id_map - gc.collect() - # Convert all edges to their canonical predicate form; correct missing biolink prefixes logging.info(f"Converting edges to their canonical form") for edge_id, edge in self.edge_lookup_map.items(): @@ -386,6 +381,11 @@ def build_indexes(self): del self.subclass_index gc.collect() + # Save the preferred ID map now that we're done using it + self._save_to_pickle_file(self.preferred_id_map, f"{self.indexes_dir_path}/preferred_id_map.pkl") + del self.preferred_id_map + gc.collect() + # Create reversed category/predicate maps now that we're done building those maps self.category_map_reversed = self._reverse_dictionary(self.category_map) self.predicate_map_reversed = self._reverse_dictionary(self.predicate_map) diff --git a/test/plover_tester.py b/test/plover_tester.py index 1f6cd56..9c8e4df 100644 --- a/test/plover_tester.py +++ b/test/plover_tester.py @@ -170,7 +170,7 @@ def print_results(results: List[dict]): @staticmethod def get_supporting_study_attributes(edge: dict) -> List[dict]: return [attribute for attribute in edge["attributes"] - if attribute["attribute_type_id"] == "biolink:supporting_study"] + if attribute["attribute_type_id"] == "biolink:supporting_study_metadata"] @staticmethod def get_num_distinct_concepts(response: dict, qnode_key: str) -> int: From 
83597ce780eaea7f99a37904d5019781126b0fcf Mon Sep 17 00:00:00 2001 From: amykglen Date: Fri, 4 Oct 2024 10:24:31 -0700 Subject: [PATCH 3/7] Go back to python 3.8 to try to fix memory leak --- Dockerfile | 2 +- app/nginx_ssl_template.conf | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/Dockerfile b/Dockerfile index 9b98f69..11b7282 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,4 +1,4 @@ -FROM tiangolo/uwsgi-nginx-flask:python3.11 +FROM tiangolo/uwsgi-nginx-flask:python3.8 # Increase timeout (thanks https://github.com/tiangolo/uwsgi-nginx-flask-docker/issues/120#issuecomment-459857072) RUN echo "uwsgi_read_timeout 600;" > /etc/nginx/conf.d/custom_timeout.conf diff --git a/app/nginx_ssl_template.conf b/app/nginx_ssl_template.conf index b1c8a1c..919b38a 100644 --- a/app/nginx_ssl_template.conf +++ b/app/nginx_ssl_template.conf @@ -1,9 +1,9 @@ user nginx; -worker_processes 1; +worker_processes 2; error_log /var/log/nginx/error.log warn; pid /var/run/nginx.pid; events { - worker_connections 1024; + worker_connections 2048; } http { include /etc/nginx/mime.types; @@ -13,7 +13,7 @@ http { '"$http_user_agent" "$http_x_forwarded_for"'; access_log /var/log/nginx/access.log main; sendfile on; - keepalive_timeout 65; + keepalive_timeout 300; include /etc/nginx/conf.d/*.conf; server { listen 80; From a5c29f99c2e3811cb054a40072a5152ec9c817dd Mon Sep 17 00:00:00 2001 From: amykglen Date: Tue, 8 Oct 2024 08:05:00 -0700 Subject: [PATCH 4/7] Fix python version and keepalive timeout after merge --- Dockerfile | 2 +- app/nginx_ssl_template.conf | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Dockerfile b/Dockerfile index 9adce3b..3a5de47 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,4 +1,4 @@ -FROM tiangolo/uwsgi-nginx-flask:python3.8 +FROM tiangolo/uwsgi-nginx-flask:python3.11 # Increase timeout (thanks https://github.com/tiangolo/uwsgi-nginx-flask-docker/issues/120#issuecomment-459857072) RUN echo "uwsgi_read_timeout 600;" > 
/etc/nginx/conf.d/custom_timeout.conf diff --git a/app/nginx_ssl_template.conf b/app/nginx_ssl_template.conf index 919b38a..d62f982 100644 --- a/app/nginx_ssl_template.conf +++ b/app/nginx_ssl_template.conf @@ -13,7 +13,7 @@ http { '"$http_user_agent" "$http_x_forwarded_for"'; access_log /var/log/nginx/access.log main; sendfile on; - keepalive_timeout 300; + keepalive_timeout 65; include /etc/nginx/conf.d/*.conf; server { listen 80; From 5a969cedd2ebbb9502e54cc3ad7757a0f80c190f Mon Sep 17 00:00:00 2001 From: amykglen Date: Wed, 9 Oct 2024 09:04:41 -0700 Subject: [PATCH 5/7] Correct KG2c files to use after merging main into kg2.10.1c --- app/config_kg2c.json | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/app/config_kg2c.json b/app/config_kg2c.json index ab7a138..be1e410 100644 --- a/app/config_kg2c.json +++ b/app/config_kg2c.json @@ -1,6 +1,6 @@ { - "nodes_file": "https://kg2webhost.rtx.ai/kg2c-2.10.0-v1.0-nodes.jsonl.gz", - "edges_file": "https://kg2webhost.rtx.ai/kg2c-2.10.0-v1.0-edges.jsonl.gz", + "nodes_file": "https://kg2webhost.rtx.ai/kg2c-2.10.1-v1.0-nodes.jsonl.gz", + "edges_file": "https://kg2webhost.rtx.ai/kg2c-2.10.1-v1.0-edges.jsonl.gz", "biolink_version": "4.2.1", "kp_infores_curie": "infores:rtx-kg2", "endpoint_name": "kg2c", From 442f8ca5711d0ed8c5bdf59582a3d0ce86288285 Mon Sep 17 00:00:00 2001 From: amykglen Date: Wed, 9 Oct 2024 12:49:25 -0700 Subject: [PATCH 6/7] Add patches for RTX-KG2 issues #416 and #414 --- app/app/plover.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/app/app/plover.py b/app/app/plover.py index 62dd21f..2110a5d 100644 --- a/app/app/plover.py +++ b/app/app/plover.py @@ -156,12 +156,16 @@ def build_indexes(self): if "qualified_object_aspect" in edge: edge[self.graph_object_aspect_property] = edge["qualified_object_aspect"] del edge["qualified_object_aspect"] + # TODO: Remove this patch after these KG2.10.1pre issues are fixed in future KG2pre versions + edge["predicate"] = 
edge["predicate"].replace("biolink:biolink_", "biolink:") + if edge["primary_knowledge_source"] == "infores:biothings-multiomics-clinicaltrials": + edge["primary_knowledge_source"] = "infores:multiomics-clinicaltrials" # Zip up specified 'zip' columns to form a list of dicts (e.g., list of supporting studies) if self.kg_config.get("zip"): for zipped_prop_name, zipped_prop_info in self.kg_config["zip"].items(): for edge in edges: - # TODO: Remove this patch after CTKP TSVs are fixed.. + # TODO: Add generalized way of handling this situation (not CTKP-specific) if "tested_intervention" in zipped_prop_info["properties"]: edge["tested_intervention"] = edge["tested_intervention"] * len(edge["nctid"]) zip_cols = [edge[property_name] for property_name in zipped_prop_info["properties"]] From 9d689ab014fb294839a1782ce29b77244f10fed0 Mon Sep 17 00:00:00 2001 From: amykglen Date: Fri, 11 Oct 2024 14:56:14 -0700 Subject: [PATCH 7/7] Add attribute_source to all KG2 node/edge attributes --- app/config_kg2c.json | 24 ++++++++++++++++-------- 1 file changed, 16 insertions(+), 8 deletions(-) diff --git a/app/config_kg2c.json b/app/config_kg2c.json index be1e410..38dae34 100644 --- a/app/config_kg2c.json +++ b/app/config_kg2c.json @@ -30,38 +30,46 @@ }, "iri": { "attribute_type_id": "biolink:IriType", - "value_type_id": "metatype:Uri" + "value_type_id": "metatype:Uri", + "attribute_source": "{kp_infores_curie}" }, "description": { "attribute_type_id": "biolink:description", - "value_type_id": "metatype:String" + "value_type_id": "metatype:String", + "attribute_source": "{kp_infores_curie}" }, "all_categories": { "attribute_type_id": "biolink:category", "value_type_id": "metatype:Uriorcurie", - "description": "Categories of all nodes in this synonym set in RTX-KG2." 
+ "description": "Categories of all nodes in this synonym set in RTX-KG2.", + "attribute_source": "{kp_infores_curie}" }, "all_names": { "attribute_type_id": "biolink:synonym", "value_type_id": "metatype:String", - "description": "Names of all nodes in this synonym set in RTX-KG2." + "description": "Names of all nodes in this synonym set in RTX-KG2.", + "attribute_source": "{kp_infores_curie}" }, "equivalent_curies": { "attribute_type_id": "biolink:xref", "value_type_id": "metatype:Nodeidentifier", - "description": "Identifiers of all nodes in this synonym set in RTX-KG2." + "description": "Identifiers of all nodes in this synonym set in RTX-KG2.", + "attribute_source": "{kp_infores_curie}" }, "publications": { "attribute_type_id": "biolink:publications", - "value_type_id": "biolink:Uriorcurie" + "value_type_id": "biolink:Uriorcurie", + "attribute_source": "{kp_infores_curie}" }, "kg2_ids": { "attribute_type_id": "biolink:original_predicate", "value_type_id": "metatype:String", - "description": "The IDs of the original RTX-KG2pre edge(s) corresponding to this edge prior to any synonymization or remapping." + "description": "The IDs of the original RTX-KG2pre edge(s) corresponding to this edge prior to any synonymization or remapping.", + "attribute_source": "{kp_infores_curie}" }, "publications_info": { - "attribute_type_id": "bts:sentence" + "attribute_type_id": "bts:sentence", + "attribute_source": "{kp_infores_curie}" } }, "sources_template": {}