From 26f1edb91617f5a409a2d022a1d2126a7a9089fb Mon Sep 17 00:00:00 2001 From: Ben Gyori Date: Wed, 25 Aug 2021 15:35:22 -0400 Subject: [PATCH 1/4] Add more metadata to INDRA edges --- src/indra_cogex/sources/indra_db/__init__.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/indra_cogex/sources/indra_db/__init__.py b/src/indra_cogex/sources/indra_db/__init__.py index 5771e0e8b..99e356066 100644 --- a/src/indra_cogex/sources/indra_db/__init__.py +++ b/src/indra_cogex/sources/indra_db/__init__.py @@ -92,6 +92,7 @@ def get_relations(self): # noqa:D102 "agB_id", "stmt_type", "source_counts", + "ev_count", "stmt_hash", ] for ( @@ -105,7 +106,11 @@ def get_relations(self): # noqa:D102 ) in ( self.df[columns].drop_duplicates().values ): - data = {"stmt_hash:long": stmt_hash, "source_counts:string": source_counts} + data = { + "stmt_hash:long": stmt_hash, + "source_counts:string": source_counts, + "ev_count:int": sum(source_counts.values()), + } yield Relation( source_ns, source_id, From fd28baefc33b6ccf33b0a1e221a3fe852f26deb7 Mon Sep 17 00:00:00 2001 From: Ben Gyori Date: Sun, 29 Aug 2021 18:58:44 -0400 Subject: [PATCH 2/4] Change relation type and add more data --- src/indra_cogex/sources/indra_db/__init__.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/src/indra_cogex/sources/indra_db/__init__.py b/src/indra_cogex/sources/indra_db/__init__.py index 99e356066..f00ad67a3 100644 --- a/src/indra_cogex/sources/indra_db/__init__.py +++ b/src/indra_cogex/sources/indra_db/__init__.py @@ -85,6 +85,7 @@ def get_nodes(self): # noqa:D102 yield Node(db_ns, db_id, ["BioEntity"], dict(name=name)) def get_relations(self): # noqa:D102 + rel_type = "indra_rel" columns = [ "agA_ns", "agA_id", @@ -92,7 +93,8 @@ def get_relations(self): # noqa:D102 "agB_id", "stmt_type", "source_counts", - "ev_count", + "evidence_count", + "belief", "stmt_hash", ] for ( @@ -102,6 +104,8 @@ def get_relations(self): # noqa:D102 target_id, stmt_type, source_counts, + evidence_count, + belief, stmt_hash, ) in ( self.df[columns].drop_duplicates().values @@ -109,14 +113,16 @@ def get_relations(self): # noqa:D102 data = { "stmt_hash:long": stmt_hash, "source_counts:string": source_counts, - "ev_count:int": sum(source_counts.values()), + "evidence_count:int": evidence_count, + "stmt_type:string": stmt_type, + "belief:float": belief, } yield Relation( source_ns, source_id, target_ns, target_id, - stmt_type, + rel_type, data, ) From b69caef0660e905d8e2614bead6d2290b878113a Mon Sep 17 00:00:00 2001 From: Ben Gyori Date: Sun, 29 Aug 2021 19:01:29 -0400 Subject: [PATCH 3/4] Use numpy style docstring --- src/indra_cogex/sources/indra_db/__init__.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/indra_cogex/sources/indra_db/__init__.py b/src/indra_cogex/sources/indra_db/__init__.py index f00ad67a3..c548d7c8e 100644 --- a/src/indra_cogex/sources/indra_db/__init__.py +++ b/src/indra_cogex/sources/indra_db/__init__.py @@ -35,7 +35,11 @@ class DbProcessor(Processor): def __init__(self, path: Union[None, str, Path] = None): """Initialize the INDRA database processor. - :param path: The path to the INDRA database SIF dump pickle. If none given, will look in the default location. + Parameters + ---------- + path : + The path to the INDRA database SIF dump pickle. If none given, + will look in the default location. """ if path is None: path = pystow.join("indra", "db", name="sif.pkl") From f7a23a5b4956858f99cd8b072378d268b804b7f0 Mon Sep 17 00:00:00 2001 From: Ben Gyori Date: Sun, 29 Aug 2021 20:02:45 -0400 Subject: [PATCH 4/4] Add one more corner case to fix --- src/indra_cogex/sources/indra_db/__init__.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/indra_cogex/sources/indra_db/__init__.py b/src/indra_cogex/sources/indra_db/__init__.py index c548d7c8e..c6cc1ec99 100644 --- a/src/indra_cogex/sources/indra_db/__init__.py +++ b/src/indra_cogex/sources/indra_db/__init__.py @@ -142,5 +142,7 @@ def fix_id(db_ns: str, db_id: str) -> Tuple[str, str]: db_ns = "UPLOC" if db_ns == "UP" and "-" in db_id and not db_id.startswith("SL-"): db_id = db_id.split("-")[0] + if db_ns == "FPLX" and db_id == "TCF-LEF": + db_id = "TCF_LEF" db_id = ensure_prefix_if_needed(db_ns, db_id) return db_ns, db_id