From 3f05008b78b827dc1df068f2b74550fd02ac8809 Mon Sep 17 00:00:00 2001
From: Andras Stirling <stirling.andras@gmail.com>
Date: Fri, 16 Jul 2021 16:08:43 +0200
Subject: [PATCH] Fix get_relations function

---
 .gitignore                                    |  1 -
 .../sources/clinicaltrials/__init__.py        | 48 ++++++++++---------
 tests/test_clinicaltrials.py                  |  7 +++
 3 files changed, 33 insertions(+), 23 deletions(-)

diff --git a/.gitignore b/.gitignore
index 3d58b1a48..30ed74218 100644
--- a/.gitignore
+++ b/.gitignore
@@ -129,6 +129,5 @@ dmypy.json
 .pyre/
 import.report
 
-
 .idea
 .idea/*
\ No newline at end of file
diff --git a/src/indra_cogex/sources/clinicaltrials/__init__.py b/src/indra_cogex/sources/clinicaltrials/__init__.py
index 9adcd5dbb..4a412d1f5 100644
--- a/src/indra_cogex/sources/clinicaltrials/__init__.py
+++ b/src/indra_cogex/sources/clinicaltrials/__init__.py
@@ -5,6 +5,7 @@
 from indra_cogex.representation import Node, Relation
 
 drug_pattern = re.compile(r"^Drug: ([a-zA-Z ]|\d)+$")
+# Trial URLs look like https://ClinicalTrials.gov/show/NCT<digits>; the trial ID is the part after "/show/".
 
 
 class ClinicaltrialsProcessor(Processor):
@@ -36,27 +37,30 @@ def get_nodes(self):
                             )
 
     def get_relations(self):
-        for conditions in self.df["Conditions"]:
-            for condition in conditions.split("|"):
+        for index, row in self.df.iterrows():
+            for condition in row["Conditions"].split("|"):
                 cond_matches = gilda.ground(condition)
                 if cond_matches:
-                    for interventions in self.df["Interventions"]:
-                        if not pd.isna(interventions):
-                            for intervention in interventions.split("|"):
-                                if drug_pattern.match(intervention):
-                                    int_matches = gilda.ground(intervention[6:])
-                                    if int_matches:
-                                        yield Relation(
-                                            source_ns=cond_matches[0].term.db,
-                                            source_id=cond_matches[0].term.id,
-                                            target_ns=int_matches[0].term.db,
-                                            target_id=int_matches[0].term.id,
-                                            rel_type="has_trial",
-                                        )
-                                        yield Relation(
-                                            source_ns=cond_matches[0].term.db,
-                                            source_id=cond_matches[0].term.id,
-                                            target_ns=int_matches[0].term.db,
-                                            target_id=int_matches[0].term.id,
-                                            rel_type="tested_in",
-                                        )
+                    source_ns = cond_matches[0].term.db
+                    source_id = cond_matches[0].term.id
+                    if not pd.isna(row["Interventions"]):
+                        for intervention in row["Interventions"].split("|"):
+                            if drug_pattern.match(intervention):
+                                int_matches = gilda.ground(intervention[6:])
+                                if int_matches:
+                                    target_ns = int_matches[0].term.db
+                                    target_id = row["URL"][32:]
+                                    yield Relation(
+                                        source_ns=source_ns,
+                                        source_id=source_id,
+                                        target_ns=target_ns,
+                                        target_id=target_id,
+                                        rel_type="has_trial"
+                                    )
+                                    yield Relation(
+                                        source_ns=source_ns,
+                                        source_id=source_id,
+                                        target_ns=target_ns,
+                                        target_id=target_id,
+                                        rel_type="tested_in"
+                                    )
diff --git a/tests/test_clinicaltrials.py b/tests/test_clinicaltrials.py
index 6ff2c58d0..390cd90eb 100644
--- a/tests/test_clinicaltrials.py
+++ b/tests/test_clinicaltrials.py
@@ -7,3 +7,10 @@ def test_get_nodes():
     cp = ClinicaltrialsProcessor(path)
     nodes = list(cp.get_nodes())
     assert len(nodes) is not 0
+
+
+def test_get_relations():
+    path = os.path.join(os.path.dirname(__file__), "test_search_results.tsv")
+    cp = ClinicaltrialsProcessor(path)
+    relations = list(cp.get_relations())
+    # TODO: assert on the yielded relations; for now this only checks that get_relations runs