Skip to content

Commit

Permalink
add changes to files
Browse files Browse the repository at this point in the history
  • Loading branch information
jsstevenson committed Jan 19, 2024
1 parent cddfeee commit ea42de7
Show file tree
Hide file tree
Showing 54 changed files with 4,937 additions and 3,028 deletions.
3 changes: 1 addition & 2 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -130,12 +130,11 @@ analysis/graph/*.ipynb

# Build files
Pipfile.lock
pyproject.toml

# DynamoDB
dynamodb_local_latest/

# Zip
*.zip

notebooks
notebooks
59 changes: 33 additions & 26 deletions analysis/civic/examples/harvester/civic_harvester_example.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,32 +8,35 @@
def create_evidence_examples(data):
    """Write example JSON files for CIViC evidence items that have assertions.

    Scans ``data["evidence"]`` in order, keeps at most the first six items
    whose ``"assertions"`` value is truthy, and writes one
    ``<evidence name>.json`` file per kept item under
    ``{PROJECT_ROOT}/analysis/civic/examples/harvester/``. Each file contains
    the evidence item, the gene and variant records whose ``"id"`` matches
    the item's ``"gene_id"`` / ``"variant_id"``, and the item's assertions.

    If no gene or variant with the referenced id exists, ``null`` is written
    for that entry instead of reusing the record found for a previous item.

    :param data: Harvested CIViC data providing ``"evidence"``,
        ``"variants"`` and ``"genes"`` lists of dicts.
    """
    # NOTE(review): the previous docstring said "five" examples, but the loop
    # has always stopped at six -- confirm which count is intended.
    max_examples = 6

    evidence_items = []
    for evidence_item in data["evidence"]:
        if evidence_item["assertions"]:
            evidence_items.append(evidence_item)
            if len(evidence_items) == max_examples:
                break

    # Index the records once instead of rescanning both lists per evidence
    # item. A later duplicate id overrides an earlier one, which matches the
    # previous "last match wins" scan.
    variants_by_id = {v["id"]: v for v in data["variants"]}
    genes_by_id = {g["id"]: g for g in data["genes"]}

    for evidence_item in evidence_items:
        example = {
            "EVIDENCE": evidence_item,
            # .get() yields None (JSON null) for an unknown id rather than a
            # stale record from an earlier iteration or an UnboundLocalError.
            "GENE": genes_by_id.get(evidence_item["gene_id"]),
            "VARIANT": variants_by_id.get(evidence_item["variant_id"]),
            "ASSERTIONS": evidence_item["assertions"],
        }

        with open(
            f"{PROJECT_ROOT}/analysis/civic/examples/harvester/"
            f"{evidence_item['name']}.json",
            "w+",
        ) as f:
            json.dump(example, f, indent=4)
Expand All @@ -45,26 +48,30 @@ def create_variant_examples(data):
"""
variants_ids = [12, 1, 221, 190]
variants = list()
for i in range(len(data['variants'])):
if data['variants'][i]['id'] in variants_ids:
variants.append(data['variants'][i])
for i in range(len(data["variants"])):
if data["variants"][i]["id"] in variants_ids:
variants.append(data["variants"][i])

for variant in variants:
with open(f"{PROJECT_ROOT}/analysis/civic/examples/harvester/"
f"{variant['name'].lower()}.json", 'w+') as f:
variant['evidence_items'] = variant['evidence_items'][0]
with open(
f"{PROJECT_ROOT}/analysis/civic/examples/harvester/"
f"{variant['name'].lower()}.json",
"w+",
) as f:
variant["evidence_items"] = variant["evidence_items"][0]
f.write(json.dumps(variant, indent=4))


if __name__ == '__main__':
if __name__ == "__main__":
c = CIViCHarvester()
c.harvest()
latest = sorted((APP_ROOT / "data" / "civic" / "harvester").glob("civic_harvester_*.json"))[-1] # noqa: E501
latest = sorted(
(APP_ROOT / "data" / "civic" / "harvester").glob("civic_harvester_*.json")
)[-1]
with open(latest, "r") as f:
civic_data = json.load(f)

civic_ex_dir =\
PROJECT_ROOT / 'analysis' / 'civic' / 'examples' / 'harvester'
civic_ex_dir = PROJECT_ROOT / "analysis" / "civic" / "examples" / "harvester"
civic_ex_dir.mkdir(exist_ok=True, parents=True)

create_evidence_examples(civic_data)
Expand Down
111 changes: 61 additions & 50 deletions analysis/civic/examples/transform/civic_transform_example.py
Original file line number Diff line number Diff line change
@@ -1,28 +1,29 @@
"""Create an example json file for CIViC Transform."""
import json

from metakb import PROJECT_ROOT, APP_ROOT
from metakb import APP_ROOT, PROJECT_ROOT
from metakb.transform import CIViCTransform


def create_civic_example(civic_data):
"""Create CIViC transform examples from list of evidence items."""
ex = {
'statements': [],
'propositions': [],
'variation_descriptors': [],
'gene_descriptors': [],
'therapy_descriptors': [],
'disease_descriptors': [],
'methods': [],
'documents': []
"statements": [],
"propositions": [],
"variation_descriptors": [],
"gene_descriptors": [],
"therapy_descriptors": [],
"disease_descriptors": [],
"methods": [],
"documents": [],
}
supported_by_statement_ids = set()
for s in civic_data['statements']:
if s['id'] == 'civic.aid:6':
supported_by_statement_ids = \
{s for s in s['supported_by'] if s.startswith('civic.eid')}
supported_by_statement_ids.add(s['id'])
for s in civic_data["statements"]:
if s["id"] == "civic.aid:6":
supported_by_statement_ids = {
s for s in s["supported_by"] if s.startswith("civic.eid")
}
supported_by_statement_ids.add(s["id"])
break

proposition_ids = set()
Expand All @@ -32,56 +33,66 @@ def create_civic_example(civic_data):
gids = set()
methods = set()
documents = set()
for s in civic_data['statements']:
if s['id'] in supported_by_statement_ids:
ex['statements'].append(s)
proposition_ids.add(s['proposition'])
vids.add(s['variation_descriptor'])
tids.add(s['therapy_descriptor'])
dids.add(s['disease_descriptor'])
methods.add(s['method'])
documents.update({d for d in s['supported_by'] if
not d.startswith('civic.eid')})
for s in civic_data["statements"]:
if s["id"] in supported_by_statement_ids:
ex["statements"].append(s)
proposition_ids.add(s["proposition"])
vids.add(s["variation_descriptor"])
tids.add(s["therapy_descriptor"])
dids.add(s["disease_descriptor"])
methods.add(s["method"])
documents.update(
{d for d in s["supported_by"] if not d.startswith("civic.eid")}
)

for p in civic_data['propositions']:
if p['id'] in proposition_ids:
ex['propositions'].append(p)
for p in civic_data["propositions"]:
if p["id"] in proposition_ids:
ex["propositions"].append(p)

for v in civic_data['variation_descriptors']:
if v['id'] in vids:
ex['variation_descriptors'].append(v)
gids.add(v['gene_context'])
for v in civic_data["variation_descriptors"]:
if v["id"] in vids:
ex["variation_descriptors"].append(v)
gids.add(v["gene_context"])

for t in civic_data['therapy_descriptors']:
if t['id'] in tids:
ex['therapy_descriptors'].append(t)
for t in civic_data["therapy_descriptors"]:
if t["id"] in tids:
ex["therapy_descriptors"].append(t)

for d in civic_data['disease_descriptors']:
if d['id'] in dids:
ex['disease_descriptors'].append(d)
for d in civic_data["disease_descriptors"]:
if d["id"] in dids:
ex["disease_descriptors"].append(d)

for g in civic_data['gene_descriptors']:
if g['id'] in gids:
ex['gene_descriptors'].append(g)
for g in civic_data["gene_descriptors"]:
if g["id"] in gids:
ex["gene_descriptors"].append(g)

for m in civic_data['methods']:
if m['id'] in methods:
ex['methods'].append(m)
for m in civic_data["methods"]:
if m["id"] in methods:
ex["methods"].append(m)

for d in civic_data['documents']:
if d['id'] in documents:
ex['documents'].append(d)
for d in civic_data["documents"]:
if d["id"] in documents:
ex["documents"].append(d)

with open(PROJECT_ROOT / "analysis" / "civic" / "examples" / # noqa: W504
"transform" / "civic_cdm_example.json", 'w+') as f2:
with open(
PROJECT_ROOT
/ "analysis"
/ "civic"
/ "examples"
/ "transform"
/ "civic_cdm_example.json",
"w+",
) as f2:
json.dump(ex, f2, indent=4)


if __name__ == "__main__":
    # Run the CIViC transform and dump its JSON output before building the
    # example from it.
    civic = CIViCTransform()
    civic.transform()
    civic.create_json()

    # The lexicographically last matching file is treated as the latest dump.
    transform_dir = APP_ROOT / "data" / "civic" / "transform"
    cdm_files = sorted(transform_dir.glob("civic_cdm_*.json"))
    latest = cdm_files[-1]

    with open(latest, "r") as f:
        civic_data = json.load(f)

    create_civic_example(civic_data)
10 changes: 5 additions & 5 deletions analysis/graph/db_helper.py
Original file line number Diff line number Diff line change
@@ -1,19 +1,19 @@
"""Utility function to load/reload graph for development."""
from metakb.database import Graph
from metakb import APP_ROOT
import json

from metakb import APP_ROOT
from metakb.database import Graph

g = Graph(uri="bolt://localhost:7687", credentials=("neo4j", "admin"))
g.clear()

fpath = APP_ROOT / 'data' / 'civic' / 'transform' / 'civic_cdm.json'
with open(fpath, 'r') as f:
fpath = APP_ROOT / "data" / "civic" / "transform" / "civic_cdm.json"
with open(fpath, "r") as f:
items = json.load(f)

count = 0
for item in items:
if 'assertion' in item.keys():
if "assertion" in item.keys():
continue
else:
g.add_transformed_data(item)
Expand Down
2 changes: 2 additions & 0 deletions analysis/graph/missing_diseases_counts.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
TALL and T-Cell Acute Lymphoid Leukemia, 6
T-Cell Acute Lymphoid Leukemia, 6
Loading

0 comments on commit ea42de7

Please sign in to comment.