Skip to content

Commit

Permalink
add bioentities and terms to the index
Browse files Browse the repository at this point in the history
  • Loading branch information
sierra-moxon committed Jan 25, 2025
1 parent 2edab99 commit c6bbd9e
Showing 1 changed file with 15 additions and 6 deletions.
21 changes: 15 additions & 6 deletions scripts/gen-model-meta.py
Original file line number Diff line number Diff line change
@@ -1,17 +1,16 @@
"""
Script to generate model indicies. to run:
Script to generate model indexes. To run:
% cd scripts
% poetry install
% poetry run python gen-model-meta.py --keys-to-index contributor,term_id,reference_id --output-dir /tmp/output
% poetry run python gen-model-meta.py --keys-to-index contributor --keys-to-index providedBy --output-dir /tmp/output
"""
import os
import json
import sys
import logging
from pathlib import Path
from pprint import pprint

import click

Expand Down Expand Up @@ -44,14 +43,23 @@ def process_json_files(keys_to_index, output_dir, path_to_json=json_path):
# Open, parse, and save the JSON in a dictionary file
file_url = os.path.join(path_to_json, file_name)
with open(file_url, "r") as f:
print(model_id)
read_data = json.load(f)

if not read_data:
die_screaming(f"ERROR: No data in file: {file_url}")

# Create indices for the current file
individuals = read_data.get("individuals", [])
if "bioentity_id" or "term_id" in keys_to_index:
for individual in individuals:
types = individual.get("type", [])
for entity_type in types:
entity_id = entity_type.get("id")
if entity_id not in indices["entity"]:
indices["entity"][entity_id] = []
if model_id not in indices["entity"][entity_id]:
indices["entity"][entity_id].append(model_id)

for individual in individuals:
annotations = individual.get("annotations", [])
for annotation in annotations:
Expand All @@ -62,9 +70,10 @@ def process_json_files(keys_to_index, output_dir, path_to_json=json_path):
indices[key][value] = []
if model_id not in indices[key][value]:
indices[key][value].append(model_id)

# For each top-level key in the indices dictionary, write out the JSON to a file
for key, value in indices.items():
with open(os.path.join(output_dir, f"{key}.json"), "w") as f:
with open(os.path.join(output_dir, f"{key}_index.json"), "w") as f:
json.dump(value, f, indent=4)


Expand All @@ -75,7 +84,7 @@ def process_json_files(keys_to_index, output_dir, path_to_json=json_path):
"-k",
multiple=True,
required=True,
help="List of keys to index (e.g., contributor, term_id, reference_id).",
help="List of keys to index (e.g., contributor, bioentity_id, term_id).",
)
@click.option(
"--output-dir",
Expand Down

0 comments on commit c6bbd9e

Please sign in to comment.