Skip to content

Commit

Permalink
Merge pull request #7 from smart-on-fhir/mg/subtables
Browse files Browse the repository at this point in the history
Added convenience subtables
  • Loading branch information
dogversioning authored Aug 9, 2024
2 parents 0503cac + 8e78b7f commit c387a1c
Show file tree
Hide file tree
Showing 6 changed files with 51 additions and 15 deletions.
9 changes: 7 additions & 2 deletions .github/workflows/ci.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,9 @@ jobs:
- name: Set up Python
uses: actions/setup-python@v5
with:
python-version: '3.10'
python-version: '3.11'
- name: Get library from main
run: pip install git+https://github.com/smart-on-fhir/cumulus-library.git
- name: Install linters
run: |
python -m pip install --upgrade pip
Expand All @@ -29,7 +31,10 @@ jobs:
- name: Set up Python
uses: actions/setup-python@v5
with:
python-version: '3.10'
python-version: '3.11'

- name: Get library from main
run: pip install git+https://github.com/smart-on-fhir/cumulus-library.git

- name: Install dependencies
run: |
Expand Down
11 changes: 11 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,17 @@ Note: This study is explicitly namespaced in its own schema, `umls`. Make sure y
database is not using this schema for another use. Do not create tables inside this
schema by another means.

## Additional custom tables

The following tables are derived from the primary tables, and are included here as a
convenience to avoid having to recompute them repeatedly.

- **mrrel_is_a** a subset of the relationships in mrrel, including only those that define
that concept A is a member of concept B (i.e. is a child, or is explicitly marked as
being a tradename/member belonging to the parent concept).
- **mrconso_drugs** a subset of the entity list in mrconso, limited to vocabularies
specifically dealing with drug identifiers (e.g. SNOMED, RxNorm).

## Licensing details

The `cumulus-library-umls` study is provided as a convenience to install the
Expand Down
1 change: 1 addition & 0 deletions cumulus_library_umls/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
__version__ = "1.0.0"
23 changes: 18 additions & 5 deletions cumulus_library_umls/umls/umls_builder.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import pathlib

import pandas
from cumulus_library import base_table_builder, base_utils, databases
from cumulus_library import base_table_builder, base_utils, study_manifest
from cumulus_library.apis import umls
from cumulus_library.template_sql import base_templates

Expand Down Expand Up @@ -145,10 +145,9 @@ def create_parquet(

def prepare_queries(
self,
cursor: databases.DatabaseCursor,
schema: str,
config: base_utils.StudyConfig,
manifest: study_manifest.StudyManifest,
*args,
config=base_utils.StudyConfig,
**kwargs,
):
download_path = pathlib.Path(__file__).resolve().parent / "downloads"
Expand Down Expand Up @@ -184,7 +183,7 @@ def prepare_queries(
)
self.queries.append(
base_templates.get_ctas_from_parquet_query(
schema_name=schema,
schema_name=config.schema,
table_name=f"umls__{file.stem}",
local_location=parquet_path / f"{file.stem}.parquet",
remote_location=remote_path,
Expand All @@ -193,3 +192,17 @@ def prepare_queries(
)
)
progress.advance(task)

# Section for reusable cross-study helper tables
self.queries.append(
"""CREATE TABLE umls__mrrel_is_a AS
SELECT * FROM umls.mrrel
WHERE REL = 'CHD'
OR RELA in ('isa','tradename_of','has_tradename','has_basis_of_strength_substance')"""
)
self.queries.append(
"""CREATE TABLE umls__mrconso_drugs AS
SELECT * FROM umls.mrconso
WHERE SAB in ('ATC','CVX','DRUGBANK','GS','MMSL','MMX','MTHCMSFRFMTHSPL','NDDF',
'RXNORM','SNOMEDCT_US','USP','VANDF')"""
)
6 changes: 3 additions & 3 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,9 +1,8 @@
[project]
version = "0.2.0"
name = "cumulus-library-umls"
requires-python = ">= 3.10"
requires-python = ">= 3.11"
dependencies = [
"cumulus-library >= 2.3",
"cumulus-library >= 3.0",
]
description = "A Unified Medical Language System® Metathesaurus study for the Cumulus project"
readme = "README.md"
Expand All @@ -15,6 +14,7 @@ classifiers = [
"Programming Language :: Python :: 3",
"Topic :: Software Development :: Libraries :: Python Modules",
]
dynamic=["version"]
[project.optional-dependencies]
dev = [
"ruff == 0.2.1",
Expand Down
16 changes: 11 additions & 5 deletions tests/test_umls_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

import pytest
import responses
from cumulus_library import base_utils, databases, db_config
from cumulus_library import base_utils, databases, db_config, study_manifest

from cumulus_library_umls.umls import umls_builder

Expand Down Expand Up @@ -59,10 +59,13 @@ def test_create_query(mock_resolve, mock_responses, tmp_path):

db_config.db_type = "duckdb"
config = base_utils.StudyConfig(
db=databases.DuckDatabaseBackend(f"{tmp_path}/duckdb"), umls_key="123"
db=databases.DuckDatabaseBackend(f"{tmp_path}/duckdb"),
umls_key="123",
schema="main",
)
builder = umls_builder.UMLSBuilder()
builder.prepare_queries(cursor=config.db.cursor(), schema="main", config=config)
manifest = study_manifest.StudyManifest()
builder.prepare_queries(config=config, manifest=manifest)
expected = f"""CREATE TABLE IF NOT EXISTS umls__TESTTABLE AS SELECT
TTY,
CODE
Expand All @@ -88,10 +91,13 @@ def test_create_query_download_exists(mock_resolve, mock_responses, tmp_path):

db_config.db_type = "duckdb"
config = base_utils.StudyConfig(
db=databases.DuckDatabaseBackend(f"{tmp_path}/duckdb"), umls_key="123"
db=databases.DuckDatabaseBackend(f"{tmp_path}/duckdb"),
umls_key="123",
schema="main",
)
builder = umls_builder.UMLSBuilder()
builder.prepare_queries(cursor=config.db.cursor(), schema="main", config=config)
manifest = study_manifest.StudyManifest()
builder.prepare_queries(config=config, manifest=manifest)
download_dirs = sorted((tmp_path / "downloads").iterdir())
assert len(download_dirs) == 1
assert "2000AA" in str(download_dirs[0])
Expand Down

0 comments on commit c387a1c

Please sign in to comment.