Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Added convenience subtables #7

Merged
merged 3 commits into from
Aug 9, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 7 additions & 2 deletions .github/workflows/ci.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,9 @@ jobs:
- name: Set up Python
uses: actions/setup-python@v5
with:
python-version: '3.10'
python-version: '3.11'
- name: Get library from main
run: pip install git+https://github.com/smart-on-fhir/cumulus-library.git
- name: Install linters
run: |
python -m pip install --upgrade pip
Expand All @@ -29,7 +31,10 @@ jobs:
- name: Set up Python
uses: actions/setup-python@v5
with:
python-version: '3.10'
python-version: '3.11'

- name: Get library from main
run: pip install git+https://github.com/smart-on-fhir/cumulus-library.git

- name: Install dependencies
run: |
Expand Down
11 changes: 11 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,17 @@ Note: This study is explicitly namespaced in its own schema, `umls`. Make sure y
database is not using this schema for another use. Do not create tables inside this
schema by another means.

## Additional custom tables

The following tables are derived from the primary tables, and are included here as a
convenience to avoid having to recompute them repeatedly.

- **mrrel_is_a**: a subset of the relationships in mrrel, including only those that define
that concept A is a member of concept B (i.e. is a child, or is explicitly marked as
being a tradename/member belonging to the parent concept).
- **mrconso_drugs**: a subset of the entity list in mrconso, limited to vocabularies
specifically dealing with drug identifiers (e.g. SNOMED, RxNorm).

## Licensing details

The `cumulus-library-umls` study is provided as a convenience to install the
Expand Down
1 change: 1 addition & 0 deletions cumulus_library_umls/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
__version__ = "1.0.0"
23 changes: 18 additions & 5 deletions cumulus_library_umls/umls/umls_builder.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import pathlib

import pandas
from cumulus_library import base_table_builder, base_utils, databases
from cumulus_library import base_table_builder, base_utils, study_manifest
from cumulus_library.apis import umls
from cumulus_library.template_sql import base_templates

Expand Down Expand Up @@ -145,10 +145,9 @@ def create_parquet(

def prepare_queries(
self,
cursor: databases.DatabaseCursor,
schema: str,
config: base_utils.StudyConfig,
manifest: study_manifest.StudyManifest,
*args,
config=base_utils.StudyConfig,
**kwargs,
):
download_path = pathlib.Path(__file__).resolve().parent / "downloads"
Expand Down Expand Up @@ -184,7 +183,7 @@ def prepare_queries(
)
self.queries.append(
base_templates.get_ctas_from_parquet_query(
schema_name=schema,
schema_name=config.schema,
table_name=f"umls__{file.stem}",
local_location=parquet_path / f"{file.stem}.parquet",
remote_location=remote_path,
Expand All @@ -193,3 +192,17 @@ def prepare_queries(
)
)
progress.advance(task)

# Section for reusable cross-study helper tables
self.queries.append(
"""CREATE TABLE umls__mrrel_is_a AS
SELECT * FROM umls.mrrel
WHERE REL = 'CHD'
OR RELA in ('isa','tradename_of','has_tradename','has_basis_of_strength_substance')"""
)
self.queries.append(
"""CREATE TABLE umls__mrconso_drugs AS
SELECT * FROM umls.mrconso
WHERE SAB in ('ATC','CVX','DRUGBANK','GS','MMSL','MMX','MTHCMSFRFMTHSPL','NDDF',
'RXNORM','SNOMEDCT_US','USP','VANDF')"""
)
6 changes: 3 additions & 3 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,9 +1,8 @@
[project]
version = "0.2.0"
name = "cumulus-library-umls"
requires-python = ">= 3.10"
requires-python = ">= 3.11"
dependencies = [
"cumulus-library >= 2.3",
"cumulus-library >= 3.0",
]
description = "A Unified Medical Language System® Metathesaurus study for the Cumulus project"
readme = "README.md"
Expand All @@ -15,6 +14,7 @@ classifiers = [
"Programming Language :: Python :: 3",
"Topic :: Software Development :: Libraries :: Python Modules",
]
dynamic=["version"]
[project.optional-dependencies]
dev = [
"ruff == 0.2.1",
Expand Down
16 changes: 11 additions & 5 deletions tests/test_umls_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

import pytest
import responses
from cumulus_library import base_utils, databases, db_config
from cumulus_library import base_utils, databases, db_config, study_manifest

from cumulus_library_umls.umls import umls_builder

Expand Down Expand Up @@ -59,10 +59,13 @@ def test_create_query(mock_resolve, mock_responses, tmp_path):

db_config.db_type = "duckdb"
config = base_utils.StudyConfig(
db=databases.DuckDatabaseBackend(f"{tmp_path}/duckdb"), umls_key="123"
db=databases.DuckDatabaseBackend(f"{tmp_path}/duckdb"),
umls_key="123",
schema="main",
)
builder = umls_builder.UMLSBuilder()
builder.prepare_queries(cursor=config.db.cursor(), schema="main", config=config)
manifest = study_manifest.StudyManifest()
builder.prepare_queries(config=config, manifest=manifest)
expected = f"""CREATE TABLE IF NOT EXISTS umls__TESTTABLE AS SELECT
TTY,
CODE
Expand All @@ -88,10 +91,13 @@ def test_create_query_download_exists(mock_resolve, mock_responses, tmp_path):

db_config.db_type = "duckdb"
config = base_utils.StudyConfig(
db=databases.DuckDatabaseBackend(f"{tmp_path}/duckdb"), umls_key="123"
db=databases.DuckDatabaseBackend(f"{tmp_path}/duckdb"),
umls_key="123",
schema="main",
)
builder = umls_builder.UMLSBuilder()
builder.prepare_queries(cursor=config.db.cursor(), schema="main", config=config)
manifest = study_manifest.StudyManifest()
builder.prepare_queries(config=config, manifest=manifest)
download_dirs = sorted((tmp_path / "downloads").iterdir())
assert len(download_dirs) == 1
assert "2000AA" in str(download_dirs[0])
Expand Down