From 03f47b457bef22c88dfbb16300fde090750a881b Mon Sep 17 00:00:00 2001 From: Matt Garber Date: Fri, 9 Aug 2024 09:27:22 -0400 Subject: [PATCH] Added convenience subtables This adds two derived tables, mrrel_is_a, and mrconso_drugs, in order to speed up long queries in vocab lookup scenarios. It also bumps the version as part of the library 3.0 cutover, and moves the version definition to __init__. --- README.md | 11 +++++++++++ cumulus_library_umls/__init__.py | 1 + cumulus_library_umls/umls/umls_builder.py | 23 ++++++++++++++++++----- pyproject.toml | 6 +++--- 4 files changed, 33 insertions(+), 8 deletions(-) diff --git a/README.md b/README.md index bf32867..b954b82 100644 --- a/README.md +++ b/README.md @@ -23,6 +23,17 @@ Note: This study is explicitly namespaced in its own schema, `umls`. Make sure y database is not using this schema for another use. Do not create tables inside this schema by another means. +## Additional custom tables + +The following tables are derived from the primary tables, and are included here as a +convenience to avoid having to compute these on a repeated basis. + +- **mrrel_is_a** a subset of the relationships in mrrel, including only those that define +that concept A is a member of concept B (i.e. is a child, or is explicitly marked as +being a tradename/member belonging to the parent concept). +- **mrconso_drugs** a subset of the entity list in mrconso, limited to vocabularies +specifically dealing with drug identifiers (e.g. SNOMED, RxNorm, etc.) 
+ ## Licensing details The `cumulus-library-umls` study is provided as a convenience to install the diff --git a/cumulus_library_umls/__init__.py b/cumulus_library_umls/__init__.py index e69de29..5becc17 100644 --- a/cumulus_library_umls/__init__.py +++ b/cumulus_library_umls/__init__.py @@ -0,0 +1 @@ +__version__ = "1.0.0" diff --git a/cumulus_library_umls/umls/umls_builder.py b/cumulus_library_umls/umls/umls_builder.py index f7ab343..ecd4626 100644 --- a/cumulus_library_umls/umls/umls_builder.py +++ b/cumulus_library_umls/umls/umls_builder.py @@ -1,7 +1,7 @@ import pathlib import pandas -from cumulus_library import base_table_builder, base_utils, databases +from cumulus_library import base_table_builder, base_utils, study_manifest from cumulus_library.apis import umls from cumulus_library.template_sql import base_templates @@ -145,10 +145,9 @@ def create_parquet( def prepare_queries( self, - cursor: databases.DatabaseCursor, - schema: str, + config: base_utils.StudyConfig, + manifest: study_manifest.StudyManifest, *args, - config=base_utils.StudyConfig, **kwargs, ): download_path = pathlib.Path(__file__).resolve().parent / "downloads" @@ -184,7 +183,7 @@ def prepare_queries( ) self.queries.append( base_templates.get_ctas_from_parquet_query( - schema_name=schema, + schema_name=config.schema, table_name=f"umls__{file.stem}", local_location=parquet_path / f"{file.stem}.parquet", remote_location=remote_path, @@ -193,3 +192,17 @@ def prepare_queries( ) ) progress.advance(task) + + # Section for reusable cross-study helper tables + self.queries.append( + """CREATE TABLE umls__mrrel_is_a AS +SELECT * FROM umls.mrrel +WHERE REL = 'CHD' +OR RELA in ('isa','tradename_of','has_tradename','has_basis_of_strength_substance')""" + ) + self.queries.append( + """CREATE TABLE umls__mrconso_drugs AS +SELECT * FROM umls.mrconso +WHERE SAB in ('ATC','CVX','DRUGBANK','GS','MMSL','MMX','MTHCMSFRFMTHSPL','NDDF', + 'RXNORM','SNOMEDCT_US','USP','VANDF')""" + ) diff --git a/pyproject.toml 
b/pyproject.toml index a4916a2..ab0c563 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,9 +1,8 @@ [project] -version = "0.2.0" name = "cumulus-library-umls" -requires-python = ">= 3.10" +requires-python = ">= 3.11" dependencies = [ - "cumulus-library >= 2.3", + "cumulus-library >= 3.0", ] description = "A Unified Medical Language System® Metathesaurus study for the Cumulus project" readme = "README.md" @@ -15,6 +14,7 @@ classifiers = [ "Programming Language :: Python :: 3", "Topic :: Software Development :: Libraries :: Python Modules", ] +dynamic=["version"] [project.optional-dependencies] dev = [ "ruff == 0.2.1",