Skip to content

Commit

Permalink
Feature bio checks (#274)
Browse files Browse the repository at this point in the history
* Added bio checks module

* Added bio checks and logic check

* Added kwargs to the add rule method
  • Loading branch information
canimus authored Jul 7, 2024
1 parent 21602d8 commit 69eb7e7
Show file tree
Hide file tree
Showing 8 changed files with 71 additions and 9 deletions.
12 changes: 7 additions & 5 deletions cuallee/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
from toolz.curried import map as map_curried

logger = logging.getLogger("cuallee")
__version__ = "0.10.2"
__version__ = "0.12.2"
# Verify Libraries Available
# ==========================
try:
Expand Down Expand Up @@ -245,10 +245,12 @@ def __init__(
self.table_name = table_name
try:
from .iso.checks import ISO
from .bio.checks import BioChecks

self.iso = ISO(self)
except (ModuleNotFoundError, ImportError):
logger.error("ISO module requires requests")
self.bio = BioChecks(self)
except (ModuleNotFoundError, ImportError) as err:
logger.error(f"Dependency modules missing: {str(err)}")
self.session = session

def __repr__(self):
Expand Down Expand Up @@ -287,15 +289,15 @@ def _remove_rule_generic(self, key: str):
if key in self._rule:
self._rule.pop(key)

def add_rule(self, method: str, *arg):
def add_rule(self, method: str, *arg, **kwargs):
"""
Add a new rule to the Check class.
Args:
method (str): Check name
arg (list): Parameters of the check
"""
return operator.methodcaller(method, *arg)(self)
return operator.methodcaller(method, *arg, **kwargs)(self)

def delete_rule_by_key(self, keys: Union[str, List[str]]):
"""
Expand Down
Empty file added cuallee/bio/__init__.py
Empty file.
23 changes: 23 additions & 0 deletions cuallee/bio/amino_acids.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
amino_acid,3_letter_code,1_letter_code
alanine,ala,A
arginine,arg,R
asparagine,asn,N
aspartic acid,asp,D
asparagine or aspartic acid,asx,B
cysteine,cys,C
glutamic acid,glu,E
glutamine,gln,Q
glutamine or glutamic acid,glx,Z
glycine,gly,G
histidine,his,H
isoleucine,ile,I
leucine,leu,L
lysine,lys,K
methionine,met,M
phenylalanine,phe,F
proline,pro,P
serine,ser,S
threonine,thr,T
tryptophan,trp,W
tyrosine,tyr,Y
valine,val,V
25 changes: 25 additions & 0 deletions cuallee/bio/checks.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
from .. import Check
import pandas as pd
from pathlib import Path


class BioChecks:
    """Biology-oriented validations attached to a ``Check`` instance.

    Every public method registers a pattern rule on the wrapped check by
    calling its ``has_pattern`` method, then returns the wrapped check.
    """

    def __init__(self, check: Check):
        """Bind to ``check`` and load the amino-acid table shipped with this module.

        Args:
            check (Check): Check that receives the rules added by this class.

        Raises:
            Exception: When ``amino_acids.csv`` (expected in the same
                directory as this file) cannot be read. The underlying
                error is chained as ``__cause__``.
        """
        self._check = check
        try:
            parent_path = Path(__file__).parent
            self._aminoacids = pd.read_csv(parent_path / "amino_acids.csv")
        except Exception as err:
            # Chain the original failure so the real reason (missing file,
            # parse error, ...) stays visible in the traceback.
            raise Exception("Unable to load aminoacid definitions") from err

    def is_dna(self, column: str):
        """Validates that a sequence contains only valid nucleotide bases of DNA strand

        Args:
            column (str): Passed through to ``has_pattern`` as the target column.

        Returns:
            The wrapped check object.
        """
        # Upper-case G, T, C, A only; `*` means an empty string also matches.
        self._check.has_pattern(column, r"^[GTCA]*$")
        return self._check

    def is_protein(self, column: str):
        """Validates that a sequence contains only valid one-letter amino acid codes

        The accepted letters are taken from the ``1_letter_code`` column of
        the table loaded in ``__init__``.

        Args:
            column (str): Passed through to ``has_pattern`` as the target column.

        Returns:
            The wrapped check object.
        """
        # Build a single character class out of every known one-letter code.
        letters = "".join(self._aminoacids["1_letter_code"].tolist())
        self._check.has_pattern(column, rf"^[{letters}]*$")
        return self._check
9 changes: 9 additions & 0 deletions cuallee/logic/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
from typing import Dict, Optional

from .. import Check


class LogicCheck:
    """Performs logical inference on evaluated checks"""

    def __init__(self, checks: Optional[Dict[str, Check]] = None):
        """Create the logic check.

        Args:
            checks (Dict[str, Check], optional): Mapping of string keys to
                checks. Defaults to ``None`` rather than a shared ``{}`` so
                that no mutable object is reused across instances. The
                argument is currently not used by the constructor.
        """
        pass
5 changes: 3 additions & 2 deletions pyproject.toml
100644 → 100755
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"

[project]
name = "cuallee"
version = "0.12.1"
version = "0.12.2"
authors = [
{ name="Herminio Vazquez", email="[email protected]"},
{ name="Virginie Grosboillot", email="[email protected]" }
Expand Down Expand Up @@ -78,4 +78,5 @@ ignore = ["E731"]

[project.urls]
"Homepage" = "https://github.com/canimus/cuallee"
"Bug Tracker" = "https://github.com/canimus/cuallee"
"Bug Tracker" = "https://github.com/canimus/cuallee"

5 changes: 3 additions & 2 deletions setup.cfg
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
[metadata]
name = cuallee
version = 0.12.1
version = 0.12.2
[options]
packages = find:
packages = find:
include_package_data = True
1 change: 1 addition & 0 deletions test/unit/dagster_checks/test_methods.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import pandas as pd
from typing import Iterator


def test_make_checks():
df = pd.DataFrame({"id": [1, 2, 3, 4, 5]})
check = Check(CheckLevel.WARNING, "Dagster")
Expand Down

0 comments on commit 69eb7e7

Please sign in to comment.