Skip to content

Commit

Permalink
Tests/tes model tests (#34)
Browse files Browse the repository at this point in the history
Co-authored-by: salihuDickson <[email protected]>
  • Loading branch information
SalihuDickson and salihuDickson authored Oct 16, 2024
1 parent bdfe4cc commit dc6b1fa
Show file tree
Hide file tree
Showing 6 changed files with 366 additions and 293 deletions.
3 changes: 1 addition & 2 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,6 @@ jobs:
- name: Lint with Ruff
run: |
poetry run ruff check crategen/
if: ${{ success() }}
- name: Type check with Mypy
run: |
Expand All @@ -42,4 +41,4 @@ jobs:
- name: Run tests
run: |
poetry run pytest --cov=crategen
poetry run pytest --cov=crategen
46 changes: 41 additions & 5 deletions crategen/converters/utils.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,15 @@
"""Utility functions for handling data conversion."""

import datetime
import os
import re


def convert_to_iso8601(timestamp):
"""Convert a given timestamp to ISO 8601 format.
Handles multiple formats including RFC 3339, ISO 8601 with and without fractional seconds.
Args:
timestamp (str): The timestamp to be converted.
Expand All @@ -16,15 +18,49 @@ def convert_to_iso8601(timestamp):
"""
if timestamp:
formats = [
"%Y-%m-%dT%H:%M:%S.%fZ",
"%Y-%m-%dT%H:%M:%SZ",
"%Y-%m-%dT%H:%M:%S%z",
"%Y-%m-%dT%H:%M:%S.%fZ",
"%Y-%m-%dT%H:%M:%SZ",
"%Y-%m-%dT%H:%M:%S%z",
"%Y-%m-%dT%H:%M:%S.%f%z",
]
for fmt in formats:
try:
return datetime.datetime.strptime(timestamp, fmt).isoformat() + "Z"
return datetime.datetime.strptime(timestamp, fmt).isoformat("T") + "Z"
except ValueError:
continue
return None
return None


# This function does not have to be rock solid; it is supposed to help users, not restrict them.
# Because of the difficulty of validating all possible types of file paths, it has deliberately not been written to be very stringent.
def is_absolute_path(path):
    """Check whether a given path is an absolute path.

    Besides POSIX absolute paths, this recognises Windows drive paths,
    UNC paths, and URL-like paths (including similar schemes such as
    Amazon S3 paths). For the prefixed forms, the path is only considered
    absolute when something follows the prefix.

    Args:
        path: The path string to check.

    Returns:
        True if the path is an absolute path, False otherwise.
    """
    # Nothing to inspect: an empty (or missing) path is never absolute.
    if not path:
        return False

    prefix_patterns = (
        r"^[a-zA-Z0-9]+:\\",  # Windows absolute paths, e.g. C:\dir
        r"^\\\\",  # UNC paths, e.g. \\server\share
        r"^[a-zA-Z0-9]+://",  # URL-like paths and similar protocols, e.g. s3://bucket
    )
    for pattern in prefix_patterns:
        match = re.match(pattern, path)
        if match:
            # Slice (rather than index) the remainder so that a bare prefix
            # such as "s3://" yields False instead of raising IndexError.
            return bool(path[match.end():])

    # POSIX absolute paths (Linux/macOS)
    return os.path.isabs(path)
86 changes: 46 additions & 40 deletions crategen/models/tes_models.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,12 @@
"""Each model in this module conforms to the corresponding TES model names as specified by the GA4GH schema (https://ga4gh.github.io/task-execution-schemas/docs/)."""

import os
from datetime import datetime
from enum import Enum
from typing import Optional

from pydantic import AnyUrl, BaseModel, root_validator, validator
from rfc3339_validator import validate_rfc3339 # type: ignore

from ..converters.utils import convert_to_iso8601
from ..converters.utils import is_absolute_path


class TESFileType(str, Enum):
Expand Down Expand Up @@ -79,16 +78,19 @@ class TESExecutorLog(BaseModel):
Reference: https://ga4gh.github.io/task-execution-schemas/docs/#operation/GetTask
"""

start_time: Optional[datetime] = None
end_time: Optional[datetime] = None
start_time: Optional[str] = None
end_time: Optional[str] = None
stdout: Optional[str] = None
stderr: Optional[str] = None
exit_code: int

@validator("start_time", "end_time", pre=True, always=True)
def validate_datetime(cls, value):
"""Convert start and end times to RFC 3339 format."""
return convert_to_iso8601(value)
@validator("start_time", "end_time")
def validate_datetime(cls, value, field):
"""Check correct datetime format"""
if(validate_rfc3339(value)):
return value
else:
raise ValueError(f"The '{field.name}' property must be in the rfc3339 format")


class TESExecutor(BaseModel):
Expand Down Expand Up @@ -119,8 +121,8 @@ class TESExecutor(BaseModel):
@validator("stdin", "stdout")
def validate_stdin_stdin(cls, value, field):
"""Ensure that 'stdin' and 'stdout' are absolute paths."""
if value and not os.path.isabs(value):
raise ValueError(f"The '{field.name}' attribute must contain an absolute path.")
if value and not is_absolute_path(value):
raise ValueError(f"The '{field.name}' property must be an absolute path.")
return value


Expand Down Expand Up @@ -160,33 +162,33 @@ class TESInput(BaseModel):

name: Optional[str] = None
description: Optional[str] = None
url: Optional[AnyUrl]
url: Optional[AnyUrl] = None
path: str
type: Optional[TESFileType] = None
type: Optional[TESFileType] = TESFileType.FILE
content: Optional[str] = None

@root_validator()
def validate_content_and_url(cls, values):
"""If content is set url should be ignored.
If content is not set then url should be present.
"""- If content is set url should be ignored.
- If content is not set then url should be present.
"""
content_is_set = values.get("content") and values.get("content").strip()
url_is_set = values.get("url") and values.get("url").strip()
content_is_set = bool(values.get("content") and values.get("content").strip())
url_is_set = bool(values.get("url") and values.get("url").strip())

if content_is_set:
values["url"] = None
elif not url_is_set:
elif not url_is_set and not content_is_set:
print("the url", values.get("path"))
raise ValueError(
"The 'url' attribute is required when the 'content' attribute is empty"
"Either the 'url' or 'content' properties must be set"
)
return values

@validator("path")
def validate_path(cls, value):
"""Validate that the path is an absolute path."""
if not os.path.isabs(value):
raise ValueError("The 'path' attribute must contain an absolute path.")
if not is_absolute_path(value):
raise ValueError("The 'path' property must be an absolute path.")
return value


Expand All @@ -197,7 +199,6 @@ class TESOutput(BaseModel):
name: User-provided name of output file
description: Optional users provided description field, can be used for documentation.
url: URL for the file to be copied by the TES server after the task is complete
path_prefix: The path prefix used when 'path' contains wildcards.
path: Path of the file inside the container. Must be an absolute path.
type: The type of output (e.g., FILE, DIRECTORY).
Expand All @@ -207,17 +208,14 @@ class TESOutput(BaseModel):
name: Optional[str] = None
description: Optional[str] = None
url: AnyUrl
path_prefix: Optional[str] = None
path: str
type: Optional[TESFileType] = None
type: Optional[TESFileType] = TESFileType.FILE

@validator("path")
def validate_path(cls, value, values):
def validate_path(cls, value):
"""Ensure that 'path' is an absolute path and handle wildcards."""
if not os.path.isabs(value):
raise ValueError("The 'path' attribute must contain an absolute path.")
if any(char in value for char in ['*', '?', '[', ']']) and not values.get("path_prefix"):
raise ValueError("When 'path' contains wildcards, 'path_prefix' is required.")
if not is_absolute_path(value):
raise ValueError("The 'path' property must be an absolute path.")
return value


Expand All @@ -231,23 +229,24 @@ class TESTaskLog(BaseModel):
end_time: When the task ended, in RFC 3339 format.
outputs: Information about all output files. Directory outputs are flattened into separate items.
system_logs: System logs are any logs the system decides are relevant, which are not tied directly to an Executor process. Content is implementation specific: format, size, etc.
ignore_error: If true, errors in this executor will be ignored.
Reference: [https://ga4gh.github.io/task-execution-schemas/docs/#operation/GetTask](https://ga4gh.github.io/task-execution-schemas/docs/#operation/GetTask)
"""

logs: list[TESExecutorLog]
metadata: Optional[dict[str, str]]
start_time: Optional[datetime]
end_time: Optional[datetime]
metadata: Optional[dict[str, str]] = None
start_time: Optional[str] = None
end_time: Optional[str] = None
outputs: list[TESOutputFileLog]
system_logs: Optional[list[str]]
ignore_error: Optional[bool] = False
system_logs: Optional[list[str]] = None

@validator("start_time", "end_time", pre=True, always=True)
def validate_datetime(cls, value):
"""Convert start and end times to RFC 3339 format."""
return convert_to_iso8601(value)
def validate_datetime(cls, value, field):
"""Check correct datetime format"""
if(validate_rfc3339(value)):
return value
else:
raise ValueError(f"The '{field.name}' property must be in the rfc3339 format")


class TESData(BaseModel):
Expand All @@ -273,7 +272,7 @@ class TESData(BaseModel):
id: str
name: Optional[str] = None
description: Optional[str] = None
creation_time: Optional[datetime] = None
creation_time: Optional[str] = None
state: Optional[TESState] = TESState.UNKNOWN
inputs: Optional[list[TESInput]] = None
outputs: Optional[list[TESOutput]] = None
Expand All @@ -282,3 +281,10 @@ class TESData(BaseModel):
volumes: Optional[list[str]] = None
logs: Optional[list[TESTaskLog]] = None
tags: Optional[dict[str, str]] = None

@validator("creation_time")
def validate_datetime(cls, value, field):
    """Ensure that 'creation_time' is in RFC 3339 format.

    Args:
        value: The timestamp string supplied for the field.
        field: The field being validated; its name is used in the error message.

    Returns:
        The unchanged value when it passes the RFC 3339 check.

    Raises:
        ValueError: If the value is not in RFC 3339 format.
    """
    if not validate_rfc3339(value):
        raise ValueError(f"The '{field.name}' property must be in the rfc3339 format")
    return value
27 changes: 26 additions & 1 deletion poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

14 changes: 9 additions & 5 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ requests = "^2.25.1"
pytest = "^8.3.1"
pytest-cov = "^5.0.0"
pytest-mock = "^3.14.0"
rfc3339-validator = "^0.1.4"

[tool.poetry.dev-dependencies]
pre-commit = "^2.13.0"
Expand Down Expand Up @@ -61,9 +62,13 @@ skips = [

[tool.ruff]
exclude = [
"tests/*",
"tests/unit/*",
"crategen/*"
".git",
"/.pytest_cache",
"__pycache__",
"build",
"_build",
"dist",
".env",
]
indent-width = 4

Expand All @@ -76,15 +81,14 @@ quote-style = "double"
[tool.ruff.lint]
select = [
"B", # flake8-bugbear
"D", # pydocstyle
"E", # pycodestyle
"F", # Pyflakes
"I", # isort
"PL", # pylint
"SIM", # flake8-simplify
"UP", # pyupgrade
]
ignore = ["E501"]
ignore = ["E501", "E203"]
fixable = ["ALL"]

[tool.ruff.lint.pydocstyle]
Expand Down
Loading

0 comments on commit dc6b1fa

Please sign in to comment.