Skip to content

Commit

Permalink
Chore/refactor wes models (#28)
Browse files Browse the repository at this point in the history
Co-authored-by: salihuDickson <[email protected]>
  • Loading branch information
SalihuDickson and SalihuDickson authored Sep 12, 2024
1 parent 71b1143 commit 9f5b887
Show file tree
Hide file tree
Showing 7 changed files with 92 additions and 64 deletions.
1 change: 0 additions & 1 deletion .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,6 @@ jobs:
- name: Lint with Ruff
run: |
poetry run ruff check crategen/
if: ${{ success() }}
- name: Type check with Mypy
run: |
Expand Down
24 changes: 14 additions & 10 deletions crategen/converters/tes_converter.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
from datetime import datetime

from pydantic import AnyUrl, ValidationError

Expand Down Expand Up @@ -52,7 +51,7 @@ def convert_to_wrroc(self, data: dict) -> dict:
"startTime": data_tes.creation_time,
"endTime": end_time,
}

validate_wrroc_tes(wrroc_data)
return wrroc_data

Expand All @@ -76,23 +75,28 @@ def convert_from_wrroc(self, data: dict) -> dict:
raise ValueError(f"Invalid WRROC data: {e.errors()}") from e

# Convert URL strings to AnyUrl
tes_inputs = [TESInput(url=AnyUrl(url=obj.id), path=obj.name) for obj in data_wrroc.object]
tes_outputs = [TESOutput(url=AnyUrl(url=res.id), path=res.name) for res in data_wrroc.result]
tes_inputs = [
TESInput(url=AnyUrl(url=obj.id), path=obj.name) for obj in data_wrroc.object
]
tes_outputs = [
TESOutput(url=AnyUrl(url=data_wrroc.result.id), path=data_wrroc.result.name)
]

# Ensure 'image' and 'command' fields are provided
tes_executors = [TESExecutor(image=data_wrroc.instrument or "", command=[])] # Provide default empty list for command
tes_executors = [
TESExecutor(image=data_wrroc.instrument or "", command=[])
] # Provide default empty list for command

# Ensure correct type for end_time (datetime)
end_time = datetime.fromisoformat(data_wrroc.endTime) if data_wrroc.endTime else None

tes_logs = [
TESTaskLog(
logs=[],
metadata=None,
start_time=None,
end_time=end_time,
end_time=data_wrroc.endTime,
outputs=[],
system_logs=None
system_logs=None,
)
]

Expand All @@ -103,9 +107,9 @@ def convert_from_wrroc(self, data: dict) -> dict:
executors=tes_executors,
inputs=tes_inputs,
outputs=tes_outputs,
creation_time=None,
creation_time=None,
logs=tes_logs,
state=TESState.UNKNOWN
state=TESState.UNKNOWN,
)

# Validate TES data before returning
Expand Down
46 changes: 25 additions & 21 deletions crategen/converters/wes_converter.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from pydantic import ValidationError

from ..models.wes_models import Log, RunRequest, WESData, WESOutputs
from ..models.wrroc_models import WRROCDataWES
from ..models.wes_models import Log, RunRequest, State, WESData
from ..models.wrroc_models import WRROCDataWES, WRROCOutputs
from ..utils import convert_to_iso8601
from .abstract_converter import AbstractConverter

Expand All @@ -26,20 +26,24 @@ def convert_to_wrroc(self, data: dict) -> dict:
except ValidationError as e:
raise ValueError(f"Invalid WES data: {e.errors()}") from e

# create the object using the model
wrroc_data = {
"@id": data_wes.run_id,
"name": data_wes.run_log.name,
"status": data_wes.state,
"startTime": convert_to_iso8601(data_wes.run_log.start_time),
"endTime": convert_to_iso8601(data_wes.run_log.end_time),
"result": [
{"@id": output.location, "name": output.name}
for output in data_wes.outputs
],
}

return wrroc_data
wrroc_output = (
WRROCOutputs(
id=data_wes.outputs.get("location"), name=data_wes.outputs.get("name")
)
if data_wes.outputs.get("location")
else None
)

wrroc_data = WRROCDataWES(
id=data_wes.run_id,
name=data_wes.run_log.name,
status=data_wes.state,
startTime=convert_to_iso8601(data_wes.run_log.start_time),
endTime=convert_to_iso8601(data_wes.run_log.end_time),
result=wrroc_output,
)

return wrroc_data.dict(exclude_none=True)

def convert_from_wrroc(self, data: dict) -> dict:
"""
Expand All @@ -62,9 +66,6 @@ def convert_from_wrroc(self, data: dict) -> dict:
f"Invalid WRROC data for WES conversion: {e.errors()}"
) from e

wes_outputs = [
WESOutputs(location=res.id, name=res.name) for res in data_wrroc.result
]
wes_run_log = Log(
name=data_wrroc.name,
start_time=data_wrroc.startTime,
Expand All @@ -74,15 +75,18 @@ def convert_from_wrroc(self, data: dict) -> dict:
workflow_params={}, # Adjust as necessary
workflow_type="CWL", # Example type, adjust as necessary
workflow_type_version="v1.0", # Example version, adjust as necessary
workflow_url="",
)

state = State(data_wrroc.status)

wes_data = WESData(
run_id=data_wrroc.id,
request=wes_request,
state=data_wrroc.status,
state=state,
run_log=wes_run_log,
task_logs=None, # Provide appropriate value
outputs=wes_outputs,
outputs={"location": data_wrroc.result.id, "name": data_wrroc.result.name},
)

# Validate WES data before returning
Expand Down
50 changes: 30 additions & 20 deletions crategen/models/wes_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,17 +45,29 @@ class Log(BaseModel):
- **exit_code** (`Optional[int]`): The exit code of the program.
- **system_logs** (`Optional[list[str]]`): Any logs the system decides are relevant, which are not tied directly to a workflow.
**Reference:** https://ga4gh.github.io/workflow-execution-service-schemas/docs/#tag/runlog_model
**Attributes:**
- **name** (`Optional[str]`): The task or workflow name.
- **cmd** (`Optional[list[str]]`): The command line that was executed.
- **start_time** (`Optional[str]`): When the command started executing, in ISO 8601 format.
- **end_time** (`Optional[str]`): When the command stopped executing, in ISO 8601 format.
- **stdout** (`Optional[str]`): A URL to retrieve standard output logs of the workflow run or task.
- **stderr** (`Optional[str]`): A URL to retrieve standard error logs of the workflow run or task.
- **exit_code** (`Optional[int]`): The exit code of the program.
- **system_logs** (`Optional[list[str]]`): Any logs the system decides are relevant, which are not tied directly to a workflow.
**Reference:** https://ga4gh.github.io/workflow-execution-service-schemas/docs/#tag/runlog_model
"""

name: Optional[str]
start_time: Optional[datetime]
end_time: Optional[datetime]
cmd: Optional[list[str]]
stdout: Optional[str]
stderr: Optional[str]
exit_code: Optional[int]
system_logs: Optional[list[str]]
name: Optional[str] = None
start_time: Optional[datetime] = None
end_time: Optional[datetime] = None
cmd: Optional[list[str]] = None
stdout: Optional[str] = None
stderr: Optional[str] = None
exit_code: Optional[int] = None
system_logs: Optional[list[str]] = None

@validator("start_time", "end_time")
def validate_datetime(value):
Expand Down Expand Up @@ -84,9 +96,7 @@ class TaskLog(Log):

id: str
tes_uri: Optional[str]
name: str = Field(
...
) # test if adding Field makes a diff, gemini says no on specific questioning.
name: str = Field(...)


class RunRequest(BaseModel):
Expand All @@ -111,9 +121,9 @@ class RunRequest(BaseModel):
workflow_type: str
workflow_type_version: str
tags: Optional[dict[str, str]] = {}
workflow_engine_parameters: Optional[dict[str, str]]
workflow_engine: Optional[str]
workflow_engine_version: Optional[str]
workflow_engine_parameters: Optional[dict[str, str]] = None
workflow_engine: Optional[str] = None
workflow_engine_version: Optional[str] = None
workflow_url: str

@root_validator()
Expand Down Expand Up @@ -149,12 +159,12 @@ class WESData(BaseModel):
"""

run_id: str
request: Optional[RunRequest]
state: Optional[State]
run_log: Optional[Log]
task_logs_url: Optional[str]
task_logs: Optional[list[Log | TaskLog] | None]
outputs: dict[str, str]
request: Optional[RunRequest] = None
state: Optional[State] = None
run_log: Optional[Log] = None
task_logs_url: Optional[str] = None
task_logs: Optional[list[Log | TaskLog] | None] = None
outputs: dict[str, str] = None

@root_validator
def check_deprecated_fields(cls, values):
Expand Down
25 changes: 14 additions & 11 deletions crategen/models/wrroc_models.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,10 @@
from typing import Optional
from datetime import datetime
from typing import Annotated, Optional

from pydantic import AnyUrl, BaseModel, Field

WRROC_ID = Annotated[str, Field(alias="@id")]


class WRROCInputs(BaseModel):
"""
Expand All @@ -12,7 +15,7 @@ class WRROCInputs(BaseModel):
name (str): The name of the input.
"""

id: str = Field(alias='@id')
id: WRROC_ID
name: str


Expand All @@ -25,7 +28,7 @@ class WRROCOutputs(BaseModel):
name (str): The name of the output.
"""

id: str = Field(alias='@id')
id: WRROC_ID
name: str


Expand All @@ -39,20 +42,20 @@ class WRROCDataBase(BaseModel):
description (Optional[str]): A brief description of the WRROC entity.
instrument (Optional[str]): The instrument used in the WRROC entity.
object (list[WRROCInputs]): A list of input objects related to the WRROC entity.
result (list[WRROCOutputs]): A list of output results related to the WRROC entity.
startTime (Optional[str]): The start time of the WRROC entity.
endTime (Optional[str]): The end time of the WRROC entity.
result (Optional[WRROCOutputs]): The output result related to the WRROC entity, if any.
startTime (Optional[datetime]): The start time of the WRROC entity.
endTime (Optional[datetime]): The end time of the WRROC entity.
version (Optional[str]): The version of the WRROC entity.
"""

id: str = Field(alias='@id')
id: WRROC_ID
name: str
description: Optional[str] = ""
instrument: Optional[str] = None
object: list[WRROCInputs] = Field(default_factory=list)
result: list[WRROCOutputs] = Field(default_factory=list)
startTime: Optional[str] = None
endTime: Optional[str] = None
result: Optional[WRROCOutputs] = None
startTime: Optional[datetime] = None
endTime: Optional[datetime] = None
version: Optional[str] = None

class Config:
Expand Down Expand Up @@ -102,7 +105,7 @@ class WRROCProcess(BaseModel):
profiles (Optional[list[AnyUrl]]): URLs to the RO-Crate profiles used.
"""

id: str = Field(alias='@id')
id: WRROC_ID
name: str
description: Optional[str] = ""
startTime: Optional[str] = None
Expand Down
4 changes: 4 additions & 0 deletions lefthook.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4,3 +4,7 @@ pre-push:
files: git diff --name-only --diff-filter=d $(git merge-base origin/main HEAD)..HEAD
run: poetry run ruff check {files}
glob: '*.py'
mypy:
files: git diff --name-only --diff-filter=d $(git merge-base origin/main HEAD)..HEAD
run: poetry run mypy {files}
glob: '*.py'
6 changes: 5 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -50,8 +50,12 @@ pytest-mock = "^3.14.0"
[tool.poetry.group.types.dependencies]
mypy = "^1.10.1"

[tool.mypy]
strict_optional = false


[[tool.mypy.overrides]]
module = ['crategen.validators']
ignore_errors = true

[tool.poetry.group.dev.dependencies]
ruff = "^0.6.3"
Expand Down

0 comments on commit 9f5b887

Please sign in to comment.