Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

v2: fixes to validation and upconversion #351

Merged
merged 2 commits into from
Dec 20, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
38 changes: 26 additions & 12 deletions petab/v2/lint.py
Original file line number Diff line number Diff line change
Expand Up @@ -228,11 +228,16 @@ class CheckValidPetabIdColumn(ValidationTask):
"""A task to check that a given column contains only valid PEtab IDs."""

def __init__(
self, table_name: str, column_name: str, required_column: bool = True
self,
table_name: str,
column_name: str,
required_column: bool = True,
ignore_nan: bool = False,
):
self.table_name = table_name
self.column_name = column_name
self.required_column = required_column
self.ignore_nan = ignore_nan

def run(self, problem: Problem) -> ValidationIssue | None:
df = getattr(problem, f"{self.table_name}_df")
Expand All @@ -248,7 +253,10 @@ def run(self, problem: Problem) -> ValidationIssue | None:
return

try:
check_ids(df[self.column_name].values, kind=self.column_name)
ids = df[self.column_name].values
if self.ignore_nan:
ids = ids[~pd.isna(ids)]
check_ids(ids, kind=self.column_name)
except ValueError as e:
return ValidationError(str(e))

Expand Down Expand Up @@ -308,21 +316,26 @@ def run(self, problem: Problem) -> ValidationIssue | None:
except AssertionError as e:
return ValidationError(str(e))

# TODO: introduce some option for validation partial vs full
# TODO: introduce some option for validation of partial vs full
# problem. if this is supposed to be a complete problem, a missing
# condition table should be an error if the measurement table refers
# to conditions

# check that measured experiments
if problem.experiment_df is None:
return

# to conditions, otherwise it should maximally be a warning
used_experiments = set(problem.measurement_df[EXPERIMENT_ID].values)
available_experiments = set(
problem.experiment_df[EXPERIMENT_ID].unique()
# handle default-experiment
used_experiments = set(
filter(
lambda x: not pd.isna(x),
used_experiments,
)
)
# check that measured experiments exist
available_experiments = (
set(problem.experiment_df[EXPERIMENT_ID].unique())
if problem.experiment_df is not None
else set()
)
if missing_experiments := (used_experiments - available_experiments):
raise AssertionError(
return ValidationError(
"Measurement table references experiments that "
"are not specified in the experiments table: "
+ str(missing_experiments)
Expand Down Expand Up @@ -826,6 +839,7 @@ def append_overrides(overrides):
CheckMeasurementTable(),
CheckConditionTable(),
CheckExperimentTable(),
CheckValidPetabIdColumn("measurement", EXPERIMENT_ID, ignore_nan=True),
CheckValidPetabIdColumn("experiment", EXPERIMENT_ID),
CheckValidPetabIdColumn("experiment", CONDITION_ID),
CheckExperimentConditionsExist(),
Expand Down
48 changes: 31 additions & 17 deletions petab/v2/petab1to2.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,14 +9,11 @@
import pandas as pd
from pandas.io.common import get_handle, is_url

import petab.v1.C
from petab.models import MODEL_TYPE_SBML
from petab.v1 import Problem as ProblemV1
from petab.yaml import get_path_prefix

from .. import v1, v2
from ..v1.yaml import load_yaml, validate, write_yaml
from ..v1 import Problem as ProblemV1
from ..v1.yaml import get_path_prefix, load_yaml, validate, write_yaml
from ..versions import get_major_version
from .models import MODEL_TYPE_SBML

__all__ = ["petab1to2"]

Expand Down Expand Up @@ -63,18 +60,18 @@ def petab1to2(yaml_config: Path | str, output_dir: Path | str = None):
if get_major_version(yaml_config) != 1:
raise ValueError("PEtab problem is not version 1.")
petab_problem = ProblemV1.from_yaml(yaml_file or yaml_config)
# get rid of conditionName column if present (unsupported in v2)
petab_problem.condition_df = petab_problem.condition_df.drop(
columns=[v1.C.CONDITION_NAME], errors="ignore"
)
if v1.lint_problem(petab_problem):
raise ValueError("Provided PEtab problem does not pass linting.")

output_dir = Path(output_dir)

# Update YAML file
new_yaml_config = _update_yaml(yaml_config)

# Write new YAML file
output_dir = Path(output_dir)
output_dir.mkdir(parents=True, exist_ok=True)
new_yaml_file = output_dir / Path(yaml_file).name
write_yaml(new_yaml_config, new_yaml_file)

# Update tables
# condition tables, observable tables, SBML files, parameter table:
# no changes - just copy
Expand Down Expand Up @@ -104,6 +101,19 @@ def petab1to2(yaml_config: Path | str, output_dir: Path | str = None):
def create_experiment_id(sim_cond_id: str, preeq_cond_id: str) -> str:
if not sim_cond_id and not preeq_cond_id:
return ""
# check whether the conditions will exist in the v2 condition table
sim_cond_exists = (
petab_problem.condition_df.loc[sim_cond_id].notna().any()
)
preeq_cond_exists = (
preeq_cond_id
and petab_problem.condition_df.loc[preeq_cond_id].notna().any()
)
if not sim_cond_exists and not preeq_cond_exists:
# if we have only all-NaN conditions, we don't create a new
# experiment
return ""

if preeq_cond_id:
preeq_cond_id = f"{preeq_cond_id}_"
exp_id = f"experiment__{preeq_cond_id}__{sim_cond_id}"
Expand All @@ -126,6 +136,8 @@ def create_experiment_id(sim_cond_id: str, preeq_cond_id: str) -> str:
sim_cond_id = row[v1.C.SIMULATION_CONDITION_ID]
preeq_cond_id = row.get(v1.C.PREEQUILIBRATION_CONDITION_ID, "")
exp_id = create_experiment_id(sim_cond_id, preeq_cond_id)
if not exp_id:
continue
if preeq_cond_id:
experiments.append(
{
Expand Down Expand Up @@ -165,10 +177,8 @@ def create_experiment_id(sim_cond_id: str, preeq_cond_id: str) -> str:
# add pre-eq condition id if not present or convert to string
# for simplicity
if v1.C.PREEQUILIBRATION_CONDITION_ID in measurement_df.columns:
measurement_df[
v1.C.PREEQUILIBRATION_CONDITION_ID
] = measurement_df[v1.C.PREEQUILIBRATION_CONDITION_ID].astype(
str
measurement_df.fillna(
{v1.C.PREEQUILIBRATION_CONDITION_ID: ""}, inplace=True
)
else:
measurement_df[v1.C.PREEQUILIBRATION_CONDITION_ID] = ""
Expand All @@ -177,7 +187,7 @@ def create_experiment_id(sim_cond_id: str, preeq_cond_id: str) -> str:
petab_problem.condition_df is not None
and len(
set(petab_problem.condition_df.columns)
- {petab.v1.C.CONDITION_NAME}
- {v1.C.CONDITION_NAME}
)
== 0
):
Expand Down Expand Up @@ -209,6 +219,10 @@ def create_experiment_id(sim_cond_id: str, preeq_cond_id: str) -> str:
measurement_df, get_dest_path(measurement_file)
)

# Write new YAML file
new_yaml_file = output_dir / Path(yaml_file).name
write_yaml(new_yaml_config, new_yaml_file)
dweindl marked this conversation as resolved.
Show resolved Hide resolved

# validate updated Problem
validation_issues = v2.lint_problem(new_yaml_file)

Expand Down
Loading