Skip to content

Commit

Permalink
v2: fixes to validation and upconversion (#351)
Browse files Browse the repository at this point in the history
* don't create unnecessary experiment tables
* write yaml file *after* updating the config dict
* check for missing experiments
* handle anonymous experiments
  • Loading branch information
dweindl authored Dec 20, 2024
1 parent 6e762c6 commit ec58463
Show file tree
Hide file tree
Showing 2 changed files with 57 additions and 29 deletions.
38 changes: 26 additions & 12 deletions petab/v2/lint.py
Original file line number Diff line number Diff line change
Expand Up @@ -228,11 +228,16 @@ class CheckValidPetabIdColumn(ValidationTask):
"""A task to check that a given column contains only valid PEtab IDs."""

def __init__(
self, table_name: str, column_name: str, required_column: bool = True
self,
table_name: str,
column_name: str,
required_column: bool = True,
ignore_nan: bool = False,
):
self.table_name = table_name
self.column_name = column_name
self.required_column = required_column
self.ignore_nan = ignore_nan

def run(self, problem: Problem) -> ValidationIssue | None:
df = getattr(problem, f"{self.table_name}_df")
Expand All @@ -248,7 +253,10 @@ def run(self, problem: Problem) -> ValidationIssue | None:
return

try:
check_ids(df[self.column_name].values, kind=self.column_name)
ids = df[self.column_name].values
if self.ignore_nan:
ids = ids[~pd.isna(ids)]
check_ids(ids, kind=self.column_name)
except ValueError as e:
return ValidationError(str(e))

Expand Down Expand Up @@ -308,21 +316,26 @@ def run(self, problem: Problem) -> ValidationIssue | None:
except AssertionError as e:
return ValidationError(str(e))

# TODO: introduce some option for validation partial vs full
# TODO: introduce some option for validation of partial vs full
# problem. if this is supposed to be a complete problem, a missing
# condition table should be an error if the measurement table refers
# to conditions

# check that measured experiments
if problem.experiment_df is None:
return

# to conditions, otherwise it should maximally be a warning
used_experiments = set(problem.measurement_df[EXPERIMENT_ID].values)
available_experiments = set(
problem.experiment_df[EXPERIMENT_ID].unique()
# handle default-experiment
used_experiments = set(
filter(
lambda x: not pd.isna(x),
used_experiments,
)
)
# check that measured experiments exist
available_experiments = (
set(problem.experiment_df[EXPERIMENT_ID].unique())
if problem.experiment_df is not None
else set()
)
if missing_experiments := (used_experiments - available_experiments):
raise AssertionError(
return ValidationError(
"Measurement table references experiments that "
"are not specified in the experiments table: "
+ str(missing_experiments)
Expand Down Expand Up @@ -826,6 +839,7 @@ def append_overrides(overrides):
CheckMeasurementTable(),
CheckConditionTable(),
CheckExperimentTable(),
CheckValidPetabIdColumn("measurement", EXPERIMENT_ID, ignore_nan=True),
CheckValidPetabIdColumn("experiment", EXPERIMENT_ID),
CheckValidPetabIdColumn("experiment", CONDITION_ID),
CheckExperimentConditionsExist(),
Expand Down
48 changes: 31 additions & 17 deletions petab/v2/petab1to2.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,14 +9,11 @@
import pandas as pd
from pandas.io.common import get_handle, is_url

import petab.v1.C
from petab.models import MODEL_TYPE_SBML
from petab.v1 import Problem as ProblemV1
from petab.yaml import get_path_prefix

from .. import v1, v2
from ..v1.yaml import load_yaml, validate, write_yaml
from ..v1 import Problem as ProblemV1
from ..v1.yaml import get_path_prefix, load_yaml, validate, write_yaml
from ..versions import get_major_version
from .models import MODEL_TYPE_SBML

__all__ = ["petab1to2"]

Expand Down Expand Up @@ -63,18 +60,18 @@ def petab1to2(yaml_config: Path | str, output_dir: Path | str = None):
if get_major_version(yaml_config) != 1:
raise ValueError("PEtab problem is not version 1.")
petab_problem = ProblemV1.from_yaml(yaml_file or yaml_config)
# get rid of conditionName column if present (unsupported in v2)
petab_problem.condition_df = petab_problem.condition_df.drop(
columns=[v1.C.CONDITION_NAME], errors="ignore"
)
if v1.lint_problem(petab_problem):
raise ValueError("Provided PEtab problem does not pass linting.")

output_dir = Path(output_dir)

# Update YAML file
new_yaml_config = _update_yaml(yaml_config)

# Write new YAML file
output_dir = Path(output_dir)
output_dir.mkdir(parents=True, exist_ok=True)
new_yaml_file = output_dir / Path(yaml_file).name
write_yaml(new_yaml_config, new_yaml_file)

# Update tables
# condition tables, observable tables, SBML files, parameter table:
# no changes - just copy
Expand Down Expand Up @@ -104,6 +101,19 @@ def petab1to2(yaml_config: Path | str, output_dir: Path | str = None):
def create_experiment_id(sim_cond_id: str, preeq_cond_id: str) -> str:
if not sim_cond_id and not preeq_cond_id:
return ""
# check whether the conditions will exist in the v2 condition table
sim_cond_exists = (
petab_problem.condition_df.loc[sim_cond_id].notna().any()
)
preeq_cond_exists = (
preeq_cond_id
and petab_problem.condition_df.loc[preeq_cond_id].notna().any()
)
if not sim_cond_exists and not preeq_cond_exists:
# if we have only all-NaN conditions, we don't create a new
# experiment
return ""

if preeq_cond_id:
preeq_cond_id = f"{preeq_cond_id}_"
exp_id = f"experiment__{preeq_cond_id}__{sim_cond_id}"
Expand All @@ -126,6 +136,8 @@ def create_experiment_id(sim_cond_id: str, preeq_cond_id: str) -> str:
sim_cond_id = row[v1.C.SIMULATION_CONDITION_ID]
preeq_cond_id = row.get(v1.C.PREEQUILIBRATION_CONDITION_ID, "")
exp_id = create_experiment_id(sim_cond_id, preeq_cond_id)
if not exp_id:
continue
if preeq_cond_id:
experiments.append(
{
Expand Down Expand Up @@ -165,10 +177,8 @@ def create_experiment_id(sim_cond_id: str, preeq_cond_id: str) -> str:
# add pre-eq condition id if not present or convert to string
# for simplicity
if v1.C.PREEQUILIBRATION_CONDITION_ID in measurement_df.columns:
measurement_df[
v1.C.PREEQUILIBRATION_CONDITION_ID
] = measurement_df[v1.C.PREEQUILIBRATION_CONDITION_ID].astype(
str
measurement_df.fillna(
{v1.C.PREEQUILIBRATION_CONDITION_ID: ""}, inplace=True
)
else:
measurement_df[v1.C.PREEQUILIBRATION_CONDITION_ID] = ""
Expand All @@ -177,7 +187,7 @@ def create_experiment_id(sim_cond_id: str, preeq_cond_id: str) -> str:
petab_problem.condition_df is not None
and len(
set(petab_problem.condition_df.columns)
- {petab.v1.C.CONDITION_NAME}
- {v1.C.CONDITION_NAME}
)
== 0
):
Expand Down Expand Up @@ -209,6 +219,10 @@ def create_experiment_id(sim_cond_id: str, preeq_cond_id: str) -> str:
measurement_df, get_dest_path(measurement_file)
)

# Write new YAML file
new_yaml_file = output_dir / Path(yaml_file).name
write_yaml(new_yaml_config, new_yaml_file)

# validate updated Problem
validation_issues = v2.lint_problem(new_yaml_file)

Expand Down

0 comments on commit ec58463

Please sign in to comment.