Skip to content

Commit

Permalink
Fix experiment table in upconversion
Browse files Browse the repository at this point in the history
  • Loading branch information
dweindl committed Dec 20, 2024
1 parent 6a9ecd0 commit d3dd841
Show file tree
Hide file tree
Showing 2 changed files with 50 additions and 17 deletions.
39 changes: 26 additions & 13 deletions petab/v2/lint.py
Original file line number Diff line number Diff line change
Expand Up @@ -228,11 +228,16 @@ class CheckValidPetabIdColumn(ValidationTask):
"""A task to check that a given column contains only valid PEtab IDs."""

def __init__(
self, table_name: str, column_name: str, required_column: bool = True
self,
table_name: str,
column_name: str,
required_column: bool = True,
ignore_nan: bool = False,
):
self.table_name = table_name
self.column_name = column_name
self.required_column = required_column
self.ignore_nan = ignore_nan

def run(self, problem: Problem) -> ValidationIssue | None:
df = getattr(problem, f"{self.table_name}_df")
Expand All @@ -248,7 +253,10 @@ def run(self, problem: Problem) -> ValidationIssue | None:
return

try:
check_ids(df[self.column_name].values, kind=self.column_name)
ids = df[self.column_name].values
if self.ignore_nan:
ids = ids[~pd.isna(ids)]
check_ids(ids, kind=self.column_name)
except ValueError as e:
return ValidationError(str(e))

Expand Down Expand Up @@ -308,21 +316,26 @@ def run(self, problem: Problem) -> ValidationIssue | None:
except AssertionError as e:
return ValidationError(str(e))

# TODO: introduce some option for validation partial vs full
# TODO: introduce some option for validation of partial vs full
# problem. if this is supposed to be a complete problem, a missing
# condition table should be an error if the measurement table refers
# to conditions

# check that measured experiments
if problem.experiment_df is None:
return

# to conditions; otherwise it should be at most a warning
used_experiments = set(problem.measurement_df[EXPERIMENT_ID].values)
available_experiments = set(
problem.experiment_df[EXPERIMENT_ID].unique()
# handle the default experiment: drop NaN experiment IDs before the lookup
used_experiments = set(
filter(
lambda x: not isinstance(x, float) or not np.isnan(x),
used_experiments,
)
)
# check that measured experiments exist
available_experiments = (
set(problem.experiment_df[EXPERIMENT_ID].unique())
if problem.experiment_df is not None
else set()
)
if missing_experiments := (used_experiments - available_experiments):
raise AssertionError(
return ValidationError(
"Measurement table references experiments that "
"are not specified in the experiments table: "
+ str(missing_experiments)
Expand Down Expand Up @@ -826,7 +839,7 @@ def append_overrides(overrides):
CheckMeasurementTable(),
CheckConditionTable(),
CheckExperimentTable(),
CheckValidPetabIdColumn("experiment", EXPERIMENT_ID),
CheckValidPetabIdColumn("experiment", EXPERIMENT_ID, ignore_nan=True),
CheckValidPetabIdColumn("experiment", CONDITION_ID),
CheckExperimentConditionsExist(),
CheckObservableTable(),
Expand Down
28 changes: 24 additions & 4 deletions petab/v2/petab1to2.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,10 @@ def petab1to2(yaml_config: Path | str, output_dir: Path | str = None):
if get_major_version(yaml_config) != 1:
raise ValueError("PEtab problem is not version 1.")
petab_problem = ProblemV1.from_yaml(yaml_file or yaml_config)
# get rid of conditionName column if present (unsupported in v2)
petab_problem.condition_df = petab_problem.condition_df.drop(
columns=[v1.C.CONDITION_NAME], errors="ignore"
)
if v1.lint_problem(petab_problem):
raise ValueError("Provided PEtab problem does not pass linting.")

Expand All @@ -72,8 +76,6 @@ def petab1to2(yaml_config: Path | str, output_dir: Path | str = None):
# Write new YAML file
output_dir = Path(output_dir)
output_dir.mkdir(parents=True, exist_ok=True)
new_yaml_file = output_dir / Path(yaml_file).name
write_yaml(new_yaml_config, new_yaml_file)

# Update tables
# condition tables, observable tables, SBML files, parameter table:
Expand Down Expand Up @@ -104,6 +106,19 @@ def petab1to2(yaml_config: Path | str, output_dir: Path | str = None):
def create_experiment_id(sim_cond_id: str, preeq_cond_id: str) -> str:
if not sim_cond_id and not preeq_cond_id:
return ""
# check whether the conditions will exist in the v2 condition table
sim_cond_exists = (
petab_problem.condition_df.loc[sim_cond_id].notna().any()
)
preeq_cond_exists = (
preeq_cond_id
and petab_problem.condition_df.loc[preeq_cond_id].notna().any()
)
if not sim_cond_exists and not preeq_cond_exists:
# if we have only all-NaN conditions, we don't create a new
# experiment
return ""

if preeq_cond_id:
preeq_cond_id = f"{preeq_cond_id}_"
exp_id = f"experiment__{preeq_cond_id}__{sim_cond_id}"
Expand All @@ -126,6 +141,8 @@ def create_experiment_id(sim_cond_id: str, preeq_cond_id: str) -> str:
sim_cond_id = row[v1.C.SIMULATION_CONDITION_ID]
preeq_cond_id = row.get(v1.C.PREEQUILIBRATION_CONDITION_ID, "")
exp_id = create_experiment_id(sim_cond_id, preeq_cond_id)
if not exp_id:
continue
if preeq_cond_id:
experiments.append(
{
Expand Down Expand Up @@ -167,8 +184,8 @@ def create_experiment_id(sim_cond_id: str, preeq_cond_id: str) -> str:
if v1.C.PREEQUILIBRATION_CONDITION_ID in measurement_df.columns:
measurement_df[
v1.C.PREEQUILIBRATION_CONDITION_ID
] = measurement_df[v1.C.PREEQUILIBRATION_CONDITION_ID].astype(
str
] = measurement_df[v1.C.PREEQUILIBRATION_CONDITION_ID].fillna(
""
)
else:
measurement_df[v1.C.PREEQUILIBRATION_CONDITION_ID] = ""
Expand Down Expand Up @@ -209,6 +226,9 @@ def create_experiment_id(sim_cond_id: str, preeq_cond_id: str) -> str:
measurement_df, get_dest_path(measurement_file)
)

new_yaml_file = output_dir / Path(yaml_file).name
write_yaml(new_yaml_config, new_yaml_file)

# validate updated Problem
validation_issues = v2.lint_problem(new_yaml_file)

Expand Down

0 comments on commit d3dd841

Please sign in to comment.