Skip to content

Commit

Permalink
petab1->2: create experiment df
Browse files Browse the repository at this point in the history
  • Loading branch information
dweindl committed Dec 18, 2024
1 parent 1d3fda1 commit 7361ef2
Show file tree
Hide file tree
Showing 6 changed files with 170 additions and 75 deletions.
14 changes: 1 addition & 13 deletions petab/v2/C.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,14 +13,6 @@
#: Experiment ID column in the measurement table
EXPERIMENT_ID = "experimentId"

# TODO: remove
#: Preequilibration condition ID column in the measurement table
PREEQUILIBRATION_CONDITION_ID = "preequilibrationConditionId"

# TODO: remove
#: Simulation condition ID column in the measurement table
SIMULATION_CONDITION_ID = "simulationConditionId"

#: Measurement value column in the measurement table
MEASUREMENT = "measurement"

Expand All @@ -45,17 +37,13 @@
#: Mandatory columns of measurement table
MEASUREMENT_DF_REQUIRED_COLS = [
OBSERVABLE_ID,
# TODO: add
# EXPERIMENT_ID,
SIMULATION_CONDITION_ID,
EXPERIMENT_ID,
MEASUREMENT,
TIME,
]

#: Optional columns of measurement table
MEASUREMENT_DF_OPTIONAL_COLS = [
# TODO: remove
PREEQUILIBRATION_CONDITION_ID,
OBSERVABLE_PARAMETERS,
NOISE_PARAMETERS,
DATASET_ID,
Expand Down
5 changes: 4 additions & 1 deletion petab/v2/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,10 @@

# import after v1
from ..version import __version__ # noqa: F401, E402
from . import models # noqa: F401, E402
from . import ( # noqa: F401, E402
C, # noqa: F401, E402
models, # noqa: F401, E402
)
from .conditions import * # noqa: F403, F401, E402
from .experiments import ( # noqa: F401, E402
get_experiment_df,
Expand Down
110 changes: 67 additions & 43 deletions petab/v2/lint.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,9 @@
from .. import v2
from ..v1.lint import (
_check_df,
assert_measured_observables_defined,
assert_measurements_not_null,
assert_measurements_numeric,
assert_model_parameters_in_condition_or_parameter_table,
assert_no_leading_trailing_whitespace,
assert_parameter_bounds_are_numeric,
Expand All @@ -23,13 +26,16 @@
assert_parameter_prior_parameters_are_valid,
assert_parameter_prior_type_is_valid,
assert_parameter_scale_is_valid,
assert_unique_observable_ids,
assert_unique_parameter_ids,
check_ids,
check_measurement_df,
check_observable_df,
check_parameter_bounds,
)
from ..v1.measurements import split_parameter_replacement_list
from ..v1.measurements import (
assert_overrides_match_parameter_count,
split_parameter_replacement_list,
)
from ..v1.observables import get_output_parameters, get_placeholders
from ..v1.visualize.lint import validate_visualization_df
from ..v2.C import *
Expand Down Expand Up @@ -237,8 +243,51 @@ def run(self, problem: Problem) -> ValidationIssue | None:
if problem.measurement_df is None:
return

df = problem.measurement_df
try:
check_measurement_df(problem.measurement_df, problem.observable_df)
_check_df(df, MEASUREMENT_DF_REQUIRED_COLS, "measurement")

for column_name in MEASUREMENT_DF_REQUIRED_COLS:
if not np.issubdtype(df[column_name].dtype, np.number):
assert_no_leading_trailing_whitespace(
df[column_name].values, column_name
)

for column_name in MEASUREMENT_DF_OPTIONAL_COLS:
if column_name in df and not np.issubdtype(
df[column_name].dtype, np.number
):
assert_no_leading_trailing_whitespace(
df[column_name].values, column_name
)

if problem.observable_df is not None:
assert_measured_observables_defined(df, problem.observable_df)
assert_overrides_match_parameter_count(
df, problem.observable_df
)

if OBSERVABLE_TRANSFORMATION in problem.observable_df:
# Check for positivity of measurements in case of
# log-transformation
assert_unique_observable_ids(problem.observable_df)
# If the above is not checked, in the following loop
# trafo may become a pandas Series
for measurement, obs_id in zip(
df[MEASUREMENT], df[OBSERVABLE_ID], strict=True
):
trafo = problem.observable_df.loc[
obs_id, OBSERVABLE_TRANSFORMATION
]
if measurement <= 0.0 and trafo in [LOG, LOG10]:
raise ValueError(
"Measurements with observable "
f"transformation {trafo} must be "
f"positive, but {measurement} <= 0."
)

assert_measurements_not_null(df)
assert_measurements_numeric(df)
except AssertionError as e:
return ValidationError(str(e))

Expand All @@ -247,46 +296,20 @@ def run(self, problem: Problem) -> ValidationIssue | None:
# condition table should be an error if the measurement table refers
# to conditions

# check that measured experiments/conditions exist
# TODO: fully switch to experiment table and remove this:
if SIMULATION_CONDITION_ID in problem.measurement_df:
if problem.condition_df is None:
return
used_conditions = set(
problem.measurement_df[SIMULATION_CONDITION_ID].dropna().values
)
if PREEQUILIBRATION_CONDITION_ID in problem.measurement_df:
used_conditions |= set(
problem.measurement_df[PREEQUILIBRATION_CONDITION_ID]
.dropna()
.values
)
available_conditions = set(
problem.condition_df[CONDITION_ID].unique()
)
if missing_conditions := (used_conditions - available_conditions):
return ValidationError(
"Measurement table references conditions that "
"are not specified in the condition table: "
+ str(missing_conditions)
)
elif EXPERIMENT_ID in problem.measurement_df:
if problem.experiment_df is None:
return
used_experiments = set(
problem.measurement_df[EXPERIMENT_ID].values
)
available_experiments = set(
problem.condition_df[CONDITION_ID].unique()
# check that measured experiments
if problem.experiment_df is None:
return

used_experiments = set(problem.measurement_df[EXPERIMENT_ID].values)
available_experiments = set(
problem.condition_df[CONDITION_ID].unique()
)
if missing_experiments := (used_experiments - available_experiments):
raise AssertionError(
"Measurement table references experiments that "
"are not specified in the experiments table: "
+ str(missing_experiments)
)
if missing_experiments := (
used_experiments - available_experiments
):
raise AssertionError(
"Measurement table references experiments that "
"are not specified in the experiments table: "
+ str(missing_experiments)
)


class CheckConditionTable(ValidationTask):
Expand Down Expand Up @@ -771,7 +794,8 @@ def append_overrides(overrides):
)

# parameters that are overridden via the condition table are not allowed
parameter_ids -= set(problem.condition_df[TARGET_ID].unique())
if problem.condition_df is not None:
parameter_ids -= set(problem.condition_df[TARGET_ID].unique())

return parameter_ids

Expand Down
102 changes: 93 additions & 9 deletions petab/v2/petab1to2.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@
from itertools import chain
from pathlib import Path
from urllib.parse import urlparse
from uuid import uuid4

import numpy as np
import pandas as pd
from pandas.io.common import get_handle, is_url

Expand Down Expand Up @@ -98,10 +98,81 @@ def petab1to2(yaml_config: Path | str, output_dir: Path | str = None):
condition_df = v1v2_condition_df(condition_df, petab_problem.model)
v2.write_condition_df(condition_df, get_dest_path(condition_file))

# records for the experiment table to be created
experiments = []

def create_experiment_id(sim_cond_id: str, preeq_cond_id: str) -> str:
    """Derive the experiment ID for a simulation/pre-equilibration pair.

    The ID is a pure function of the two condition IDs, so calling this
    again for a measurement row yields the same ID that was used when the
    experiment records were created.

    Arguments:
        sim_cond_id: The simulation condition ID (may be empty).
        preeq_cond_id: The pre-equilibration condition ID (may be empty).

    Returns:
        ``""`` if both IDs are empty, otherwise
        ``experiment_[<preeq_cond_id>_]<sim_cond_id>``.
    """
    if not sim_cond_id and not preeq_cond_id:
        return ""
    # NOTE: the former ``exp_id in experiments`` de-duplication was dropped:
    # ``experiments`` holds dict records, so the string membership test could
    # never succeed, and appending a counter would have made the IDs of
    # measurements diverge from those in the experiment table.
    prefix = f"{preeq_cond_id}_" if preeq_cond_id else ""
    return f"experiment_{prefix}{sim_cond_id}"

measured_experiments = (
petab_problem.get_simulation_conditions_from_measurement_df()
)
for (
_,
row,
) in measured_experiments.iterrows():
# generate a new experiment for each simulation / pre-eq condition
# combination
sim_cond_id = row[v1.C.SIMULATION_CONDITION_ID]
preeq_cond_id = row.get(v1.C.PREEQUILIBRATION_CONDITION_ID, "")
exp_id = create_experiment_id(sim_cond_id, preeq_cond_id)
if preeq_cond_id:
experiments.append(
{
v2.C.EXPERIMENT_ID: exp_id,
v2.C.CONDITION_ID: preeq_cond_id,
v2.C.TIME: float("-inf"),
}
)
experiments.append(
{
v2.C.EXPERIMENT_ID: exp_id,
v2.C.CONDITION_ID: sim_cond_id,
v2.C.TIME: 0,
}
)
if experiments:
exp_table_path = output_dir / "experiments.tsv"
if exp_table_path.exists():
raise ValueError(
f"Experiment table file {exp_table_path} already exists."
)
problem_config[v2.C.EXPERIMENT_FILES] = [exp_table_path.name]
v2.write_experiment_df(
v2.get_experiment_df(pd.DataFrame(experiments)), exp_table_path
)

for measurement_file in problem_config.get(v2.C.MEASUREMENT_FILES, []):
measurement_df = v1.get_measurement_df(
get_src_path(measurement_file)
)
# if there is already an experiment ID column, we rename it
if v2.C.EXPERIMENT_ID in measurement_df.columns:
measurement_df.rename(
columns={v2.C.EXPERIMENT_ID: f"experiment_id_{uuid4()}"},
inplace=True,
)
# add pre-eq condition id if not present or convert to string
# for simplicity
if v1.C.PREEQUILIBRATION_CONDITION_ID in measurement_df.columns:
measurement_df[
v1.C.PREEQUILIBRATION_CONDITION_ID
] = measurement_df[v1.C.PREEQUILIBRATION_CONDITION_ID].astype(
str
)
else:
measurement_df[v1.C.PREEQUILIBRATION_CONDITION_ID] = ""

if (
petab_problem.condition_df is not None
and len(
Expand All @@ -110,20 +181,33 @@ def petab1to2(yaml_config: Path | str, output_dir: Path | str = None):
)
== 0
):
# can't have "empty" conditions with no overrides in v2
# TODO: this needs to be done condition wise
measurement_df[v2.C.SIMULATION_CONDITION_ID] = np.nan
# we can't have "empty" conditions with no overrides in v2,
# therefore, we drop the respective condition ID completely
# TODO: or can we?
# TODO: this needs to be checked condition-wise, not globally
measurement_df[v1.C.SIMULATION_CONDITION_ID] = ""
if (
v1.C.PREEQUILIBRATION_CONDITION_ID
in measurement_df.columns
):
measurement_df[v2.C.PREEQUILIBRATION_CONDITION_ID] = np.nan
measurement_df[v1.C.PREEQUILIBRATION_CONDITION_ID] = ""
# condition IDs to experiment IDs
measurement_df.insert(
0,
v2.C.EXPERIMENT_ID,
measurement_df.apply(
lambda row: create_experiment_id(
row[v1.C.SIMULATION_CONDITION_ID],
row.get(v1.C.PREEQUILIBRATION_CONDITION_ID, ""),
),
axis=1,
),
)
del measurement_df[v1.C.SIMULATION_CONDITION_ID]
del measurement_df[v1.C.PREEQUILIBRATION_CONDITION_ID]
v2.write_measurement_df(
measurement_df, get_dest_path(measurement_file)
)
# TODO: Measurements: preequilibration to experiments/timecourses once
# finalized
...

# validate updated Problem
validation_issues = v2.lint_problem(new_yaml_file)
Expand Down Expand Up @@ -189,7 +273,7 @@ def v1v2_condition_df(
"""Convert condition table from petab v1 to v2."""
condition_df = condition_df.copy().reset_index()
with suppress(KeyError):
# TODO: are condition names still supported in v2?
# conditionName was dropped in PEtab v2
condition_df.drop(columns=[v2.C.CONDITION_NAME], inplace=True)

condition_df = condition_df.melt(
Expand Down
10 changes: 3 additions & 7 deletions petab/v2/problem.py
Original file line number Diff line number Diff line change
Expand Up @@ -908,27 +908,25 @@ def add_parameter(
def add_measurement(
self,
obs_id: str,
sim_cond_id: str,
experiment_id: str,
time: float,
measurement: float,
observable_parameters: Sequence[str] = None,
noise_parameters: Sequence[str] = None,
preeq_cond_id: str = None,
):
"""Add a measurement to the problem.
Arguments:
obs_id: The observable ID
sim_cond_id: The simulation condition ID
experiment_id: The experiment ID
time: The measurement time
measurement: The measurement value
observable_parameters: The observable parameters
noise_parameters: The noise parameters
preeq_cond_id: The pre-equilibration condition ID
"""
record = {
OBSERVABLE_ID: [obs_id],
SIMULATION_CONDITION_ID: [sim_cond_id],
EXPERIMENT_ID: [experiment_id],
TIME: [time],
MEASUREMENT: [measurement],
}
Expand All @@ -940,8 +938,6 @@ def add_measurement(
record[NOISE_PARAMETERS] = [
PARAMETER_SEPARATOR.join(noise_parameters)
]
if preeq_cond_id is not None:
record[PREEQUILIBRATION_CONDITION_ID] = [preeq_cond_id]

tmp_df = pd.DataFrame(record)
self.measurement_df = (
Expand Down
4 changes: 2 additions & 2 deletions tests/v2/test_problem.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ def test_load_remote():
"""Test loading remote files"""
yaml_url = (
"https://raw.githubusercontent.com/PEtab-dev/petab_test_suite"
"/update_v2/petabtests/cases/v2.0.0/sbml/0001/_0001.yaml"
"/update_v2/petabtests/cases/v2.0.0/sbml/0010/_0010.yaml"
)
petab_problem = Problem.from_yaml(yaml_url)

Expand Down Expand Up @@ -83,7 +83,7 @@ def test_problem_from_yaml_multiple_files():
problem.experiment_df, Path(tmpdir, f"experiments{i}.tsv")
)

problem.add_measurement(f"observable{i}", f"condition{i}", 1, 1)
problem.add_measurement(f"observable{i}", f"experiment{i}", 1, 1)
petab.write_measurement_df(
problem.measurement_df, Path(tmpdir, f"measurements{i}.tsv")
)
Expand Down

0 comments on commit 7361ef2

Please sign in to comment.