petab1->2: create experiment df
dweindl committed Dec 18, 2024
1 parent 1d3fda1 commit d6f071b
Showing 8 changed files with 204 additions and 92 deletions.
7 changes: 7 additions & 0 deletions petab/v1/calculate.py
@@ -97,6 +97,9 @@ def calculate_residuals_for_table(
     Calculate residuals for a single measurement table.
     For the arguments, see `calculate_residuals`.
     """
+    # below, we rely on a unique index
+    measurement_df = measurement_df.reset_index(drop=True)
+
     # create residual df as copy of measurement df, change column
     residual_df = measurement_df.copy(deep=True).rename(
         columns={MEASUREMENT: RESIDUAL}
@@ -120,6 +123,10 @@ def calculate_residuals_for_table(
             for col in compared_cols
         ]
         mask = reduce(lambda x, y: x & y, masks)
+        if mask.sum() == 0:
+            raise ValueError(
+                f"Could not find simulation for measurement {row}."
+            )
         simulation = simulation_df.loc[mask][SIMULATION].iloc[0]
         if scale:
             # apply scaling
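
Note: before this change, a measurement with no matching simulation row failed later with an opaque IndexError on `.iloc[0]`; now it raises a descriptive ValueError. A minimal sketch of how this surfaces, assuming the v1 signature calculate_residuals_for_table(measurement_df, simulation_df, observable_df, parameter_df, normalize=True, scale=True) and hypothetical IDs:

    import pandas as pd

    from petab.v1.calculate import calculate_residuals_for_table

    # measurement refers to condition "c1" ...
    measurement_df = pd.DataFrame(
        {
            "observableId": ["obs_a"],
            "simulationConditionId": ["c1"],
            "time": [1.0],
            "measurement": [0.7],
        }
    )
    # ... but the simulation table only covers condition "c2"
    simulation_df = pd.DataFrame(
        {
            "observableId": ["obs_a"],
            "simulationConditionId": ["c2"],
            "time": [1.0],
            "simulation": [0.6],
        }
    )
    observable_df = pd.DataFrame(
        {"observableId": ["obs_a"], "observableFormula": ["A"]}
    ).set_index("observableId")

    try:
        calculate_residuals_for_table(
            measurement_df,
            simulation_df,
            observable_df,
            pd.DataFrame(),  # parameter table, unused in this sketch
            normalize=False,
            scale=False,
        )
    except ValueError as e:
        print(e)  # Could not find simulation for measurement ...
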
8 changes: 4 additions & 4 deletions petab/v1/problem.py
@@ -1149,8 +1149,8 @@ def add_measurement(
         sim_cond_id: str,
         time: float,
         measurement: float,
-        observable_parameters: Sequence[str] = None,
-        noise_parameters: Sequence[str] = None,
+        observable_parameters: Sequence[str | float] = None,
+        noise_parameters: Sequence[str | float] = None,
         preeq_cond_id: str = None,
     ):
         """Add a measurement to the problem.
@@ -1172,11 +1172,11 @@ def add_measurement(
         }
         if observable_parameters is not None:
             record[OBSERVABLE_PARAMETERS] = [
-                PARAMETER_SEPARATOR.join(observable_parameters)
+                PARAMETER_SEPARATOR.join(map(str, observable_parameters))
             ]
         if noise_parameters is not None:
             record[NOISE_PARAMETERS] = [
-                PARAMETER_SEPARATOR.join(noise_parameters)
+                PARAMETER_SEPARATOR.join(map(str, noise_parameters))
             ]
         if preeq_cond_id is not None:
             record[PREEQUILIBRATION_CONDITION_ID] = [preeq_cond_id]
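
Note: with `map(str, ...)`, numeric overrides no longer have to be stringified by the caller. A small sketch of the joining behavior (PARAMETER_SEPARATOR is ";" in PEtab):

    from petab.v1.C import PARAMETER_SEPARATOR

    # mixed numeric and symbolic overrides are both accepted now
    noise_parameters = [0.1, "sigma_b"]
    print(PARAMETER_SEPARATOR.join(map(str, noise_parameters)))  # 0.1;sigma_b

So a call like problem.add_measurement(..., noise_parameters=[0.1]) now works without a prior str() conversion by the caller.
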
14 changes: 1 addition & 13 deletions petab/v2/C.py
@@ -13,14 +13,6 @@
 #: Experiment ID column in the measurement table
 EXPERIMENT_ID = "experimentId"
 
-# TODO: remove
-#: Preequilibration condition ID column in the measurement table
-PREEQUILIBRATION_CONDITION_ID = "preequilibrationConditionId"
-
-# TODO: remove
-#: Simulation condition ID column in the measurement table
-SIMULATION_CONDITION_ID = "simulationConditionId"
-
 #: Measurement value column in the measurement table
 MEASUREMENT = "measurement"
 
@@ -45,17 +37,13 @@
 #: Mandatory columns of measurement table
 MEASUREMENT_DF_REQUIRED_COLS = [
     OBSERVABLE_ID,
-    # TODO: add
-    # EXPERIMENT_ID,
-    SIMULATION_CONDITION_ID,
+    EXPERIMENT_ID,
     MEASUREMENT,
     TIME,
 ]
 
 #: Optional columns of measurement table
 MEASUREMENT_DF_OPTIONAL_COLS = [
-    # TODO: remove
-    PREEQUILIBRATION_CONDITION_ID,
     OBSERVABLE_PARAMETERS,
     NOISE_PARAMETERS,
     DATASET_ID,
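
Note: with the condition columns gone, a v2 measurement table is keyed by experimentId. A quick sketch of a table satisfying the new required columns:

    import pandas as pd

    from petab.v2.C import MEASUREMENT_DF_REQUIRED_COLS

    measurement_df = pd.DataFrame(
        {
            "observableId": ["obs_a", "obs_a"],
            "experimentId": ["exp_1", "exp_1"],
            "measurement": [0.7, 0.1],
            "time": [1.0, 10.0],
        }
    )
    assert all(c in measurement_df for c in MEASUREMENT_DF_REQUIRED_COLS)
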
5 changes: 4 additions & 1 deletion petab/v2/__init__.py
@@ -27,7 +27,10 @@
 
 # import after v1
 from ..version import __version__  # noqa: F401, E402
-from . import models  # noqa: F401, E402
+from . import (  # noqa: F401, E402
+    C,  # noqa: F401, E402
+    models,  # noqa: F401, E402
+)
 from .conditions import *  # noqa: F403, F401, E402
 from .experiments import (  # noqa: F401, E402
     get_experiment_df,
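
Note: re-exporting C makes the v2 constants reachable through the subpackage itself:

    from petab.v2 import C

    print(C.EXPERIMENT_ID)  # experimentId
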
129 changes: 85 additions & 44 deletions petab/v2/lint.py
@@ -15,6 +15,9 @@
 from .. import v2
 from ..v1.lint import (
     _check_df,
+    assert_measured_observables_defined,
+    assert_measurements_not_null,
+    assert_measurements_numeric,
     assert_model_parameters_in_condition_or_parameter_table,
     assert_no_leading_trailing_whitespace,
     assert_parameter_bounds_are_numeric,
@@ -23,13 +26,16 @@
     assert_parameter_prior_parameters_are_valid,
     assert_parameter_prior_type_is_valid,
     assert_parameter_scale_is_valid,
+    assert_unique_observable_ids,
     assert_unique_parameter_ids,
     check_ids,
-    check_measurement_df,
     check_observable_df,
     check_parameter_bounds,
 )
-from ..v1.measurements import split_parameter_replacement_list
+from ..v1.measurements import (
+    assert_overrides_match_parameter_count,
+    split_parameter_replacement_list,
+)
 from ..v1.observables import get_output_parameters, get_placeholders
 from ..v1.visualize.lint import validate_visualization_df
 from ..v2.C import *
@@ -102,6 +108,23 @@ class ValidationError(ValidationIssue):
     level: ValidationIssueSeverity = field(
         default=ValidationIssueSeverity.ERROR, init=False
     )
+    task: str | None = None
+
+    def __post_init__(self):
+        if self.task is None:
+            self.task = self._get_task_name()
+
+    def _get_task_name(self):
+        """Get the name of the ValidationTask that raised this error."""
+        import inspect
+
+        # walk up the stack until we find the ValidationTask.run method
+        for frame_info in inspect.stack():
+            frame = frame_info.frame
+            if "self" in frame.f_locals:
+                task = frame.f_locals["self"]
+                if isinstance(task, ValidationTask):
+                    return task.__class__.__name__
 
 
 class ValidationResultList(list[ValidationIssue]):
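
Note: _get_task_name identifies the reporting task by walking the interpreter stack until it finds a frame whose `self` is a ValidationTask; this works because issues are constructed inside ValidationTask.run. A self-contained sketch of the same pattern, with hypothetical Task/Issue classes outside petab:

    import inspect


    class Task:
        def run(self):
            # the Issue is constructed while Task.run is on the stack
            return Issue()


    class Issue:
        def __init__(self):
            self.task = self._get_task_name()

        def _get_task_name(self):
            # walk up the stack until a frame whose `self` is a Task instance
            for frame_info in inspect.stack():
                candidate = frame_info.frame.f_locals.get("self")
                if isinstance(candidate, Task):
                    return type(candidate).__name__
            return None


    print(Task().run().task)  # prints: Task

Returning None when no task is found keeps construction safe outside a validation run.
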
@@ -237,8 +260,51 @@ def run(self, problem: Problem) -> ValidationIssue | None:
         if problem.measurement_df is None:
             return
 
+        df = problem.measurement_df
         try:
-            check_measurement_df(problem.measurement_df, problem.observable_df)
+            _check_df(df, MEASUREMENT_DF_REQUIRED_COLS, "measurement")
+
+            for column_name in MEASUREMENT_DF_REQUIRED_COLS:
+                if not np.issubdtype(df[column_name].dtype, np.number):
+                    assert_no_leading_trailing_whitespace(
+                        df[column_name].values, column_name
+                    )
+
+            for column_name in MEASUREMENT_DF_OPTIONAL_COLS:
+                if column_name in df and not np.issubdtype(
+                    df[column_name].dtype, np.number
+                ):
+                    assert_no_leading_trailing_whitespace(
+                        df[column_name].values, column_name
+                    )
+
+            if problem.observable_df is not None:
+                assert_measured_observables_defined(df, problem.observable_df)
+                assert_overrides_match_parameter_count(
+                    df, problem.observable_df
+                )
+
+                if OBSERVABLE_TRANSFORMATION in problem.observable_df:
+                    # Check for positivity of measurements in case of
+                    # log-transformation
+                    assert_unique_observable_ids(problem.observable_df)
+                    # If the above is not checked, in the following loop
+                    # trafo may become a pandas Series
+                    for measurement, obs_id in zip(
+                        df[MEASUREMENT], df[OBSERVABLE_ID], strict=True
+                    ):
+                        trafo = problem.observable_df.loc[
+                            obs_id, OBSERVABLE_TRANSFORMATION
+                        ]
+                        if measurement <= 0.0 and trafo in [LOG, LOG10]:
+                            raise ValueError(
+                                "Measurements with observable "
+                                f"transformation {trafo} must be "
+                                f"positive, but {measurement} <= 0."
+                            )
+
+            assert_measurements_not_null(df)
+            assert_measurements_numeric(df)
         except AssertionError as e:
             return ValidationError(str(e))
 
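
Note: among the inlined checks, the log-transformation guard is the only value-dependent one: a measurement of zero or below cannot be log-scaled. The core of that check, reduced to plain Python (LOG/LOG10 are the PEtab transformation names "log"/"log10"):

    LOG, LOG10 = "log", "log10"

    for measurement, trafo in [(0.7, LOG10), (-0.1, LOG10), (-0.1, "lin")]:
        if trafo in [LOG, LOG10] and measurement <= 0.0:
            print(f"invalid: {measurement} <= 0 under {trafo} transformation")
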
@@ -247,46 +313,20 @@ def run(self, problem: Problem) -> ValidationIssue | None:
         # condition table should be an error if the measurement table refers
         # to conditions
 
-        # check that measured experiments/conditions exist
-        # TODO: fully switch to experiment table and remove this:
-        if SIMULATION_CONDITION_ID in problem.measurement_df:
-            if problem.condition_df is None:
-                return
-            used_conditions = set(
-                problem.measurement_df[SIMULATION_CONDITION_ID].dropna().values
-            )
-            if PREEQUILIBRATION_CONDITION_ID in problem.measurement_df:
-                used_conditions |= set(
-                    problem.measurement_df[PREEQUILIBRATION_CONDITION_ID]
-                    .dropna()
-                    .values
-                )
-            available_conditions = set(
-                problem.condition_df[CONDITION_ID].unique()
-            )
-            if missing_conditions := (used_conditions - available_conditions):
-                return ValidationError(
-                    "Measurement table references conditions that "
-                    "are not specified in the condition table: "
-                    + str(missing_conditions)
-                )
-        elif EXPERIMENT_ID in problem.measurement_df:
-            if problem.experiment_df is None:
-                return
-            used_experiments = set(
-                problem.measurement_df[EXPERIMENT_ID].values
-            )
-            available_experiments = set(
-                problem.condition_df[CONDITION_ID].unique()
-            )
-            if missing_experiments := (used_experiments - available_experiments):
-                raise AssertionError(
-                    "Measurement table references experiments that "
-                    "are not specified in the experiments table: "
-                    + str(missing_experiments)
-                )
+        # check that measured experiments exist
+        if problem.experiment_df is None:
+            return
+
+        used_experiments = set(problem.measurement_df[EXPERIMENT_ID].values)
+        available_experiments = set(
+            problem.experiment_df[EXPERIMENT_ID].unique()
+        )
+        if missing_experiments := (
+            used_experiments - available_experiments
+        ):
+            raise AssertionError(
+                "Measurement table references experiments that "
+                "are not specified in the experiments table: "
+                + str(missing_experiments)
+            )
 
 
 class CheckConditionTable(ValidationTask):
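
Note: the replacement check compares the set of experiment IDs used in the measurement table against those defined in the experiment table (and also fixes the old lookup against condition_df). A standalone sketch mirroring the code above:

    import pandas as pd

    measurement_df = pd.DataFrame({"experimentId": ["exp_1", "exp_2"]})
    experiment_df = pd.DataFrame({"experimentId": ["exp_1"]})

    used_experiments = set(measurement_df["experimentId"].values)
    available_experiments = set(experiment_df["experimentId"].unique())
    if missing := used_experiments - available_experiments:
        print(f"unknown experiments referenced: {missing}")  # {'exp_2'}
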
Expand Down Expand Up @@ -486,7 +526,7 @@ def run(self, problem: Problem) -> ValidationIssue | None:
)

required_conditions = problem.experiment_df[CONDITION_ID].unique()
existing_conditions = problem.condition_df.index
existing_conditions = problem.condition_df[CONDITION_ID].unique()

missing_conditions = set(required_conditions) - set(
existing_conditions
@@ -771,7 +811,8 @@ def append_overrides(overrides):
     )
 
     # parameters that are overridden via the condition table are not allowed
-    parameter_ids -= set(problem.condition_df[TARGET_ID].unique())
+    if problem.condition_df is not None:
+        parameter_ids -= set(problem.condition_df[TARGET_ID].unique())
 
     return parameter_ids
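
Note: since the condition table is optional in PEtab v2, the unguarded subtraction would fail with TypeError ('NoneType' object is not subscriptable). The guard in miniature:

    parameter_ids = {"k1", "k2", "sigma_a"}
    condition_df = None  # a v2 problem may omit the condition table entirely

    if condition_df is not None:
        parameter_ids -= set(condition_df["targetId"].unique())

    print(parameter_ids)  # unchanged when there is no condition table
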
(Diffs for the remaining 3 changed files are not shown here.)
