Revert "feat: SoftMax Pro - report non numeric values in error docume…
Browse files Browse the repository at this point in the history
…nt (#713)"

This reverts commit 477d29e.
alejandro-salgado committed Oct 16, 2024
1 parent bdb22b9 commit 81e24b8
Showing 24 changed files with 25,395 additions and 89,737 deletions.
@@ -121,8 +121,6 @@ def _create_measurements(plate_block: PlateBlock, position: str) -> list[Measure
total_measurement_time_setting=plate_block.header.read_time,
read_interval_setting=plate_block.header.read_interval,
number_of_scans_setting=plate_block.header.kinetic_points,
# Error documents
error_document=data_element.error_document,
)
for idx, data_element in enumerate(plate_block.iter_data_elements(position))
]
174 changes: 79 additions & 95 deletions src/allotropy/parsers/moldev_softmax_pro/softmax_pro_structure.py
@@ -10,15 +10,13 @@
import pandas as pd

from allotropy.allotrope.schema_mappers.adm.plate_reader.rec._2024._06.plate_reader import (
ErrorDocument,
MeasurementType,
ScanPositionSettingPlateReader,
)
from allotropy.exceptions import (
AllotropeConversionError,
get_key_or_error,
)
from allotropy.parsers.constants import NEGATIVE_ZERO
from allotropy.parsers.lines_reader import CsvReader
from allotropy.parsers.utils.pandas import rm_df_columns, SeriesData, set_columns
from allotropy.parsers.utils.uuids import random_uuid_str
@@ -114,15 +112,13 @@ class GroupDataElement:
position: str
plate: str
entries: list[GroupDataElementEntry]
errors: list[ErrorDocument]


@dataclass(frozen=True)
class GroupSampleData:
identifier: str
data_elements: list[GroupDataElement]
aggregated_entries: list[GroupDataElementEntry]
aggregated_errors: list[ErrorDocument]

@classmethod
def create(cls, data: pd.DataFrame) -> GroupSampleData:
@@ -131,7 +127,7 @@ def create(cls, data: pd.DataFrame) -> GroupSampleData:
identifier = top_row[str, "Sample"]
data = rm_df_columns(data, r"^Sample$|^Standard Value|^R$|^Unnamed: \d+$")
# Columns are considered "numeric" if the value of the first row is a float
# Non-numeric values such as "Mask" and "Range?" will be reported as errors.
# "Mask" and "Range?" are special cases that will be considered NaN.
numeric_columns = [
column
for column in data.columns
@@ -147,43 +143,40 @@ def create(cls, data: pd.DataFrame) -> GroupSampleData:
else:
normal_columns.append(column)

data_elements = []
for row in row_data:
entries, errors = cls._get_entries_and_errors(row, normal_columns)
data_elements.append(
return GroupSampleData(
identifier=identifier,
data_elements=[
GroupDataElement(
sample=identifier,
position=row[str, ["Well", "Wells"]],
plate=row[str, "WellPlateName"],
entries=entries,
errors=errors,
entries=[
element_entry
for column_name in normal_columns
if (element_entry := cls._get_element_entry(row, column_name))
is not None
],
)
)

aggregated_entries, aggregated_errors = cls._get_entries_and_errors(
top_row, aggregated_columns
)

return GroupSampleData(
identifier=identifier,
data_elements=data_elements,
aggregated_entries=aggregated_entries,
aggregated_errors=aggregated_errors,
for row in row_data
],
aggregated_entries=[
element_entry
for column_name in aggregated_columns
if (element_entry := cls._get_element_entry(top_row, column_name))
is not None
],
)

@classmethod
def _get_entries_and_errors(
cls, data_row: SeriesData, column_names: list[str]
) -> tuple[list[GroupDataElementEntry], list[ErrorDocument]]:
entries = []
errors = []
for column in column_names:
value = data_row.get(float, column)
if value is not None:
entries.append(GroupDataElementEntry(column, value))
elif (error := data_row.get(str, column)) is not None:
errors.append(ErrorDocument(error, column))
return entries, errors
def _get_element_entry(
cls, data_row: SeriesData, column_name: str
) -> GroupDataElementEntry | None:
if (value := data_row.get(float, column_name)) is not None:
return GroupDataElementEntry(
name=column_name,
value=value,
)
return None


@dataclass(frozen=True)
@@ -311,7 +304,6 @@ class DataElement:
wavelength: float
position: str
value: float
error_document: list[ErrorDocument]
elapsed_time: list[float] = field(default_factory=list)
kinetic_measures: list[float | None] = field(default_factory=list)
sample_id: str | None = None
@@ -334,58 +326,54 @@ class PlateWavelengthData:

@staticmethod
def create(
header: PlateHeader,
plate_name: str,
temperature: float | None,
elapsed_time: float | None,
wavelength: float,
df_data: pd.DataFrame,
) -> PlateWavelengthData:
# Since value is required for the measurement class (absorbance, luminescence and fluorescence)
# we don't store data for NaN values
# TODO: Report error documents for NaN values
data = {
f"{num_to_chars(row_idx)}{col}": raw_value
f"{num_to_chars(row_idx)}{col}": value
for row_idx, *row_data in df_data.itertuples()
for col, raw_value in zip(df_data.columns, row_data, strict=True)
if (value := try_non_nan_float_or_none(raw_value)) is not None
}

data_elements = {}
for position, raw_value in data.items():
value = try_non_nan_float_or_none(raw_value)
if value is None and elapsed_time is not None:
msg = f"Missing kinetic measurement for well position {position} at {elapsed_time}s."
raise AllotropeConversionError(msg)

data_elements[str(position)] = DataElement(
uuid=random_uuid_str(),
plate=header.name,
temperature=temperature,
wavelength=wavelength,
position=str(position),
value=NEGATIVE_ZERO if value is None else value,
error_document=(
[ErrorDocument(str(raw_value), header.read_mode)]
if value is None
else []
),
elapsed_time=[elapsed_time] if elapsed_time is not None else [],
kinetic_measures=[value] if elapsed_time is not None else [],
)

return PlateWavelengthData(wavelength, data_elements)
return PlateWavelengthData(
wavelength,
data_elements={
str(position): DataElement(
uuid=random_uuid_str(),
plate=plate_name,
temperature=temperature,
wavelength=wavelength,
position=str(position),
value=value,
elapsed_time=[elapsed_time] if elapsed_time is not None else [],
kinetic_measures=[value] if elapsed_time is not None else [],
)
for position, value in data.items()
},
)

def update_kinetic_data_elements(
self, elapsed_time: float, df_data: pd.DataFrame
) -> None:
data = {
f"{num_to_chars(row_idx)}{col}": try_non_nan_float_or_none(raw_value)
f"{num_to_chars(row_idx)}{col}": value
for row_idx, *row_data in df_data.itertuples()
for col, raw_value in zip(df_data.columns, row_data, strict=True)
if (value := try_non_nan_float_or_none(raw_value)) is not None
}
for position, value in data.items():
if value is None:
try:
self.data_elements[position].elapsed_time.append(elapsed_time)
self.data_elements[position].kinetic_measures.append(value)
except KeyError as e:
msg = f"Missing kinetic measurement for well position {position} at {elapsed_time}s."
raise AllotropeConversionError(msg)

self.data_elements[position].elapsed_time.append(elapsed_time)
self.data_elements[position].kinetic_measures.append(value)
raise AllotropeConversionError(msg) from e


@dataclass(frozen=True)
@@ -467,7 +455,7 @@ def _get_wavelength_data(
end = start + header.num_columns
wavelength_data.append(
PlateWavelengthData.create(
header=header,
plate_name=header.name,
temperature=temperature,
elapsed_time=elapsed_time,
wavelength=header.wavelengths[idx],
@@ -536,6 +524,8 @@ def create(

def iter_data_elements(self, position: str) -> Iterator[DataElement]:
for wavelength_data in self.raw_data.wavelength_data:
if position not in wavelength_data.data_elements:
continue
yield wavelength_data.data_elements[position]


@@ -545,29 +535,26 @@ class TimeMeasurementData:

@staticmethod
def create(
header: PlateHeader,
plate_name: str,
wavelength: float,
row: pd.Series[float],
) -> TimeMeasurementData:
temperature = try_non_nan_float_or_none(str(row.iloc[1]))
data_elements = {}

for position, raw_value in row.iloc[2:].items():
value = try_non_nan_float_or_none(raw_value)
error_document = []
if value is None:
error_document.append(ErrorDocument(str(raw_value), header.read_mode))
data_elements[str(position)] = DataElement(
uuid=random_uuid_str(),
plate=header.name,
temperature=temperature,
wavelength=wavelength,
position=str(position),
value=NEGATIVE_ZERO if value is None else value,
error_document=error_document,
)

return TimeMeasurementData(data_elements)
return TimeMeasurementData(
data_elements={
str(position): DataElement(
uuid=random_uuid_str(),
plate=plate_name,
temperature=temperature,
wavelength=wavelength,
position=str(position),
value=value,
)
for position, raw_value in row.iloc[2:].items()
if (value := try_non_nan_float_or_none(str(raw_value))) is not None
},
)


@dataclass(frozen=True)
Expand All @@ -578,7 +565,7 @@ class TimeWavelengthData:
@staticmethod
def create(
reader: CsvReader,
header: PlateHeader,
plate_name: str,
wavelength: float,
columns: pd.Series[str],
) -> TimeWavelengthData:
@@ -590,7 +577,7 @@ def create(
return TimeWavelengthData(
wavelength=wavelength,
measurement_data=[
TimeMeasurementData.create(header, wavelength, row)
TimeMeasurementData.create(plate_name, wavelength, row)
for _, row in data.iterrows()
],
)
@@ -611,7 +598,7 @@ def create(reader: CsvReader, header: PlateHeader) -> TimeRawData:
wavelength_data=[
TimeWavelengthData.create(
reader,
header,
header.name,
wavelength,
columns,
)
@@ -671,6 +658,8 @@ def create(
def iter_data_elements(self, position: str) -> Iterator[DataElement]:
for wavelength_data in self.raw_data.wavelength_data:
for measurement_data in wavelength_data.measurement_data:
if position not in measurement_data.data_elements:
continue
yield measurement_data.data_elements[position]


@@ -1094,7 +1083,6 @@ class StructureData:
def create(reader: CsvReader) -> StructureData:
block_list = BlockList.create(reader)

# update sample_id if it was reported in the group blocks and include errors from calculated data
for group_block in block_list.group_blocks:
for group_sample_data in group_block.group_data.sample_data:
for group_data_element in group_sample_data.data_elements:
Expand All @@ -1103,9 +1091,5 @@ def create(reader: CsvReader) -> StructureData:
group_data_element.position
):
data_element.sample_id = group_data_element.sample
data_element.error_document += (
group_data_element.errors
+ group_sample_data.aggregated_errors
)

return StructureData(block_list)