remove: SoftMax Pro report non numeric values in error document #725

Merged 1 commit on Oct 16, 2024
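In short, non-numeric SoftMax Pro cell values (for example "Mask", "Range?", or NaN) are no longer turned into ErrorDocument entries with a NEGATIVE_ZERO placeholder value; they are now filtered out before DataElement and GroupDataElementEntry objects are built. Below is a minimal sketch of that filtering pattern, using a hypothetical re-implementation of the try_non_nan_float_or_none helper (the real helper lives in allotropy's parser utilities and may differ in detail):

import math


def try_non_nan_float_or_none(raw_value: object) -> float | None:
    # Hypothetical stand-in for allotropy's helper of the same name: returns a
    # float for numeric input and None for NaN or non-numeric strings.
    try:
        value = float(str(raw_value))
    except ValueError:
        return None
    return None if math.isnan(value) else value


raw_row = {"A1": "0.52", "A2": "Mask", "A3": "nan", "A4": "1.07"}

# After this change, cells whose value cannot be parsed as a real number are
# simply not stored (previously they were kept as NEGATIVE_ZERO plus an
# ErrorDocument).
data = {
    position: value
    for position, raw_value in raw_row.items()
    if (value := try_non_nan_float_or_none(raw_value)) is not None
}

print(data)  # {'A1': 0.52, 'A4': 1.07}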
@@ -121,8 +121,6 @@ def _create_measurements(plate_block: PlateBlock, position: str) -> list[Measure
total_measurement_time_setting=plate_block.header.read_time,
read_interval_setting=plate_block.header.read_interval,
number_of_scans_setting=plate_block.header.kinetic_points,
# Error documents
error_document=data_element.error_document,
)
for idx, data_element in enumerate(plate_block.iter_data_elements(position))
]
174 changes: 79 additions & 95 deletions src/allotropy/parsers/moldev_softmax_pro/softmax_pro_structure.py
@@ -10,15 +10,13 @@
import pandas as pd

from allotropy.allotrope.schema_mappers.adm.plate_reader.rec._2024._06.plate_reader import (
ErrorDocument,
MeasurementType,
ScanPositionSettingPlateReader,
)
from allotropy.exceptions import (
AllotropeConversionError,
get_key_or_error,
)
from allotropy.parsers.constants import NEGATIVE_ZERO
from allotropy.parsers.lines_reader import CsvReader
from allotropy.parsers.utils.pandas import rm_df_columns, SeriesData, set_columns
from allotropy.parsers.utils.uuids import random_uuid_str
@@ -114,15 +112,13 @@ class GroupDataElement:
position: str
plate: str
entries: list[GroupDataElementEntry]
errors: list[ErrorDocument]


@dataclass(frozen=True)
class GroupSampleData:
identifier: str
data_elements: list[GroupDataElement]
aggregated_entries: list[GroupDataElementEntry]
aggregated_errors: list[ErrorDocument]

@classmethod
def create(cls, data: pd.DataFrame) -> GroupSampleData:
@@ -131,7 +127,7 @@ def create(cls, data: pd.DataFrame) -> GroupSampleData:
identifier = top_row[str, "Sample"]
data = rm_df_columns(data, r"^Sample$|^Standard Value|^R$|^Unnamed: \d+$")
# Columns are considered "numeric" if the value of the first row is a float
# Non-numeric values such as "Mask" and "Range?" will be reported as errors.
# "Mask" and "Range?" are special cases that will be considered NaN.
numeric_columns = [
column
for column in data.columns
@@ -147,43 +143,40 @@ def create(cls, data: pd.DataFrame) -> GroupSampleData:
else:
normal_columns.append(column)

data_elements = []
for row in row_data:
entries, errors = cls._get_entries_and_errors(row, normal_columns)
data_elements.append(
return GroupSampleData(
identifier=identifier,
data_elements=[
GroupDataElement(
sample=identifier,
position=row[str, ["Well", "Wells"]],
plate=row[str, "WellPlateName"],
entries=entries,
errors=errors,
entries=[
element_entry
for column_name in normal_columns
if (element_entry := cls._get_element_entry(row, column_name))
is not None
],
)
)

aggregated_entries, aggregated_errors = cls._get_entries_and_errors(
top_row, aggregated_columns
)

return GroupSampleData(
identifier=identifier,
data_elements=data_elements,
aggregated_entries=aggregated_entries,
aggregated_errors=aggregated_errors,
for row in row_data
],
aggregated_entries=[
element_entry
for column_name in aggregated_columns
if (element_entry := cls._get_element_entry(top_row, column_name))
is not None
],
)

@classmethod
def _get_entries_and_errors(
cls, data_row: SeriesData, column_names: list[str]
) -> tuple[list[GroupDataElementEntry], list[ErrorDocument]]:
entries = []
errors = []
for column in column_names:
value = data_row.get(float, column)
if value is not None:
entries.append(GroupDataElementEntry(column, value))
elif (error := data_row.get(str, column)) is not None:
errors.append(ErrorDocument(error, column))
return entries, errors
def _get_element_entry(
cls, data_row: SeriesData, column_name: str
) -> GroupDataElementEntry | None:
if (value := data_row.get(float, column_name)) is not None:
return GroupDataElementEntry(
name=column_name,
value=value,
)
return None


@dataclass(frozen=True)
@@ -311,7 +304,6 @@ class DataElement:
wavelength: float
position: str
value: float
error_document: list[ErrorDocument]
elapsed_time: list[float] = field(default_factory=list)
kinetic_measures: list[float | None] = field(default_factory=list)
sample_id: str | None = None
@@ -334,58 +326,54 @@ class PlateWavelengthData:

@staticmethod
def create(
header: PlateHeader,
plate_name: str,
temperature: float | None,
elapsed_time: float | None,
wavelength: float,
df_data: pd.DataFrame,
) -> PlateWavelengthData:
# Since value is required for the measurement class (absorbance, luminescense and fluorescence)
# we don't store data for NaN values
# TODO: Report error documents for NaN values
data = {
f"{num_to_chars(row_idx)}{col}": raw_value
f"{num_to_chars(row_idx)}{col}": value
for row_idx, *row_data in df_data.itertuples()
for col, raw_value in zip(df_data.columns, row_data, strict=True)
if (value := try_non_nan_float_or_none(raw_value)) is not None
}

data_elements = {}
for position, raw_value in data.items():
value = try_non_nan_float_or_none(raw_value)
if value is None and elapsed_time is not None:
msg = f"Missing kinetic measurement for well position {position} at {elapsed_time}s."
raise AllotropeConversionError(msg)

data_elements[str(position)] = DataElement(
uuid=random_uuid_str(),
plate=header.name,
temperature=temperature,
wavelength=wavelength,
position=str(position),
value=NEGATIVE_ZERO if value is None else value,
error_document=(
[ErrorDocument(str(raw_value), header.read_mode)]
if value is None
else []
),
elapsed_time=[elapsed_time] if elapsed_time is not None else [],
kinetic_measures=[value] if elapsed_time is not None else [],
)

return PlateWavelengthData(wavelength, data_elements)
return PlateWavelengthData(
wavelength,
data_elements={
str(position): DataElement(
uuid=random_uuid_str(),
plate=plate_name,
temperature=temperature,
wavelength=wavelength,
position=str(position),
value=value,
elapsed_time=[elapsed_time] if elapsed_time is not None else [],
kinetic_measures=[value] if elapsed_time is not None else [],
)
for position, value in data.items()
},
)

def update_kinetic_data_elements(
self, elapsed_time: float, df_data: pd.DataFrame
) -> None:
data = {
f"{num_to_chars(row_idx)}{col}": try_non_nan_float_or_none(raw_value)
f"{num_to_chars(row_idx)}{col}": value
for row_idx, *row_data in df_data.itertuples()
for col, raw_value in zip(df_data.columns, row_data, strict=True)
if (value := try_non_nan_float_or_none(raw_value)) is not None
}
for position, value in data.items():
if value is None:
try:
self.data_elements[position].elapsed_time.append(elapsed_time)
self.data_elements[position].kinetic_measures.append(value)
except KeyError as e:
msg = f"Missing kinetic measurement for well position {position} at {elapsed_time}s."
raise AllotropeConversionError(msg)

self.data_elements[position].elapsed_time.append(elapsed_time)
self.data_elements[position].kinetic_measures.append(value)
raise AllotropeConversionError(msg) from e


@dataclass(frozen=True)
@@ -467,7 +455,7 @@ def _get_wavelength_data(
end = start + header.num_columns
wavelength_data.append(
PlateWavelengthData.create(
header=header,
plate_name=header.name,
temperature=temperature,
elapsed_time=elapsed_time,
wavelength=header.wavelengths[idx],
@@ -536,6 +524,8 @@ def create(

def iter_data_elements(self, position: str) -> Iterator[DataElement]:
for wavelength_data in self.raw_data.wavelength_data:
if position not in wavelength_data.data_elements:
continue
yield wavelength_data.data_elements[position]


@@ -545,29 +535,26 @@ class TimeMeasurementData:

@staticmethod
def create(
header: PlateHeader,
plate_name: str,
wavelength: float,
row: pd.Series[float],
) -> TimeMeasurementData:
temperature = try_non_nan_float_or_none(str(row.iloc[1]))
data_elements = {}

for position, raw_value in row.iloc[2:].items():
value = try_non_nan_float_or_none(raw_value)
error_document = []
if value is None:
error_document.append(ErrorDocument(str(raw_value), header.read_mode))
data_elements[str(position)] = DataElement(
uuid=random_uuid_str(),
plate=header.name,
temperature=temperature,
wavelength=wavelength,
position=str(position),
value=NEGATIVE_ZERO if value is None else value,
error_document=error_document,
)

return TimeMeasurementData(data_elements)
return TimeMeasurementData(
data_elements={
str(position): DataElement(
uuid=random_uuid_str(),
plate=plate_name,
temperature=temperature,
wavelength=wavelength,
position=str(position),
value=value,
)
for position, raw_value in row.iloc[2:].items()
if (value := try_non_nan_float_or_none(str(raw_value))) is not None
},
)


@dataclass(frozen=True)
Expand All @@ -578,7 +565,7 @@ class TimeWavelengthData:
@staticmethod
def create(
reader: CsvReader,
header: PlateHeader,
plate_name: str,
wavelength: float,
columns: pd.Series[str],
) -> TimeWavelengthData:
@@ -590,7 +577,7 @@ def create(
return TimeWavelengthData(
wavelength=wavelength,
measurement_data=[
TimeMeasurementData.create(header, wavelength, row)
TimeMeasurementData.create(plate_name, wavelength, row)
for _, row in data.iterrows()
],
)
@@ -611,7 +598,7 @@ def create(reader: CsvReader, header: PlateHeader) -> TimeRawData:
wavelength_data=[
TimeWavelengthData.create(
reader,
header,
header.name,
wavelength,
columns,
)
@@ -671,6 +658,8 @@ def create(
def iter_data_elements(self, position: str) -> Iterator[DataElement]:
for wavelength_data in self.raw_data.wavelength_data:
for measurement_data in wavelength_data.measurement_data:
if position not in measurement_data.data_elements:
continue
yield measurement_data.data_elements[position]


@@ -1094,7 +1083,6 @@ class StructureData:
def create(reader: CsvReader) -> StructureData:
block_list = BlockList.create(reader)

# update sample_id if it was reported in the group blocks and include erros from calculated data
for group_block in block_list.group_blocks:
for group_sample_data in group_block.group_data.sample_data:
for group_data_element in group_sample_data.data_elements:
Expand All @@ -1103,9 +1091,5 @@ def create(reader: CsvReader) -> StructureData:
group_data_element.position
):
data_element.sample_id = group_data_element.sample
data_element.error_document += (
group_data_element.errors
+ group_sample_data.aggregated_errors
)

return StructureData(block_list)
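Because masked or otherwise non-numeric wells no longer produce DataElement entries, a given position can be missing from data_elements entirely; the new "if position not in ...: continue" guards make iter_data_elements yield nothing for such positions instead of raising KeyError. A rough sketch of that lookup behavior, using a simplified stand-in type rather than the real PlateWavelengthData:

from collections.abc import Iterator
from dataclasses import dataclass


@dataclass(frozen=True)
class WavelengthData:
    # Simplified stand-in: maps well position to a stored value.
    data_elements: dict[str, float]


def iter_data_elements(wavelength_data: list[WavelengthData], position: str) -> Iterator[float]:
    # Mirrors the guarded iteration in this change: positions with no stored
    # value (for example a "Mask" cell) are skipped instead of raising KeyError.
    for data in wavelength_data:
        if position not in data.data_elements:
            continue
        yield data.data_elements[position]


reads = [WavelengthData({"A1": 0.52}), WavelengthData({"A1": 0.49, "A2": 1.07})]
print(list(iter_data_elements(reads, "A1")))  # [0.52, 0.49]
print(list(iter_data_elements(reads, "A2")))  # [1.07]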