Skip to content

Commit

Permalink
Add quality control property for batch processing over genotypes (#76)
Browse files Browse the repository at this point in the history
* add `qc_fail` property to `Series` class

* add `qc_fail` property to dictionaries returned with traits by `MultipleDicotPipeline`

* add tests for `qc_fail` property

* test quality control in notebook for multiple dicots

* look at dtype of qc_fail

* fix typing annotation for qc_fail property
  • Loading branch information
eberrigan authored Apr 23, 2024
1 parent f8baa7b commit 035b153
Show file tree
Hide file tree
Showing 5 changed files with 295 additions and 107 deletions.
368 changes: 265 additions & 103 deletions notebooks/MultipleDicotPipeline.ipynb

Large diffs are not rendered by default.

15 changes: 15 additions & 0 deletions sleap_roots/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ class Series:
series_name: Name of the series derived from the HDF5 filename.
expected_count: Fetch the expected plant count for this series from the CSV.
group: Group name for the series from the CSV.
qc_fail: Flag to indicate if the series failed QC from the CSV.
"""

h5_path: Optional[str] = None
Expand Down Expand Up @@ -161,6 +162,20 @@ def group(self) -> str:
print(f"No group found for series {self.series_name} in CSV.")
return np.nan

@property
def qc_fail(self) -> Union[int, float]:
    """Flag to indicate if the series failed QC, read from the CSV.

    The CSV at `self.csv_path` is loaded and the row whose `plant_qr_code`
    equals `self.series_name` is looked up; the value of its `qc_cylinder`
    column is returned.

    Returns:
        The `qc_cylinder` value of the first matching row, or `np.nan` when the
        CSV path is unset, the file does not exist, the CSV lacks the
        `plant_qr_code` or `qc_cylinder` column, or no row matches the series
        name. NaN cannot be cast with `int()`, so callers should check for it
        before converting the flag.
    """
    if not self.csv_path or not Path(self.csv_path).exists():
        print("CSV path is not set or the file does not exist.")
        return np.nan
    df = pd.read_csv(self.csv_path)
    try:
        # Match the series_name (or plant_qr_code in the CSV) to fetch the QC flag
        return df[df["plant_qr_code"] == self.series_name]["qc_cylinder"].iloc[0]
    except KeyError as missing_column:
        # A CSV without the expected columns (e.g. one with no `qc_cylinder`
        # column) is treated like any other "no flag available" case rather
        # than crashing the property access.
        print(
            f"Column {missing_column} not found in CSV; "
            f"no QC flag available for series {self.series_name}."
        )
        return np.nan
    except IndexError:
        # The boolean mask selected no rows, so `.iloc[0]` is out of bounds.
        print(f"No QC flag found for series {self.series_name} in CSV.")
        return np.nan

def __len__(self) -> int:
"""Length of the series (number of images), taken as `len(self.video)`."""
return len(self.video)
Expand Down
10 changes: 8 additions & 2 deletions sleap_roots/trait_pipelines.py
Original file line number Diff line number Diff line change
Expand Up @@ -401,12 +401,13 @@ def compute_multiple_dicots_traits(
csv_suffix: The suffix to append to the CSV file name. Default is ".all_frames_summary.csv".
Returns:
A dictionary containing the series name, group, aggregated traits, and summary statistics.
A dictionary containing the series name, group, qc_fail, aggregated traits, and summary statistics.
"""
# Initialize the return structure with the series name, group, and QC flag
result = {
"series": str(series.series_name),
"group": str(series.group),
"qc_fail": series.qc_fail,
"traits": {},
"summary_stats": {},
}
Expand Down Expand Up @@ -528,6 +529,11 @@ def compute_multiple_dicots_traits_for_groups(
# Group series by their group property
series_groups = {}
for series in series_list:
# Exclude series with qc_fail flag set to 1
if int(series.qc_fail) == 1:
print(f"Skipping series '{series.series_name}' due to qc_fail flag.")
continue
# Get the group name from the series object
group_name = str(series.group)
if group_name not in series_groups:
series_groups[group_name] = {"names": [], "series": []}
Expand Down Expand Up @@ -733,7 +739,7 @@ def compute_batch_multiple_dicots_traits_for_groups(
Returns:
A pandas DataFrame of computed traits summarized over all frames of each
series. The resulting dataframe will have a row for each series and a column
group. The resulting dataframe will have a row for each series and a column
for each series-level summarized trait.
Summarized traits are prefixed with the trait name and an underscore,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
6030_2,6030,2,h5s_predictions\ES\6030_2.h5,0,,3,,,,,,,
6035_1,6035,1,h5s_predictions\ES\6035_1.h5,0,,3,,,,,,,
6035_2,6035,2,h5s_predictions\ES\6035_2.h5,0,,3,,,,,,,
6039_1,6039,1,h5s_predictions\ES\6039_1.h5,0,,2,1,1,,,,,
6039_1,6039,1,h5s_predictions\ES\6039_1.h5,1,,2,1,1,,,,,
6039_2,6039,2,h5s_predictions\ES\6039_2.h5,0,,3,,,,,,,
6042_1,6042,1,h5s_predictions\ES\6042_1.h5,0,,3,,,,,,,
6042_2,6042,2,h5s_predictions\ES\6042_2.h5,0,,3,,,,,,,
Expand Down
7 changes: 6 additions & 1 deletion tests/test_series.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ def csv_path(tmp_path):
# Create a dummy CSV file
csv_path = tmp_path / "dummy.csv"
csv_path.write_text(
"plant_qr_code,number_of_plants_cylinder,genotype\ndummy,10,1100\nseries2,15,Kitaake-X\n"
"plant_qr_code,number_of_plants_cylinder,genotype,qc_cylinder\ndummy,10,1100,0\nseries2,15,Kitaake-X,1\n"
)
return csv_path

Expand Down Expand Up @@ -85,6 +85,11 @@ def test_expected_count(series_instance, csv_path):
assert series_instance.expected_count == 10


def test_qc_cylinder(series_instance, csv_path):
# Point the series at the CSV passed in as `csv_path`, then read the QC flag
# through the `qc_fail` property and expect it to be 0.
series_instance.csv_path = csv_path
assert series_instance.qc_fail == 0


def test_len():
# A Series built from a two-item video list should report a length of 2.
series = Series(video=["frame1", "frame2"])
assert len(series) == 2
Expand Down

0 comments on commit 035b153

Please sign in to comment.