From d5d9c9f732b2960f79e2ab9b53453259c24ec811 Mon Sep 17 00:00:00 2001 From: Julia-Katharina Pfarr <111446107+julia-pfarr@users.noreply.github.com> Date: Wed, 22 Jan 2025 10:41:54 -0500 Subject: [PATCH] issue #102 (#113) * Tried to fix bugs, but failed. edf2asc suddenly gives out a random order for the calibration positions. E.g., calibration point 3 is listed after calibration point 4. That's why I had to change the function _extract_Calibration position (as well as _get_calibration_positions). However, now the tests are failing because something is wrong with the nested lists, and it also tries to get the calibration positions for the test files that have none. I thought I specified that within the functions but obviously it doesn't work. * fix bug: make sure that EyeTrackingMethod is empty if no calibration because otherwise it writes wrong content into the variable * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * fix test: parentheses * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * add PhysioType to metadata * Update _base.py * pre-commit fixes * #96: fix start and stop time to be numbers instead of arrays * issue #102 * modify lines in test_physioevents_value. They changed because we are now capturing START and END messages and thus more lines in output. 
* fix #96 and values of calibration errors * fix value error calibration values --------- Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- eye2bids/edf2bids.py | 82 +++++++++++++++++++++++++++--------------- tests/test_edf2bids.py | 2 +- 2 files changed, 55 insertions(+), 29 deletions(-) diff --git a/eye2bids/edf2bids.py b/eye2bids/edf2bids.py index 1aeee91..2fbb29b 100644 --- a/eye2bids/edf2bids.py +++ b/eye2bids/edf2bids.py @@ -221,11 +221,11 @@ def _has_validation(df: pd.DataFrame) -> bool: def _extract_MaximalCalibrationError(df: pd.DataFrame) -> list[float]: - return np.array(_validations(df)[[11]]).astype(float).tolist() + return ((_validations(df)[[11]]).astype(float)).to_numpy().tolist() def _extract_AverageCalibrationError(df: pd.DataFrame) -> list[float]: - return np.array(_validations(df)[[9]]).astype(float).tolist() + return ((_validations(df)[[9]]).astype(float)).to_numpy().tolist() def _extract_ManufacturersModelName(events: list[str]) -> str: @@ -277,7 +277,7 @@ def _extract_ScreenResolution(df: pd.DataFrame) -> list[int]: def _extract_StartTime(events: list[str]) -> int: StartTime = ( - np.array(pd.DataFrame([st.split() for st in events if st.startswith("START")])[1]) + (pd.DataFrame([st.split() for st in events if st.startswith("START")])[1]) .astype(int) .tolist() ) @@ -289,13 +289,12 @@ def _extract_StartTime(events: list[str]) -> int: Please consider changing your code accordingly for future eyetracking experiments.\n""" ) - return StartTime[0] - return StartTime + return StartTime[0] def _extract_StopTime(events: list[str]) -> int: StopTime = ( - np.array(pd.DataFrame([so.split() for so in events if so.startswith("END")])[1]) + (pd.DataFrame([so.split() for so in events if so.startswith("END")])[1]) .astype(int) .tolist() ) @@ -307,8 +306,7 @@ def _extract_StopTime(events: list[str]) -> int: Please consider changing your code accordingly for future eyetracking experiments.\n""" ) - return 
StopTime[-1] - return StopTime + return StopTime[-1] def _load_asc_file(events_asc_file: str | Path) -> list[str]: @@ -328,8 +326,10 @@ def _load_asc_file_as_reduced_df(events_asc_file: str | Path) -> pd.DataFrame: return pd.DataFrame(df_ms.iloc[0:, 2:]) -def _df_events_after_start(events: list[str]) -> pd.DataFrame: - """Extract data between START and END messages.""" +def _df_events_from_first_start(events: list[str]) -> pd.DataFrame: + """Extract data starting from the first time START appears + and including last time END appears. + """ start_index = next( i for i, line in enumerate(events) if re.match(r"START\s+.*", line) ) @@ -338,33 +338,44 @@ def _df_events_after_start(events: list[str]) -> pd.DataFrame: ) if end_index > start_index: - data_lines = events[start_index + 1 : end_index] + data_lines = events[start_index : end_index + 1] return pd.DataFrame([line.strip().split("\t") for line in data_lines]) else: return e2b_log.warning("No 'END' found after the selected 'START'.") -def _df_physioevents(events_after_start: pd.DataFrame) -> pd.DataFrame: - events_after_start["Event_Letters"] = ( - events_after_start[0].str.extractall(r"([A-Za-z]+)").groupby(level=0).agg("".join) +def _df_physioevents(events_from_start: pd.DataFrame) -> pd.DataFrame: + events_from_start["Event_Letters"] = ( + events_from_start[0].str.extractall(r"([A-Za-z]+)").groupby(level=0).agg("".join) ) - events_after_start["Event_Numbers"] = events_after_start[0].str.extract(r"(\d+)") - events_after_start[["msg_timestamp", "message"]] = events_after_start[1].str.split( + events_from_start["Event_Numbers"] = events_from_start[0].str.extract(r"(\d+)") + events_from_start[["msg_timestamp", "message"]] = events_from_start[1].str.split( n=1, expand=True ) - events_after_start["message"] = events_after_start["message"].astype(str) + events_from_start["message"] = events_from_start["message"].astype(str) + + events_from_start["message"] = np.where( + events_from_start["Event_Letters"] == "START", + 
"START", + np.where( + events_from_start["Event_Letters"] == "END", + "END", + events_from_start.get("message", ""), + ), + ) - msg_mask = events_after_start["Event_Letters"] == "MSG" - events_after_start.loc[msg_mask, "Event_Numbers"] = events_after_start.loc[ + msg_mask = events_from_start["Event_Letters"].isin(["MSG", "START", "END"]) + events_from_start.loc[msg_mask, "Event_Numbers"] = events_from_start.loc[ msg_mask, "msg_timestamp" ] + physioevents_reordered = ( pd.concat( [ - events_after_start["Event_Numbers"], - events_after_start[2], - events_after_start["Event_Letters"], - events_after_start["message"], + events_from_start["Event_Numbers"], + events_from_start[2], + events_from_start["Event_Letters"], + events_from_start["message"], ], axis=1, ignore_index=True, @@ -378,14 +389,28 @@ def _df_physioevents(events_after_start: pd.DataFrame) -> pd.DataFrame: def _physioevents_for_eye( physioevents_reordered: pd.DataFrame, eye: str = "L" ) -> pd.DataFrame: - physioevents_eye_list = ["MSG", f"EFIX{eye}", f"ESACC{eye}", f"EBLINK{eye}"] + physioevents_eye_list = [ + "MSG", + f"EFIX{eye}", + f"ESACC{eye}", + f"EBLINK{eye}", + "START", + "END", + ] physioevents = physioevents_reordered[ physioevents_reordered["trial_type"].isin(physioevents_eye_list) ] - physioevents = physioevents.replace( - {f"EFIX{eye}": "fixation", f"ESACC{eye}": "saccade", "MSG": np.nan, None: np.nan} + physioevents["trial_type"] = physioevents["trial_type"].replace( + { + f"EFIX{eye}": "fixation", + f"ESACC{eye}": "saccade", + "MSG": np.nan, + "START": np.nan, + "END": np.nan, + None: np.nan, + } ) physioevents["blink"] = 0 @@ -406,6 +431,7 @@ def _physioevents_for_eye( physioevents = physioevents[physioevents.trial_type != f"EBLINK{eye}"] physioevents["timestamp"] = physioevents["timestamp"].astype("Int64") + physioevents["duration"] = pd.to_numeric(physioevents["duration"], errors="coerce") physioevents["duration"] = physioevents["duration"].astype("Int64") physioevents = physioevents[ @@ 
-610,8 +636,8 @@ def edf2bids( # %% # Messages and events to dataframes - events_after_start = _df_events_after_start(events) - physioevents_reordered = _df_physioevents(events_after_start) + events_from_start = _df_events_from_first_start(events) + physioevents_reordered = _df_physioevents(events_from_start) physioevents_eye1 = _physioevents_for_eye(physioevents_reordered, eye="L") physioevents_eye2 = _physioevents_for_eye(physioevents_reordered, eye="R") diff --git a/tests/test_edf2bids.py b/tests/test_edf2bids.py index 5c129e2..a153701 100644 --- a/tests/test_edf2bids.py +++ b/tests/test_edf2bids.py @@ -633,4 +633,4 @@ def test_physioevents_value(folder, expected, eyelink_test_data_dir): output_dir / f"{input_file.stem}_recording-eye1_physioevents.tsv.gz" ) physioevents = pd.read_csv(expected_eyetrackphysio_tsv, sep="\t", header=None) - assert physioevents.iloc[3:10, 2].tolist() == expected + assert physioevents.iloc[4:11, 2].tolist() == expected