diff --git a/eye2bids/edf2bids.py b/eye2bids/edf2bids.py index 1aeee91..2fbb29b 100644 --- a/eye2bids/edf2bids.py +++ b/eye2bids/edf2bids.py @@ -221,11 +221,11 @@ def _has_validation(df: pd.DataFrame) -> bool: def _extract_MaximalCalibrationError(df: pd.DataFrame) -> list[float]: - return np.array(_validations(df)[[11]]).astype(float).tolist() + return ((_validations(df)[[11]]).astype(float)).to_numpy().tolist() def _extract_AverageCalibrationError(df: pd.DataFrame) -> list[float]: - return np.array(_validations(df)[[9]]).astype(float).tolist() + return ((_validations(df)[[9]]).astype(float)).to_numpy().tolist() def _extract_ManufacturersModelName(events: list[str]) -> str: @@ -277,7 +277,7 @@ def _extract_ScreenResolution(df: pd.DataFrame) -> list[int]: def _extract_StartTime(events: list[str]) -> int: StartTime = ( - np.array(pd.DataFrame([st.split() for st in events if st.startswith("START")])[1]) + (pd.DataFrame([st.split() for st in events if st.startswith("START")])[1]) .astype(int) .tolist() ) @@ -289,13 +289,12 @@ def _extract_StartTime(events: list[str]) -> int: Please consider changing your code accordingly for future eyetracking experiments.\n""" ) - return StartTime[0] - return StartTime + return StartTime[0] def _extract_StopTime(events: list[str]) -> int: StopTime = ( - np.array(pd.DataFrame([so.split() for so in events if so.startswith("END")])[1]) + (pd.DataFrame([so.split() for so in events if so.startswith("END")])[1]) .astype(int) .tolist() ) @@ -307,8 +306,7 @@ def _extract_StopTime(events: list[str]) -> int: Please consider changing your code accordingly for future eyetracking experiments.\n""" ) - return StopTime[-1] - return StopTime + return StopTime[-1] def _load_asc_file(events_asc_file: str | Path) -> list[str]: @@ -328,8 +326,10 @@ def _load_asc_file_as_reduced_df(events_asc_file: str | Path) -> pd.DataFrame: return pd.DataFrame(df_ms.iloc[0:, 2:]) -def _df_events_after_start(events: list[str]) -> pd.DataFrame: - """Extract data between START and END messages.""" +def _df_events_from_first_start(events: list[str]) -> pd.DataFrame: + """Extract data starting from the first time START appears + and including last time END appears. + """ start_index = next( i for i, line in enumerate(events) if re.match(r"START\s+.*", line) ) @@ -338,33 +338,44 @@ def _df_events_after_start(events: list[str]) -> pd.DataFrame: ) if end_index > start_index: - data_lines = events[start_index + 1 : end_index] + data_lines = events[start_index : end_index + 1] return pd.DataFrame([line.strip().split("\t") for line in data_lines]) else: return e2b_log.warning("No 'END' found after the selected 'START'.") -def _df_physioevents(events_after_start: pd.DataFrame) -> pd.DataFrame: - events_after_start["Event_Letters"] = ( - events_after_start[0].str.extractall(r"([A-Za-z]+)").groupby(level=0).agg("".join) +def _df_physioevents(events_from_start: pd.DataFrame) -> pd.DataFrame: + events_from_start["Event_Letters"] = ( + events_from_start[0].str.extractall(r"([A-Za-z]+)").groupby(level=0).agg("".join) ) - events_after_start["Event_Numbers"] = events_after_start[0].str.extract(r"(\d+)") - events_after_start[["msg_timestamp", "message"]] = events_after_start[1].str.split( + events_from_start["Event_Numbers"] = events_from_start[0].str.extract(r"(\d+)") + events_from_start[["msg_timestamp", "message"]] = events_from_start[1].str.split( n=1, expand=True ) - events_after_start["message"] = events_after_start["message"].astype(str) + events_from_start["message"] = events_from_start["message"].astype(str) + + events_from_start["message"] = np.where( + events_from_start["Event_Letters"] == "START", + "START", + np.where( + events_from_start["Event_Letters"] == "END", + "END", + events_from_start.get("message", ""), + ), + ) - msg_mask = events_after_start["Event_Letters"] == "MSG" - events_after_start.loc[msg_mask, "Event_Numbers"] = events_after_start.loc[ + msg_mask = events_from_start["Event_Letters"].isin(["MSG", "START", "END"]) + events_from_start.loc[msg_mask, "Event_Numbers"] = events_from_start.loc[ msg_mask, "msg_timestamp" ] + physioevents_reordered = ( pd.concat( [ - events_after_start["Event_Numbers"], - events_after_start[2], - events_after_start["Event_Letters"], - events_after_start["message"], + events_from_start["Event_Numbers"], + events_from_start[2], + events_from_start["Event_Letters"], + events_from_start["message"], ], axis=1, ignore_index=True, @@ -378,14 +389,28 @@ def _df_physioevents(events_after_start: pd.DataFrame) -> pd.DataFrame: def _physioevents_for_eye( physioevents_reordered: pd.DataFrame, eye: str = "L" ) -> pd.DataFrame: - physioevents_eye_list = ["MSG", f"EFIX{eye}", f"ESACC{eye}", f"EBLINK{eye}"] + physioevents_eye_list = [ + "MSG", + f"EFIX{eye}", + f"ESACC{eye}", + f"EBLINK{eye}", + "START", + "END", + ] physioevents = physioevents_reordered[ physioevents_reordered["trial_type"].isin(physioevents_eye_list) ] - physioevents = physioevents.replace( - {f"EFIX{eye}": "fixation", f"ESACC{eye}": "saccade", "MSG": np.nan, None: np.nan} + physioevents["trial_type"] = physioevents["trial_type"].replace( + { + f"EFIX{eye}": "fixation", + f"ESACC{eye}": "saccade", + "MSG": np.nan, + "START": np.nan, + "END": np.nan, + None: np.nan, + } ) physioevents["blink"] = 0 @@ -406,6 +431,7 @@ def _physioevents_for_eye( physioevents = physioevents[physioevents.trial_type != f"EBLINK{eye}"] physioevents["timestamp"] = physioevents["timestamp"].astype("Int64") + physioevents["duration"] = pd.to_numeric(physioevents["duration"], errors="coerce") physioevents["duration"] = physioevents["duration"].astype("Int64") physioevents = physioevents[ @@ -610,8 +636,8 @@ def edf2bids( # %% # Messages and events to dataframes - events_after_start = _df_events_after_start(events) - physioevents_reordered = _df_physioevents(events_after_start) + events_from_start = _df_events_from_first_start(events) + physioevents_reordered = _df_physioevents(events_from_start) physioevents_eye1 = _physioevents_for_eye(physioevents_reordered, eye="L") physioevents_eye2 = _physioevents_for_eye(physioevents_reordered, eye="R") diff --git a/tests/test_edf2bids.py b/tests/test_edf2bids.py index 5c129e2..a153701 100644 --- a/tests/test_edf2bids.py +++ b/tests/test_edf2bids.py @@ -633,4 +633,4 @@ def test_physioevents_value(folder, expected, eyelink_test_data_dir): output_dir / f"{input_file.stem}_recording-eye1_physioevents.tsv.gz" ) physioevents = pd.read_csv(expected_eyetrackphysio_tsv, sep="\t", header=None) - assert physioevents.iloc[3:10, 2].tolist() == expected + assert physioevents.iloc[4:11, 2].tolist() == expected