Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

split physioevents and events json #92

Merged
merged 19 commits into from
Aug 2, 2024
Merged
Show file tree
Hide file tree
Changes from 16 commits
Commits
Show all changes
19 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
57 changes: 50 additions & 7 deletions eye2bids/_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@


class BasePhysioEventsJson(dict[str, Any]):
"""Handle content of physioevents sidedar."""
"""Handle content of physioevents sidecar."""

input_file: Path
two_eyes: bool
Expand All @@ -24,14 +24,19 @@ def __init__(self, metadata: None | dict[str, Any] = None) -> None:
self["ForeignIndexColumn"] = "timestamp"

self["blink"] = {
"Description": "One indicates if the eye was closed, zero if open."
"Description": "Gives status of the eye.",
"Levels": {
"0": "Indicates if the eye was open.",
"1": "Indicates if the eye was closed.",
},
}
self["message"] = {"Description": "String messages logged by the eye-tracker."}
self["trial_type"] = {
"Description": (
"Event type as identified by the eye-tracker's model "
"((either 'n/a' if not applicabble, 'fixation', or 'saccade')."
)
"Description": "Event type as identified by the eye-tracker's model.",
"Levels": {
"fixation": "Indicates a fixation.",
"saccade": "Indicates a saccade.",
},
}

self.update_from_metadata(metadata)
Expand Down Expand Up @@ -61,6 +66,44 @@ def write(
self,
output_dir: Path,
recording: str | None = None,
) -> None:
"""Write to json."""
content = {key: value for key, value in self.items() if self[key] is not None}
with open(output_dir / self.output_filename(recording=recording), "w") as outfile:
json.dump(content, outfile, indent=4)
# e2b_log.info(f"file generated: {self.output_filename(recording=recording)}")


class BaseEventsJson(dict[str, Any]):
"""Handle content of events sidecar."""

input_file: Path

def __init__(self, metadata: None | dict[str, Any] = None) -> None:
    """Initialize the events sidecar, seeding content from *metadata* if given."""
    self.update_from_metadata(metadata)

def update_from_metadata(self, metadata: None | dict[str, Any] = None) -> None:
"""Update content of json side car based on metadata."""
if metadata is None:
return None

self["TaskName"] = metadata.get("TaskName")
self["InstitutionAddress"] = metadata.get("InstitutionAddress")
self["InstitutionName"] = metadata.get("InstitutionName")
self["StimulusPresentation"] = {
"ScreenDistance": metadata.get("ScreenDistance"),
"ScreenRefreshRate": metadata.get("ScreenRefreshRate"),
"ScreenSize": metadata.get("ScreenSize"),
}

def output_filename(self) -> str:
    """Generate output filename for the events sidecar.

    Returns
    -------
    str
        ``<input file stem>_events.json`` — matches the BIDS ``_events``
        sidecar naming used by the end-to-end tests.
    """
    filename = self.input_file.stem
    # Bug fix: the literal previously had no placeholder, so every file
    # was written with the same bogus name and `filename` went unused.
    return f"{filename}_events.json"

def write(
self,
output_dir: Path,
extra_metadata: dict[str, str | list[str] | list[float]] | None = None,
) -> None:
"""Write to json."""
Expand All @@ -69,7 +112,7 @@ def write(
self[key] = value

content = {key: value for key, value in self.items() if self[key] is not None}
with open(output_dir / self.output_filename(recording=recording), "w") as outfile:
with open(output_dir / self.output_filename(), "w") as outfile:
json.dump(content, outfile, indent=4)


Expand Down
44 changes: 29 additions & 15 deletions eye2bids/edf2bids.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
from rich.prompt import Prompt
from yaml.loader import SafeLoader

from eye2bids._base import BasePhysioEventsJson, BasePhysioJson
from eye2bids._base import BaseEventsJson, BasePhysioEventsJson, BasePhysioJson
from eye2bids._parser import global_parser
from eye2bids.logger import eye2bids_logger

Expand Down Expand Up @@ -324,13 +324,13 @@ def _load_asc_file_as_df(events_asc_file: str | Path) -> pd.DataFrame:


def _load_asc_file_as_reduced_df(events_asc_file: str | Path) -> pd.DataFrame:
    """Reduce dataframe without MSG and sample columns.

    Parses the ASC events file and drops the first two columns
    (the MSG and sample columns) of the resulting dataframe.
    """
    df_ms = _load_asc_file_as_df(events_asc_file)
    return pd.DataFrame(df_ms.iloc[0:, 2:])


def _df_events_after_start(events: list[str]) -> pd.DataFrame:

"""Extract data between START and END messages."""
start_index = next(
i for i, line in enumerate(events) if re.match(r"START\s+.*", line)
)
Expand All @@ -342,7 +342,7 @@ def _df_events_after_start(events: list[str]) -> pd.DataFrame:
data_lines = events[start_index + 1 : end_index]
return pd.DataFrame([line.strip().split("\t") for line in data_lines])
else:
return print("No 'END' found after the selected 'START'.")
return e2b_log.warning("No 'END' found after the selected 'START'.")


def _df_physioevents(events_after_start: pd.DataFrame) -> pd.DataFrame:
Expand Down Expand Up @@ -491,10 +491,12 @@ def generate_physio_json(
)[1::2]

base_json.write(output_dir=output_dir, recording="eye1", extra_metadata=metadata_eye1)
e2b_log.info(f"file generated: {base_json.output_filename()}")
if base_json.two_eyes:
base_json.write(
output_dir=output_dir, recording="eye2", extra_metadata=metadata_eye2
)
e2b_log.info(f"file generated: {base_json.output_filename()}")


def edf2bids(
Expand All @@ -512,7 +514,7 @@ def edf2bids(
input_file, metadata_file, output_dir, interactive, force
)

# CONVERSION events
# CONVERSION events #
events_asc_file = _convert_edf_to_asc_events(input_file)

if not events_asc_file.exists():
Expand All @@ -521,35 +523,46 @@ def edf2bids(
f"{input_file}"
)

# %% Sidecar eye-physio.json
# SIDECARS #
# %% physio.json
generate_physio_json(input_file, metadata_file, output_dir, events_asc_file)

# %% physioevents.json Metadata
# %% physioevents.json
events = _load_asc_file(events_asc_file)

df_ms_reduced = _load_asc_file_as_reduced_df(events_asc_file)

physioevents_json = BasePhysioEventsJson()

physioevents_json.input_file = input_file
physioevents_json.two_eyes = _2eyesmode(df_ms_reduced)

physioevents_json.write(output_dir=output_dir, recording="eye1")
e2b_log.info(f"file generated: {physioevents_json.output_filename()}")
if physioevents_json.two_eyes:
physioevents_json.write(output_dir=output_dir, recording="eye2")
e2b_log.info(f"file generated: {physioevents_json.output_filename()}")
# %% events.json
if metadata_file is None:
metadata = {}
else:
with open(metadata_file) as f:
metadata = yaml.load(f, Loader=SafeLoader)

events_json = BasePhysioEventsJson(metadata)
events_json = BaseEventsJson(metadata)

events_json.input_file = input_file
events_json.two_eyes = _2eyesmode(df_ms_reduced)

events_json["StimulusPresentation"]["ScreenResolution"] = _extract_ScreenResolution(
df_ms_reduced
)

events_json.write(output_dir=output_dir, recording="eye1")
if events_json.two_eyes:
events_json.write(output_dir=output_dir, recording="eye2")
events_json.input_file = input_file

events_json.write(output_dir=output_dir)
e2b_log.info(f"file generated: {events_json.output_filename()}")

# %%
# Samples to dataframe
# SAMPLES #
# samples to dataframe
samples_asc_file = _convert_edf_to_asc_samples(input_file)
if not samples_asc_file.exists():
e2b_log.error(
Expand Down Expand Up @@ -596,6 +609,7 @@ def edf2bids(

e2b_log.info(f"file generated: {output_filename_eye2}")

# MESSAGES AND PHYSIOEVENTS #
# %%
# Messages and events to dataframes

Expand Down
55 changes: 43 additions & 12 deletions tests/test_edf2bids.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,14 +37,46 @@ def test_convert_edf_to_asc_events(input_file):
assert Path(asc_file).exists()


def _check_output_exists(output_dir, input_file, eye=1):
def _check_output_exists(output_dir: Path, input_file: Path, eye: int = 1) -> None:
    """Assert that all expected output files exist for the given eye.

    Checks that ``<stem>_recording-eye<eye>_physio`` and
    ``<stem>_recording-eye<eye>_physioevents`` were written with both
    ``.json`` and ``.tsv.gz`` suffixes in ``output_dir``.
    """
    for suffix in [".json", ".tsv.gz"]:
        for ending in [
            "_physioevents",
            "_physio",
        ]:
            assert (
                (output_dir / f"{input_file.stem}_recording-eye{eye}{ending}")
                .with_suffix(suffix)
                .exists()
            )


def _check_output_content(output_dir, input_file, eye=1):
"""Check content of output.

Make sure each column in the tsv has a description.

Ensure that all timestamps in physio.tsv.gz are evenly spaced:
as they should be regular sampled.
"""
Remi-Gau marked this conversation as resolved.
Show resolved Hide resolved
for ending in [
"_physioevents.json",
"_physio.json",
"_physio.tsv.gz",
"_physioevents.tsv.gz",
"_physioevents",
"_physio",
]:
assert (output_dir / f"{input_file.stem}_recording-eye{eye}{ending}").exists()
tsv_file = (
output_dir / f"{input_file.stem}_recording-eye{eye}{ending}"
).with_suffix(".tsv.gz")
json_file = (
output_dir / f"{input_file.stem}_recording-eye{eye}{ending}"
).with_suffix(".json")

df = pd.read_csv(tsv_file, sep="\t")
with open(json_file) as f:
metadata = json.load(f)
assert len(df.columns) == len(metadata["Columns"])

# space between timestamps should always be the same.
if ending == "_physio":
assert len(df[0].diff().unique()) == 1
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

apparently this is failing for some data

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

FAILED tests/test_edf2bids.py::test_edf_end_to_end - assert 5 == 1
 +  where 5 = len(array([        nan, 2.00000e+00, 1.10336e+05, 1.76840e+04, 1.61320e+04]))
 +    where array([        nan, 2.00000e+00, 1.10336e+05, 1.76840e+04, 1.61320e+04]) = unique()
 +      where unique = 0         NaN\n1         2.0\n2         2.0\n3         2.0\n4         2.0\n         ... \n531148    2.0\n531149    2.0\n531150    2.0\n531151    2.0\n531152    2.0\nName: 0, Length: 531153, dtype: float64.unique
 +        where 0         NaN\n1         2.0\n2         2.0\n3         2.0\n4         2.0\n         ... \n531148    2.0\n531149    2.0\n531150    2.0\n531151    2.0\n531152    2.0\nName: 0, Length: 531153, dtype: float64 = diff()
 +          where diff = 0         3098681\n1         3098683\n2         3098685\n3         3098687\n4         3098689\n           ...   \n531148    4305123\n531149    4305125\n531150    4305127\n531151    4305129\n531152    4305131\nName: 0, Length: 531153, dtype: int64.diff
FAILED tests/test_edf2bids.py::test_edf_end_to_end_2eyes - assert 2 == 1
 +  where 2 = len(array([nan,  1.]))
 +    where array([nan,  1.]) = unique()
 +      where unique = 0        NaN\n1        1.0\n2        1.0\n3        1.0\n4        1.0\n        ... \n51668    1.0\n51669    1.0\n51670    1.0\n51671    1.0\n51672    1.0\nName: 0, Length: 51673, dtype: float64.unique
 +        where 0        NaN\n1        1.0\n2        1.0\n3        1.0\n4        1.0\n        ... \n51668    1.0\n51669    1.0\n51670    1.0\n51671    1.0\n51672    1.0\nName: 0, Length: 51673, dtype: float64 = diff()
 +          where diff = 0        767979\n1        767980\n2        767981\n3        767982\n4        767983\n          ...  \n51668    819647\n51669    819648\n51670    819649\n51671    819650\n51672    819651\nName: 0, Length: 51673, dtype: int64.diff

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should the the time stamp column contain nan?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No, it should not. It's kind of related to #69 where we said we would need an additional check to ensure that the timestamp in physio.tsv.gz is actually continuous because that might not be the case, at least for eyetrackers other than eyelink. Physioevents.tsv.gz should also not have NaNs in timestamp, but here it does not have to be continuous because a timestamp is only logged if there is an actual corresponding event.

So long story short, we need something checking that there are no NaNs for both, and a check for physio.tsv.gz that it's continuous.



@pytest.mark.skipif(not _check_edf2asc_present(), reason="edf2asc missing")
Expand All @@ -60,10 +92,9 @@ def test_edf_end_to_end(eyelink_test_data_dir):
edf2bids(input_file=input_file, metadata_file=metadata_file, output_dir=output_dir)

_check_output_exists(output_dir, input_file)
_check_output_content(output_dir, input_file)

expected_events_sidecar = (
output_dir / f"{input_file.stem}_recording-eye1_physioevents.json"
)
expected_events_sidecar = output_dir / f"{input_file.stem}_events.json"
with open(expected_events_sidecar) as f:
events = json.load(f)
assert events["StimulusPresentation"]["ScreenResolution"] == [1919, 1079]
Expand Down Expand Up @@ -107,10 +138,9 @@ def test_edf_end_to_end_2eyes(eyelink_test_data_dir):
edf2bids(input_file=input_file, metadata_file=metadata_file, output_dir=output_dir)

_check_output_exists(output_dir, input_file)
_check_output_content(output_dir, input_file)

expected_events_sidecar_eye1 = (
output_dir / f"{input_file.stem}_recording-eye1_physioevents.json"
)
expected_events_sidecar_eye1 = output_dir / f"{input_file.stem}_events.json"
with open(expected_events_sidecar_eye1) as f:
events = json.load(f)
assert events["StimulusPresentation"]["ScreenResolution"] == [1919, 1079]
Expand All @@ -125,6 +155,7 @@ def test_edf_end_to_end_2eyes(eyelink_test_data_dir):
assert eyetrack["RecordedEye"] == "Left"

_check_output_exists(output_dir, input_file, eye=2)
_check_output_content(output_dir, input_file, eye=2)

expected_data_sidecar_eye2 = (
output_dir / f"{input_file.stem}_recording-eye2_physio.json"
Expand Down
Loading