Skip to content

feat: Parse SMI BeGaze raw data files #1013

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 3 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
52 changes: 47 additions & 5 deletions src/pymovements/gaze/_utils/parsing.py
Original file line number Diff line number Diff line change
Expand Up @@ -115,21 +115,21 @@
return ret


def compile_patterns(patterns: list[dict[str, Any] | str]) -> list[dict[str, Any]]:
def compile_patterns(patterns: list[dict[str, Any] | str], msg_prefix: str) -> list[dict[str, Any]]:
"""Compile patterns from strings.

Parameters
----------
patterns: list[dict[str, Any] | str]
The list of patterns to compile.
msg_prefix: str
The message prefix to prepend to the regex patterns.

Returns
-------
list[dict[str, Any]]
Returns from string compiled regex patterns.
"""
msg_prefix = r'MSG\s+\d+[.]?\d*\s+'

compiled_patterns = []

for pattern in patterns:
Expand Down Expand Up @@ -207,13 +207,15 @@
Warning
If no metadata is found in the file.
"""
msg_prefix = r'MSG\s+\d+[.]?\d*\s+'

if patterns is None:
patterns = []
compiled_patterns = compile_patterns(patterns)
compiled_patterns = compile_patterns(patterns, msg_prefix)

if metadata_patterns is None:
metadata_patterns = []
compiled_metadata_patterns = compile_patterns(metadata_patterns)
compiled_metadata_patterns = compile_patterns(metadata_patterns, msg_prefix)

additional_columns = get_pattern_keys(compiled_patterns, 'column')
additional: dict[str, list[Any]] = {
Expand Down Expand Up @@ -643,3 +645,43 @@
'camera_position': 'unknown',
'short_name': mount_config,
}


def parse_begaze(
        filepath: Path | str,
        patterns: list[dict[str, Any] | str] | None = None,
        schema: dict[str, Any] | None = None,
        metadata_patterns: list[dict[str, Any] | str] | None = None,
        encoding: str = 'ascii',
) -> tuple[pl.DataFrame, dict[str, Any]]:
    """Parse BeGaze raw data export file.

    Only the pattern compilation step is implemented so far. The sample and
    metadata parsing is still missing, so the function raises instead of
    silently returning ``None`` in violation of its return annotation.

    Parameters
    ----------
    filepath: Path | str
        Path of the BeGaze export file to parse.
    patterns: list[dict[str, Any] | str] | None
        List of patterns to match for additional columns. (default: None)
    schema: dict[str, Any] | None
        Dictionary to optionally specify types of columns parsed by patterns.
        Not used yet. (default: None)
    metadata_patterns: list[dict[str, Any] | str] | None
        List of patterns to match for additional metadata. (default: None)
    encoding: str
        Text encoding of the file. Not used yet. (default: 'ascii')

    Returns
    -------
    tuple[pl.DataFrame, dict[str, Any]]
        A tuple containing the parsed sample data and the metadata in a dictionary.

    Raises
    ------
    NotImplementedError
        Always, until parsing of the BeGaze sample and message rows is implemented.
    """
    # A BeGaze message row looks like `<timestamp>\tMSG\t<trial>\t# Message: <text>`,
    # so user patterns get this prefix instead of the EyeLink `MSG <timestamp>` one.
    msg_prefix = r'\d+\tMSG\t\d+\t# Message:\s+'

    if patterns is None:
        patterns = []
    # TODO: keep the compiled patterns once the row parsing below exists.
    compile_patterns(patterns, msg_prefix)

    if metadata_patterns is None:
        metadata_patterns = []
    compile_patterns(metadata_patterns, msg_prefix)

    # TODO: read `filepath` using `encoding`, parse the `##` header block and the
    # SMP/MSG rows, apply `schema`, and return the sample frame with the metadata.
    raise NotImplementedError(
        f'Parsing of BeGaze files is not implemented yet (file: {filepath})',
    )
109 changes: 107 additions & 2 deletions tests/unit/gaze/_utils/_parsing_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,77 @@
END 10000022 SAMPLES EVENTS RES 38.54 31.12
"""

BEGAZE_TEXT = r"""
## [BeGaze]
## Converted from: C:\test.idf
## Date: 08.03.2023 09:25:20
## Version: BeGaze 3.7.40
## IDF Version: 9
## Sample Rate: 1000
## Separator Type: Msg
## Trial Count: 1
## Uses Plane File: False
## Number of Samples: 11
## Reversed: none
## [Run]
## Subject: P01
## Description: Run1
## [Calibration]
## Calibration Area: 1680 1050
## Calibration Point 0: Position(841;526)
## Calibration Point 1: Position(84;52)
## Calibration Point 2: Position(1599;52)
## Calibration Point 3: Position(84;1000)
## Calibration Point 4: Position(1599;1000)
## Calibration Point 5: Position(84;526)
## Calibration Point 6: Position(841;52)
## Calibration Point 7: Position(1599;526)
## Calibration Point 8: Position(841;1000)
## [Geometry]
## Stimulus Dimension [mm]: 474 297
## Head Distance [mm]: 700
## [Hardware Setup]
## System ID: IRX0470703-1007
## Operating System : 6.1
## IView X Version: 2.8.26
## [Filter Settings]
## Heuristics: False
## Heuristics Stage: 0
## Bilateral: True
## Gaze Cursor Filter: True
## Saccade Length [px]: 80
## Filter Depth [ms]: 20
## Format: LEFT, POR, QUALITY, PLANE, MSG
##
Time Type Trial L POR X [px] L POR Y [px] L Pupil Diameter [mm] Timing Pupil Confidence R Plane Info R Event Info Stimulus
10000000123 SMP 1 850.71 717.53 714.00 0 1 1 1 - test.bmp
10000000234 MSG 1 # Message: START_A
10000002123 SMP 1 850.71 717.53 714.00 0 1 1 1 - test.bmp
10000003234 MSG 1 # Message: STOP_A
10000004123 SMP 1 850.71 717.53 714.00 0 1 1 1 - test.bmp
10000004234 MSG 1 # Message: METADATA_1 123
10000005234 MSG 1 # Message: START_B
10000006123 SMP 1 850.71 717.53 714.00 0 1 1 1 - test.bmp
10000007234 MSG 1 # Message: START_TRIAL_1
10000008123 SMP 1 850.71 717.53 714.00 0 1 1 1 - test.bmp
10000009234 MSG 1 # Message: STOP_TRIAL_1
10000010234 MSG 1 # Message: START_TRIAL_2
10000011123 SMP 1 850.71 717.53 714.00 0 1 1 1 - test.bmp
10000012234 MSG 1 # Message: STOP_TRIAL_2
10000013234 MSG 1 # Message: START_TRIAL_3
10000014234 MSG 1 # Message: METADATA_2 abc
10000014235 MSG 1 # Message: METADATA_1 456
10000014345 SMP 1 850.71 717.53 714.00 0 1 1 1 - test.bmp
10000015234 MSG 1 # Message: STOP_TRIAL_3
10000016234 MSG 1 # Message: STOP_B
10000017234 MSG 1 # Message: METADATA_3
10000017345 SMP 1 850.71 717.53 714.00 0 0 -1 -1 - test.bmp
10000019123 SMP 1 850.71 717.53 714.00 0 0 -1 -1 Blink test.bmp
10000020123 SMP 1 850.71 717.53 714.00 0 0 -1 -1 Blink test.bmp
10000021123 SMP 1 850.71 717.53 714.00 0 0 1 -1 - test.bmp
""" # noqa: E501


PATTERNS = [
{
'pattern': 'START_A',
Expand Down Expand Up @@ -150,7 +221,7 @@
},
)

EXPECTED_METADATA = {
EXPECTED_METADATA_EYELINK = {
'weekday': 'Wed',
'month': 'Mar',
'day': 8,
Expand Down Expand Up @@ -191,6 +262,26 @@
'metadata_4': None,
}

# Metadata dictionary that test_parse_begaze compares against the result of
# parsing BEGAZE_TEXT.
# NOTE(review): several values below look carried over from the EyeLink
# expectations; confirm each one against the BeGaze fixture once the parser exists.
# TODO: Add more metadata
EXPECTED_METADATA_BEGAZE = {
# BEGAZE_TEXT header: "## Sample Rate: 1000".
'sampling_rate': 1000.00,
# Presumably derived from the header "## Format: LEFT, ..." -- confirm.
'tracked_eye': 'L',
# 2 / 11: BEGAZE_TEXT has 11 SMP rows, two of which carry the "Blink" event.
'data_loss_ratio_blinks': 0.18181818181818182,
# 3 / 11 -- NOTE(review): which third sample counts as lost is not evident
# from the fixture; verify.
'data_loss_ratio': 0.2727272727272727,
# Presumably 11 samples at 1000 Hz -- TODO confirm the intended definition.
'total_recording_duration_ms': 11,
# BEGAZE_TEXT header: "## Date: 08.03.2023 09:25:20".
'datetime': datetime.datetime(2023, 3, 8, 9, 25, 20),
'blinks': [{
'duration_ms': 2,
'num_samples': 2,
# Looks like the raw BeGaze timestamps (10000019123, 10000021123) divided
# by 1000 -- assumes the raw unit is microseconds; confirm.
'start_timestamp': 10000019.123,
'stop_timestamp': 10000021.123,
}],
# Presumably produced by METADATA_PATTERNS (defined outside this view).
# BEGAZE_TEXT contains "METADATA_1 123" and later "METADATA_1 456"; the
# expectation keeps the first value.
'metadata_1': '123',
'metadata_2': 'abc',
'metadata_3': True,
# No METADATA_4 message appears in BEGAZE_TEXT.
'metadata_4': None,
}


def test_parse_eyelink(tmp_path):
filepath = tmp_path / 'sub.asc'
Expand All @@ -203,7 +294,7 @@ def test_parse_eyelink(tmp_path):
)

assert_frame_equal(df, EXPECTED_DF, check_column_order=False)
assert metadata == EXPECTED_METADATA
assert metadata == EXPECTED_METADATA_EYELINK


@pytest.mark.parametrize(
Expand Down Expand Up @@ -918,3 +1009,17 @@ def test_parse_eyelink_encoding(tmp_path, bytestring, encoding, expected_text):
)

assert parsed_metadata['text'] == expected_text


def test_parse_begaze(tmp_path):
    """Parse the BeGaze fixture and compare samples and metadata to the expectations."""
    begaze_file = tmp_path / 'sub.txt'
    begaze_file.write_text(BEGAZE_TEXT)

    parsed = pm.gaze._utils.parsing.parse_begaze(
        begaze_file,
        patterns=PATTERNS,
        metadata_patterns=METADATA_PATTERNS,
    )
    samples, parsed_metadata = parsed

    assert_frame_equal(samples, EXPECTED_DF, check_column_order=False)
    assert parsed_metadata == EXPECTED_METADATA_BEGAZE
Loading