Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fusion model patch for 2.0.1rc4 #366

Merged
merged 14 commits into from
Dec 13, 2024
8 changes: 8 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,11 @@
# 2.0.1-rc.4

Patch on final release candidate

## Contributions

- Fusion model analysis and performance metrics support. Bugfixes in gaze model #366

# 2.0.0-rc.4

Our final release candidate before the official 2.0 release!
Expand Down
5 changes: 2 additions & 3 deletions bcipy/signal/model/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
from bcipy.signal.model.pca_rda_kde.pca_rda_kde import PcaRdaKdeModel
from bcipy.signal.model.rda_kde.rda_kde import RdaKdeModel
from bcipy.signal.model.gaussian_mixture.gaussian_mixture import (
GMIndividual, GMCentralized, KernelGP, KernelGPSampleAverage)
GMIndividual, GMCentralized, GaussianProcess)


__all__ = [
Expand All @@ -11,7 +11,6 @@
"RdaKdeModel",
'GMIndividual',
'GMCentralized',
'KernelGP',
'KernelGPSampleAverage',
'GaussianProcess',
"ModelEvaluationReport",
]
Empty file.
357 changes: 357 additions & 0 deletions bcipy/signal/model/evaluate/fusion.py

Large diffs are not rendered by default.

5 changes: 2 additions & 3 deletions bcipy/signal/model/gaussian_mixture/__init__.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,7 @@
from .gaussian_mixture import GMIndividual, GMCentralized, KernelGP, KernelGPSampleAverage
from .gaussian_mixture import GMIndividual, GMCentralized, GaussianProcess

__all__ = [
'GMIndividual',
'GMCentralized',
'KernelGP',
'KernelGPSampleAverage'
'GaussianProcess'
]
35 changes: 3 additions & 32 deletions bcipy/signal/model/gaussian_mixture/gaussian_mixture.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,43 +12,14 @@
warnings.filterwarnings("ignore") # ignore DeprecationWarnings from tensorflow


class KernelGP(SignalModel):
def __init__(self):
reshaper = GazeReshaper()

def fit(self, training_data: np.ndarray, training_labels: np.ndarray):
training_data = np.asarray(training_data)

def evaluate(self, test_data: np.ndarray, test_labels: np.ndarray):
...

def predict(self, test_data: np.ndarray, inquiry, symbol_set) -> np.ndarray:
...

def predict_proba(self, test_data: np.ndarray) -> np.ndarray:
...

def save(self, path: Path):
...

def load(self, path: Path):
...


class KernelGPSampleAverage(SignalModel):
class GaussianProcess(SignalModel):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is missing several important methods. It doesn't seem like it could be used online as-is

reshaper = GazeReshaper()

def __init__(self):
self.ready_to_predict = False

def fit(self, training_data: np.ndarray):
training_data = np.array(training_data)
# Training data shape = inquiry x features x samples
# reshape training data to inquiry x (features x samples)
reshaped_data = training_data.reshape((len(training_data), -1))
cov_matrix = np.cov(reshaped_data, rowvar=False)
# cov_matrix_shape = (features x samples) x (features x samples)
reshaped_mean = np.mean(reshaped_data, axis=0)
...

def evaluate(self, test_data: np.ndarray, test_labels: np.ndarray):
...
Expand Down Expand Up @@ -81,7 +52,7 @@ def centralize(self, data: np.ndarray, symbol_pos: np.ndarray) -> np.ndarray:

return new_data

def substract_mean(self, data: np.ndarray, time_avg: np.ndarray) -> np.ndarray:
def subtract_mean(self, data: np.ndarray, time_avg: np.ndarray) -> np.ndarray:
""" Using the symbol locations in matrix, centralize all data (in Tobii units).
This data will only be used in certain model types.
Args:
Expand Down
188 changes: 75 additions & 113 deletions bcipy/signal/model/offline_analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,10 +27,11 @@
from bcipy.preferences import preferences
from bcipy.signal.model.base_model import SignalModel, SignalModelMetadata
from bcipy.signal.model.gaussian_mixture import (GMIndividual, GMCentralized,
KernelGP, KernelGPSampleAverage)
GaussianProcess)
from bcipy.signal.model.pca_rda_kde import PcaRdaKdeModel
from bcipy.signal.process import (ERPTransformParams, extract_eye_info,
filter_inquiries, get_default_transform)
from bcipy.signal.model.evaluate.fusion import calculate_eeg_gaze_fusion_acc

log = logging.getLogger(SESSION_LOG_FILENAME)
logging.basicConfig(level=logging.INFO, format="[%(threadName)-9s][%(asctime)s][%(name)s][%(levelname)s]: %(message)s")
Expand Down Expand Up @@ -229,7 +230,7 @@ def analyze_gaze(
save_figures=None,
show_figures=False,
plot_points=False,
model_type="Individual"):
model_type="Individual_GM"):
"""Analyze gaze data and return/save the gaze model.
Extract relevant information from gaze data object.
Extract timing information from trigger file.
Expand All @@ -248,9 +249,8 @@ def analyze_gaze(
save_figures (bool): If true, saves ERP figures after training to the data folder.
show_figures (bool): If true, shows ERP figures after training.
plot_points (bool): If true, plots the gaze points on the matrix image.
model_type (str): Type of gaze model to be used. Options are:
"Individual": Fits a separate Gaussian for each symbol. Default model
"Centralized": Uses data from all symbols to fit a single centralized Gaussian
model_type (str): Type of gaze model to be used. Options are: "Individual_GM", "Centralized_GM",
or "GaussianProcess".
"""
channels = gaze_data.channels
type_amp = gaze_data.daq_type
Expand All @@ -268,14 +268,12 @@ def analyze_gaze(

data, _fs = gaze_data.by_channel()

if model_type == "Individual":
if model_type == "Individual_GM":
model = GMIndividual()
elif model_type == "Centralized":
elif model_type == "Centralized_GM":
model = GMCentralized()
elif model_type == "GP":
model = KernelGP()
elif model_type == "GP_SampleAverage":
model = KernelGPSampleAverage()
model = GaussianProcess()

# Extract all Triggers info
trigger_targetness, trigger_timing, trigger_symbols = trigger_decoder(
Expand Down Expand Up @@ -311,8 +309,15 @@ def analyze_gaze(
symbol_set=alphabet()
)

# Extract the data for each target label and each eye separately.
# Apply preprocessing:
inquiry_length = inquiries_list[0].shape[1] # number of time samples in each inquiry
predefined_dimensions = 4 # left_x, left_y, right_x, right_y
preprocessed_array = np.zeros((len(inquiries_list), predefined_dimensions, inquiry_length))
# Extract left_x, left_y, right_x, right_y for each inquiry
for j in range(len(inquiries_list)):
left_eye, right_eye, _, _, _, _ = extract_eye_info(inquiries_list[j])
preprocessed_array[j] = np.concatenate((left_eye.T, right_eye.T,), axis=0)

preprocessed_data = {i: [] for i in symbol_set}
for i in symbol_set:
# Skip if there's no evidence for this symbol:
Expand All @@ -339,15 +344,15 @@ def analyze_gaze(
continue

# Fit the model based on model type.
if model_type == "Individual":
if model_type == "Individual_GM":
# Model 1: Fit Gaussian mixture on each symbol separately
reshaped_data = preprocessed_data[sym].reshape(
(preprocessed_data[sym].shape[0] *
preprocessed_data[sym].shape[2],
preprocessed_data[sym].shape[1]))
model.fit(reshaped_data)

if model_type == "Centralized":
if model_type == "Centralized_GM":
# Centralize the data using symbol positions and fit a single Gaussian.
# Load json file.
with open(f"{data_folder}/stimuli_positions.json", 'r') as params_file:
celikbasak marked this conversation as resolved.
Show resolved Hide resolved
Expand All @@ -357,99 +362,42 @@ def analyze_gaze(
for j in range(len(preprocessed_data[sym])):
centralized_data[sym].append(model.centralize(preprocessed_data[sym][j], symbol_positions[sym]))

if model_type == "GP_SampleAverage":
if model_type == "GP":
# Instead of centralizing, take the time average:
for j in range(len(preprocessed_data[sym])):
temp = np.mean(preprocessed_data[sym][j], axis=1)
time_average[sym].append(temp)
centralized_data[sym].append(
model.substract_mean(
model.subtract_mean(
preprocessed_data[sym][j],
temp)) # Delta_t = X_t - mu
centralized_data[sym] = np.array(centralized_data[sym])
time_average[sym] = np.mean(np.array(time_average[sym]), axis=0)

if model_type == "Individual":
accuracy = 0
acc_all_symbols = {}
counter = 0

if model_type == "GP_SampleAverage":
test_dict = {i: [] for i in symbol_set}
# Visualize different inquiries from the same target letter:
colors = ['r', 'g', 'b', 'y', 'm', 'c', 'k', 'w', 'orange', 'purple']
for sym in symbol_set:
if len(centralized_data[sym]) == 0:
continue

if model_type == "GP":
# Split the data into train and test sets & fit the model:
centralized_data_training_set = []
for sym in symbol_set:
if len(centralized_data[sym]) <= 1:
if len(centralized_data[sym]) == 1:
test_dict[sym] = preprocessed_data[sym][-1]
continue
# Leave one out and add the rest to the training set:
for j in range(len(centralized_data[sym]) - 1):
centralized_data_training_set.append(centralized_data[sym][j])
# Add the last inquiry to the test set:
test_dict[sym] = preprocessed_data[sym][-1]

centralized_data_training_set = np.array(centralized_data_training_set)
reshaped_data = centralized_data_training_set.reshape((72, 720))
centralized_gaze_data = np.zeros_like(preprocessed_array)
for i, (_, sym) in enumerate(zip(preprocessed_array, target_symbols)):
centralized_gaze_data[i] = model.subtract_mean(preprocessed_array[i], time_average[sym])
reshaped_data = centralized_gaze_data.reshape(
(len(centralized_gaze_data), inquiry_length * predefined_dimensions))

cov_matrix = np.cov(reshaped_data, rowvar=False)
time_horizon = 9
# Accuracy vs time horizon

for eye_coord_0 in range(4):
for eye_coord_1 in range(4):
for time_0 in range(180):
for time_1 in range(180):
l_ind = eye_coord_0 * 180 + time_0
m_ind = eye_coord_1 * 180 + time_1

for eye_coord_0 in range(predefined_dimensions):
for eye_coord_1 in range(predefined_dimensions):
for time_0 in range(inquiry_length):
celikbasak marked this conversation as resolved.
Show resolved Hide resolved
for time_1 in range(inquiry_length):
l_ind = eye_coord_0 * inquiry_length + time_0
m_ind = eye_coord_1 * inquiry_length + time_1
if np.abs(time_1 - time_0) > time_horizon:
cov_matrix[l_ind, m_ind] = 0
# cov_matrix[m_ind, l_ind] = 0
reshaped_mean = np.mean(reshaped_data, axis=0)

eps = 0
regularized_cov_matrix = cov_matrix + np.eye(len(cov_matrix)) * eps
try:
inv_cov_matrix = np.linalg.inv(regularized_cov_matrix)
except BaseException:
print("Singular matrix, using pseudo-inverse instead")
eps = 10e-3 # add a small value to the diagonal to make the matrix invertible
inv_cov_matrix = np.linalg.inv(cov_matrix + np.eye(len(cov_matrix)) * eps)
# inv_cov_matrix = np.linalg.pinv(cov_matrix + np.eye(len(cov_matrix))*eps)
denominator = 0

# Find the likelihoods for the test case:
l_likelihoods = np.zeros((len(symbol_set), len(symbol_set)))
log_likelihoods = np.zeros((len(symbol_set), len(symbol_set)))
counter = 0
for i_sym0, sym0 in enumerate(symbol_set):
for i_sym1, sym1 in enumerate(symbol_set):
if len(centralized_data[sym1]) == 0:
continue
if len(test_dict[sym0]) == 0:
continue
# print(f"Target: {sym0}, Tested: {sym1}")
central_data = model.substract_mean(test_dict[sym0], time_average[sym1])
flattened_data = central_data.reshape((720,))
diff = flattened_data - reshaped_mean
numerator = -np.dot(diff.T, np.dot(inv_cov_matrix, diff)) / 2
log_likelihood = numerator - denominator
# print(f"{log_likelihood:.3E}")
log_likelihoods[i_sym0, i_sym1] = log_likelihood
# Find the max likelihood:
max_like = np.argmax(log_likelihoods[i_sym0, :])
# Check if it's the same as the target, and save the result:
if max_like == i_sym0:
# print("True")
counter += 1

if model_type == "Centralized":
# Save model parameters which are mean and covariance matrix

if model_type == "Centralized_GM":
# Fit the model parameters using the centralized data:
# flatten the dict to a np array:
cent_data = np.concatenate([centralized_data[sym] for sym in symbol_set], axis=0)
Expand Down Expand Up @@ -528,31 +476,45 @@ def offline_analysis(
assert len(data_file_paths) < 3, "BciPy only supports up to 2 devices for offline analysis."
assert len(data_file_paths) > 0, "No data files found for offline analysis."

models = []
log.info(f"Starting offline analysis for {data_file_paths}")
for raw_data_path in data_file_paths:
raw_data = load_raw_data(raw_data_path)
device_spec = devices_by_name.get(raw_data.daq_type)
# extract relevant information from raw data object eeg

if device_spec.content_type == "EEG" and device_spec.is_active:
erp_model = analyze_erp(
raw_data,
parameters,
device_spec,
data_folder,
estimate_balanced_acc,
save_figures,
show_figures)
models.append(erp_model)

if device_spec.content_type == "Eyetracker" and device_spec.is_active:
et_model = analyze_gaze(
raw_data, parameters, device_spec, data_folder, save_figures, show_figures, model_type="Individual")
models.append(et_model)

if len(models) > 1:
log.info("Multiple Models Trained. Fusion Analysis Not Yet Implemented.")
if len(data_file_paths) == 2:
# Note that the models trained here are not saved/used for the online system.
log.info("Multiple Devices Found. Starting fusion analysis...")
eeg_data = load_raw_data(data_file_paths[0])
device_spec_eeg = devices_by_name.get(eeg_data.daq_type)
gaze_data = load_raw_data(data_file_paths[1])
device_spec_gaze = devices_by_name.get(gaze_data.daq_type)
symbol_set = alphabet()
eeg_acc, gaze_acc, fusion_acc = calculate_eeg_gaze_fusion_acc(
eeg_data, gaze_data, device_spec_eeg, device_spec_gaze, symbol_set, parameters, data_folder)
log.info(f"EEG Accuracy: {eeg_acc}, Gaze Accuracy: {gaze_acc}, Fusion Accuracy: {fusion_acc}")

# Ask the user if they want to proceed with full dataset model training
if confirm("Would you like to proceed with model training?"):
models = []
log.info(f"Starting offline analysis for {data_file_paths}")
celikbasak marked this conversation as resolved.
Show resolved Hide resolved
for raw_data_path in data_file_paths:
raw_data = load_raw_data(raw_data_path)
device_spec = devices_by_name.get(raw_data.daq_type)
# extract relevant information from raw data object eeg

if device_spec.content_type == "EEG" and device_spec.is_active:
erp_model = analyze_erp(
raw_data,
parameters,
device_spec,
data_folder,
estimate_balanced_acc,
save_figures,
show_figures)
models.append(erp_model)

if device_spec.content_type == "Eyetracker" and device_spec.is_active:
et_model = analyze_gaze(
raw_data, parameters, device_spec, data_folder, save_figures, show_figures, model_type="GP")
models.append(et_model)
else:
log.info("User opted out of offline analysis.")
models = []

if alert_finished:
log.info("Alerting Offline Analysis Complete")
Expand Down
4 changes: 0 additions & 4 deletions bcipy/signal/model/pca_rda_kde/pca_rda_kde.py
Original file line number Diff line number Diff line change
Expand Up @@ -207,10 +207,6 @@ def predict_proba(self, data: np.ndarray) -> np.ndarray:
if not self.ready_to_predict:
raise SignalException("must use model.fit() before model.predict_proba()")

# Model originally produces p(eeg | label). We want p(label | eeg):
#
# p(l=1 | e) = p(e | l=1) p(l=1) / p(e)
# log(p(l=1 | e)) = log(p(e | l=1)) + log(p(l=1)) - log(p(e))
return self.compute_class_probabilities(data)

def save(self, path: Path) -> None:
Expand Down
Loading
Loading