From 27300c8fa1ab3bb26eaaeeb776d5f57eb1e07a42 Mon Sep 17 00:00:00 2001 From: nadje Date: Thu, 14 Nov 2024 15:25:25 +0100 Subject: [PATCH] code review implementations --- .../automate_custom_events/__main__.py | 576 +++++++++--------- .../cloud_interaction.py | 47 +- .../automate_custom_events/control_modules.py | 37 +- .../{process_frames.py => frame_processor.py} | 19 +- .../{read_data.py => video_utils.py} | 27 +- 5 files changed, 341 insertions(+), 365 deletions(-) rename src/pupil_labs/automate_custom_events/{process_frames.py => frame_processor.py} (94%) rename src/pupil_labs/automate_custom_events/{read_data.py => video_utils.py} (91%) diff --git a/src/pupil_labs/automate_custom_events/__main__.py b/src/pupil_labs/automate_custom_events/__main__.py index 1866018..89d59da 100644 --- a/src/pupil_labs/automate_custom_events/__main__.py +++ b/src/pupil_labs/automate_custom_events/__main__.py @@ -26,282 +26,6 @@ def extract_ids(url): return workspace_id, recording_id - -# Function to toggle visibility of the general parameters frame -def toggle_general_parameters(): - if general_frame.winfo_viewable(): - general_frame.grid_remove() # Hide the general parameters frame - else: - general_frame.grid( - row=2, column=0, columnspan=2, sticky="ew" - ) # Show the general parameters frame - - -async def run_task(): - try: - url = url_entry.get() - cloud_token = cloud_token_entry.get() - prompt_description = prompt_entry.get("1.0", "end-1c") - event_code = prompt_event_entry.get("1.0", "end-1c") - batch_size = batch_entry.get() - start_time_seconds = start_entry.get() - end_time_seconds = end_entry.get() - openai_api_key = openai_key_entry.get() - download_path = Path(download_path_entry.get()) - workspace_id, rec_id = extract_ids(url) - recpath = Path(download_path / rec_id) - await run_modules( - openai_api_key, - workspace_id, - rec_id, - cloud_token, - download_path, - recpath, - prompt_description, - event_code, - batch_size, - start_time_seconds, - end_time_seconds, - ) - finally: - pass - - -def clear_module_fields(): - """Helper function to clear all general parameters and prompt fields.""" - widgets = [ - url_entry, - cloud_token_entry, - openai_key_entry, - prompt_entry, - prompt_event_entry, - batch_entry, - start_entry, - end_entry, - ] - - for widget in widgets: - if isinstance(widget, tk.Text): - widget.delete("1.0", tk.END) - else: - widget.delete(0, tk.END) - - -def on_run_click(): - def task(): - asyncio.run(run_task()) - # Re-enable the run button and stop the progress bar in the main thread - root.after(0, lambda: run_button.config(state="normal")) - root.after(0, progress_bar.stop) - - progress_bar.start() # Start progress bar - run_button.config(state="disabled") # Disable run button to prevent multiple clicks - threading.Thread(target=task).start() - - -# Create the main window -root = tk.Tk() -root.title("Annotator Assistant") -root.geometry("600x1000") # Adjusted window size - -# Center the main window -root.update_idletasks() -width = root.winfo_width() -height = root.winfo_height() -x = (root.winfo_screenwidth() // 2) - (width // 2) -y = (root.winfo_screenheight() // 2) - (height // 2) -root.geometry(f"+{x}+{y}") - -# Set up the style -sv_ttk.set_theme("dark") -style = ttk.Style() - -# Create custom styles -style.configure("Compute.TButton", background="#6D7BE0", foreground="white", padding=6) - -style.map( - "Compute.TButton", - background=[("active", "dark blue"), ("pressed", "navy"), ("disabled", "#222222")], - foreground=[("disabled", "#424242")], -) - -style.layout( - 
"Compute.TButton", - [ - ( - "Button.padding", - {"children": [("Button.label", {"sticky": "nswe"})], "sticky": "nswe"}, - ) - ], -) - -style.configure( - "Custom.Horizontal.TProgressbar", troughcolor="white", background="#6D7BE0" -) - -style.configure( - "Custom.TEntry", - foreground="white", - fieldbackground="#000000", - background="#000000", - insertcolor="white", -) - -heading_font = Font(font=style.lookup("TLabel", "font")) -heading_font.configure(size=heading_font.cget("size")) -style.configure("TLabel", padding=(10, 5)) -style.configure("Heading.TLabel", font=heading_font, padding=(10, 10)) -style.configure("Accent.TButton", foreground="blue") -layout_helper = TTKFormLayoutHelper(root) - -# Main frame to center content with consistent margins -main_frame = ttk.Frame(root, padding=(40, 20, 20, 20)) # Added left and right padding -main_frame.grid(column=0, row=0, sticky="nsew") -root.columnconfigure(0, weight=1) -root.rowconfigure(0, weight=1) - -# Center the main_frame contents by configuring row and column weights -for col in range(2): - main_frame.columnconfigure(col, weight=1) - -# Toggle Button for General Parameters at the top -toggle_button = ttk.Button( - main_frame, text="Select Recording", command=toggle_general_parameters -) -toggle_button.grid(row=0, column=0, columnspan=2, pady=(10, 10), sticky="ew") - -# General parameters (in a frame) -general_frame = ttk.Frame(main_frame) - -# Create labeled entries for the general parameters using the helper functions -bg = "#000000" -entry_fg = "white" - -url_entry = layout_helper.create_labeled_entry( - general_frame, - "Recording Link", - row=0, - default_value="", -) -cloud_token_entry = layout_helper.create_labeled_entry( - general_frame, - "Cloud API Token", - row=1, - show="*", - default_value="", -) -openai_key_entry = layout_helper.create_labeled_entry( - general_frame, - "OpenAI API Key", - row=2, - show="*", - default_value="", -) -download_path_entry = layout_helper.create_labeled_folder_selector( - general_frame, "Download Path", row=3, default_path=Path.cwd() -) -batch_entry = layout_helper.create_labeled_entry( - general_frame, "Frame batch", row=4, default_value="" -) -start_entry = layout_helper.create_labeled_entry( - general_frame, "Start (s)", row=5, default_value="" -) -end_entry = layout_helper.create_labeled_entry( - general_frame, "End (s)", row=6, default_value="" -) - -# Initially hide the general parameters section -general_frame.grid(row=2, column=0, columnspan=2, sticky="ew") -general_frame.grid_remove() # Hide at start - -# Layout helper reset for main_frame -layout_helper = TTKFormLayoutHelper(main_frame) - -# Prompts (always visible) -layout_helper.row_idx = 3 # Start from row 3 to ensure correct placement - -layout_helper.add_heading_2( - "Analyze this egocentric video. The red circle in the overlay indicates where the wearer is looking. Note the times when...", - heading_font, -) -# Insert background for prompt entry -prompt_entry = tk.Text( - main_frame, height=5, width=80, bg="#000000", fg="white", insertbackground="white" -) -layout_helper.add_row( - "", prompt_entry, {"pady": 10, "sticky": "ew"} -) # Added sticky='ew' to ensure text fills the width -layout_helper.add_heading("... 
and report them as the following events.") -prompt_event_entry = tk.Text( - main_frame, height=5, width=80, bg="#000000", fg="white", insertbackground="white" -) -layout_helper.add_row( - "", prompt_event_entry, {"pady": 10, "sticky": "ew"} -) # Added sticky='ew' to ensure text fills the width - -# Add buttons below the prompt entries -clear_button = ttk.Button( - main_frame, text="Reset Form", command=clear_module_fields, style="TButton" -) -clear_button.grid( - row=layout_helper.row_idx, column=0, columnspan=2, pady=(10, 0), sticky="ew" -) - -run_button = ttk.Button( - main_frame, text="Compute", command=on_run_click, style="Compute.TButton" -) -run_button.grid( - row=layout_helper.row_idx + 1, column=0, columnspan=2, pady=(10, 10), sticky="ew" -) - -# Progress bar below the buttons -progress_bar = ttk.Progressbar( - main_frame, mode="indeterminate", style="Custom.Horizontal.TProgressbar" -) -progress_bar.grid( - row=layout_helper.row_idx + 2, column=0, columnspan=2, pady=(10, 10), sticky="ew" -) - -# Console output label and text area -console_label = ttk.Label(main_frame, text="Console Output:", style="Heading.TLabel") -console_label.grid( - row=layout_helper.row_idx + 3, column=0, columnspan=2, pady=(10, 0), sticky="w" -) - -console_text = tk.Text( - main_frame, - height=10, - width=80, - state="disabled", - bg="#000000", - fg="white", - wrap="word", -) -console_text.grid( - row=layout_helper.row_idx + 4, column=0, columnspan=2, pady=(5, 10), sticky="nsew" -) - -# Configure row and column weights for console_text to expand -main_frame.rowconfigure(layout_helper.row_idx + 4, weight=1) -main_frame.columnconfigure(0, weight=1) -main_frame.columnconfigure(1, weight=1) - -# Set up logging -logger = logging.getLogger() -logger.setLevel(logging.DEBUG) # Set the root logger level - -# Create formatters -formatter = logging.Formatter("%(asctime)s - %(levelname)s - %(message)s") - -# Remove any existing handlers -logger.handlers = [] - -# Create console handler for standard console output -console_handler = logging.StreamHandler() -console_handler.setLevel(logging.DEBUG) # Adjust as needed -console_handler.setFormatter(formatter) - - # Create GUI handler for the GUI console class TextHandler(logging.Handler): def __init__(self, text_widget): @@ -322,14 +46,298 @@ def append(): self.text_widget.after(0, append) +class App(): + def __init__(self): + self.setup_gui() + + # Set up logging + logger = logging.getLogger() + logger.setLevel(logging.DEBUG) # Set the root logger level + + # Create formatters + formatter = logging.Formatter("%(asctime)s - %(levelname)s - %(message)s") + + # Remove any existing handlers + logger.handlers = [] + + # Create console handler for standard console output + console_handler = logging.StreamHandler() + console_handler.setLevel(logging.DEBUG) # Adjust as needed + console_handler.setFormatter(formatter) + + gui_handler = TextHandler(self.console_text) + gui_handler.setLevel(logging.INFO) # Adjust level as needed + gui_handler.setFormatter(formatter) + + # Add handlers to the logger + logger.addHandler(console_handler) + logger.addHandler(gui_handler) + + def setup_gui(self): + self.root = tk.Tk() + self.root.title("Annotator Assistant") + self.root.geometry("600x1000") # Adjusted window size + layout_helper = TTKFormLayoutHelper(self.root) + + self.main_frame = ttk.Frame(self.root, padding=(40, 20, 20, 20)) # Added left and right padding + self.main_frame.grid(column=0, row=0, sticky="nsew") + self.root.columnconfigure(0, weight=1) + self.root.rowconfigure(0, 
weight=1)
+
+        # Center the main_frame contents by configuring row and column weights
+        for col in range(2):
+            self.main_frame.columnconfigure(col, weight=1)
+
+        self.setup_styles()
+
+        # Center the main window
+        self.root.update_idletasks()
+        self.width = self.root.winfo_width()
+        self.height = self.root.winfo_height()
+        x = (self.root.winfo_screenwidth() // 2) - (self.width // 2)
+        y = (self.root.winfo_screenheight() // 2) - (self.height // 2)
+        self.root.geometry(f"+{x}+{y}")
+
+        self.toggle_button = ttk.Button(
+            self.main_frame, text="Select Recording", command=self.toggle_settings_form
+        )
+        self.toggle_button.grid(row=0, column=0, columnspan=2, pady=(10, 10), sticky="ew")
+
+        # General parameters (in a frame; hidden at start)
+        self.general_frame = ttk.Frame(self.main_frame)
+        self.general_frame.grid(row=2, column=0, columnspan=2, sticky="ew")
+        self.general_frame.grid_remove()  # Hide at start
+
+        self.settings_form = self.create_settings_form(self.general_frame)
+
+        # Layout helper reset for main_frame
+        layout_helper = TTKFormLayoutHelper(self.main_frame)
+        layout_helper.row_idx = 3  # Start from row 3 to ensure correct placement
+        layout_helper.add_heading_2(
+            "Analyze this egocentric video. The red circle in the overlay indicates where the wearer is looking. Note the times when...",
+            self.heading_font,
+        )
+
+        self.prompt_entry = tk.Text(
+            self.main_frame, height=5, width=80, bg="#000000", fg="white", insertbackground="white"
+        )
+        layout_helper.add_row(
+            "", self.prompt_entry, {"pady": 10, "sticky": "ew"}
+        )
+        layout_helper.add_heading("... and report them as the following events.")
+        self.prompt_event_entry = tk.Text(
+            self.main_frame, height=5, width=80, bg="#000000", fg="white", insertbackground="white"
+        )
+        layout_helper.add_row(
+            "", self.prompt_event_entry, {"pady": 10, "sticky": "ew"}
+        )
+
+        # Buttons
+        self.clear_button = ttk.Button(
+            self.main_frame, text="Reset Form", command=self.clear_module_fields, style="TButton"
+        )
+        self.clear_button.grid(
+            row=layout_helper.row_idx, column=0, columnspan=2, pady=(10, 10), sticky="ew"
+        )
+
+        self.run_button = ttk.Button(
+            self.main_frame, text="Compute", command=self.on_run_click, style="Compute.TButton"
+        )
+        self.run_button.grid(
+            row=layout_helper.row_idx + 1, column=0, columnspan=2, pady=(10, 10), sticky="ew"
+        )
+
+        # Progress bar below the buttons
+        self.progress_bar = ttk.Progressbar(
+            self.main_frame, mode="indeterminate", style="Custom.Horizontal.TProgressbar"
+        )
+        self.progress_bar.grid(
+            row=layout_helper.row_idx + 2, column=0, columnspan=2, pady=(10, 10), sticky="ew"
+        )
+
+        # Console output label and text area
+        self.console_label = ttk.Label(self.main_frame, text="Console Output:", style="Heading.TLabel")
+        self.console_label.grid(
+            row=layout_helper.row_idx + 3, column=0, columnspan=2, pady=(10, 0), sticky="w"
+        )
+
+        self.console_text = tk.Text(
+            self.main_frame,
+            height=10,
+            width=80,
+            state="disabled",
+            bg="#000000",
+            fg="white",
+            wrap="word",
+        )
+        self.console_text.grid(
+            row=layout_helper.row_idx + 4, column=0, columnspan=2, pady=(5, 10), sticky="nsew"
+        )
 
-gui_handler = TextHandler(console_text)
-gui_handler.setLevel(logging.INFO)  # Adjust level as needed
-gui_handler.setFormatter(formatter)
+        # Configure row and column weights for console_text to expand
+        self.main_frame.rowconfigure(layout_helper.row_idx + 4, weight=1)
+        self.main_frame.columnconfigure(0, weight=1)
+        self.main_frame.columnconfigure(1, weight=1)
+
+    def setup_styles(self):
+        # Set up the style
+        sv_ttk.set_theme("dark")
+        style = ttk.Style()
+
+        # Create custom styles
+        style.configure("Compute.TButton", background="#6D7BE0", foreground="white", padding=6)
+
+        style.map(
+            "Compute.TButton",
+            background=[("active", "dark blue"), ("pressed", "navy"), ("disabled", "#222222")],
+            foreground=[("disabled", "#424242")],
+        )
+
+        style.layout(
+            "Compute.TButton",
+            [
+                (
+                    "Button.padding",
+                    {"children": [("Button.label", {"sticky": "nswe"})], "sticky": "nswe"},
+                )
+            ],
+        )
+
+        style.configure(
+            "Custom.Horizontal.TProgressbar", troughcolor="white", background="#6D7BE0"
+        )
+
+        style.configure(
+            "Custom.TEntry",
+            foreground="white",
+            fieldbackground="#000000",
+            background="#000000",
+            insertcolor="white",
+        )
 
-# Add handlers to the logger
-logger.addHandler(console_handler)
-logger.addHandler(gui_handler)
+        self.heading_font = Font(font=style.lookup("TLabel", "font"))
+        self.heading_font.configure(size=self.heading_font.cget("size"))
+        style.configure("TLabel", padding=(10, 5))
+        style.configure("Heading.TLabel", font=self.heading_font, padding=(10, 10))
+        style.configure("Accent.TButton", foreground="blue")
+
+    def create_settings_form(self, container):
+        container.grid_columnconfigure(1, weight=1)
+
+        form_layout = TTKFormLayoutHelper(container)
+
+        self.url_entry = form_layout.create_labeled_entry(
+            container,
+            "Recording Link",
+            row=0,
+            default_value="",
+        )
+        self.cloud_token_entry = form_layout.create_labeled_entry(
+            container,
+            "Cloud API Token",
+            row=1,
+            show="*",
+            default_value="",
+        )
+        self.openai_key_entry = form_layout.create_labeled_entry(
+            container,
+            "OpenAI API Key",
+            row=2,
+            show="*",
+            default_value="",
+        )
+        self.download_path_entry = form_layout.create_labeled_folder_selector(
+            container, "Download Path", row=3, default_path=Path.cwd()
+        )
+        self.batch_entry = form_layout.create_labeled_entry(
+            container, "Frame batch", row=4, default_value=""
+        )
+        self.start_entry = form_layout.create_labeled_entry(
+            container, "Start (s)", row=5, default_value=""
+        )
+        self.end_entry = form_layout.create_labeled_entry(
+            container, "End (s)", row=6, default_value=""
+        )
+
+        return container
+
+    def clear_module_fields(self):
+        """Helper function to clear all general parameters and prompt fields."""
+        widgets = [
+            self.url_entry,
+            self.cloud_token_entry,
+            self.openai_key_entry,
+            self.prompt_entry,
+            self.prompt_event_entry,
+            self.batch_entry,
+            self.start_entry,
+            self.end_entry,
+        ]
+
+        for widget in widgets:
+            if isinstance(widget, tk.Text):
+                widget.delete("1.0", tk.END)
+            else:
+                widget.delete(0, tk.END)
+
+    # Function to toggle visibility of the general parameters frame
+    def toggle_settings_form(self):
+        if self.settings_form.winfo_ismapped():
+            self.settings_form.grid_remove()
+        else:
+            self.settings_form.grid(row=2, column=0, columnspan=2, sticky="ew")
+
+    def on_run_click(self):
+        def task():
+            try:
+                asyncio.run(self.run_task())
+            except Exception as e:
+                logging.error(e, exc_info=True)
+
+            # Re-enable the run button and stop the progress bar in the main thread
+            self.root.after(0, lambda: self.run_button.config(state="normal"))
+            self.root.after(0, self.progress_bar.stop)
+
+        self.progress_bar.start()  # Start progress bar
+        self.run_button.config(state="disabled")  # Disable run button to prevent multiple clicks
threading.Thread(target=task).start() + + async def run_task(self): + url = self.url_entry.get() + cloud_token = self.cloud_token_entry.get() + prompt_description = self.prompt_entry.get("1.0", "end-1c") + event_code = self.prompt_event_entry.get("1.0", "end-1c") + batch_size = self.batch_entry.get() + start_time_seconds = self.start_entry.get() + end_time_seconds = self.end_entry.get() + openai_api_key = self.openai_key_entry.get() + download_path = Path(self.download_path_entry.get()) + workspace_id, rec_id = extract_ids(url) + await run_modules( + openai_api_key, + workspace_id, + rec_id, + cloud_token, + download_path, + prompt_description, + event_code, + batch_size, + start_time_seconds, + end_time_seconds, + ) + def execute(self): + self.root.mainloop() + self.root.quit() + +def run_main(): + app = App() + app.execute() + +if __name__ == "__main__": + run_main() -# Start the GUI event loop -root.mainloop() diff --git a/src/pupil_labs/automate_custom_events/cloud_interaction.py b/src/pupil_labs/automate_custom_events/cloud_interaction.py index 5cdde53..2ad1621 100644 --- a/src/pupil_labs/automate_custom_events/cloud_interaction.py +++ b/src/pupil_labs/automate_custom_events/cloud_interaction.py @@ -1,10 +1,8 @@ import requests import json import logging -import glob import shutil from pathlib import Path -import os API_URL = "https://api.cloud.pupil-labs.com/v2" @@ -14,7 +12,7 @@ def download_url(path: str, save_path: str, API_KEY, chunk_size=128) -> None: r = requests.get(url, stream=True, headers={"api-key": API_KEY}) r.raise_for_status() save_path = Path(save_path) - with open(save_path, "wb") as fd: + with save_path.open("wb") as fd: for chunk in r.iter_content(chunk_size=chunk_size): fd.write(chunk) @@ -25,23 +23,22 @@ def download_recording( recording_id: str, workspace_id: str, download_path: str, API_KEY ) -> None: download_path = Path(download_path) # Ensure download_path is a Path object - download_path.mkdir( - parents=True, exist_ok=True - ) # Create directory if it doesn't exist + download_path.mkdir(parents=True, exist_ok=True) # Create directory if it doesn't exist - save_path = download_path / f"{recording_id}.zip" + zip_path = download_path / f"{recording_id}.zip" status = download_url( f"workspaces/{workspace_id}/recordings:raw-data-export?ids={recording_id}", - save_path, + zip_path, API_KEY, chunk_size=128, ) - shutil.unpack_archive(save_path, download_path / recording_id) - os.remove(save_path) - for file_source in glob.glob(str(download_path / f"{recording_id}/*/*")): - file_source = Path(file_source) - file_destination = file_source.parents[1] / file_source.name - shutil.move(file_source, file_destination) + rec_path = download_path / recording_id + + shutil.unpack_archive(zip_path, rec_path) + zip_path.unlink() + + for file_source in rec_path.glob("*/*"): + shutil.move(file_source, file_source.parent.parent / file_source.name) return status @@ -58,24 +55,4 @@ def send_event_to_cloud(workspace_id, recording_id, keyword, timestamp_sec, API_ if response.status_code == 200: logging.debug(f"Event sent successfully: {data}") else: - logging.debug(f"Failed to send event: {response.status_code}, {response.text}") - - -def download_raw_recording( - recording_id: str, workspace_id: str, download_path: str, API_KEY -) -> None: - os.makedirs(download_path, exist_ok=True) - download_url( - f"/workspaces/{workspace_id}/recordings/{recording_id}.zip", - download_path / f"{recording_id}.zip", - API_KEY, - chunk_size=128, - ) - shutil.unpack_archive( - download_path / 
f"{recording_id}.zip", download_path / f"{recording_id}" - ) - os.remove(download_path / f"{recording_id}.zip") - for file_source in glob.glob(str(download_path / f"{recording_id}/*/*")): - file_source = Path(file_source) - file_destination = file_source.parents[1] / file_source.name - shutil.move(file_source, file_destination) + logging.debug(f"Failed to send event: {response.status_code}, {response.text}") \ No newline at end of file diff --git a/src/pupil_labs/automate_custom_events/control_modules.py b/src/pupil_labs/automate_custom_events/control_modules.py index 06639b1..126e0d0 100644 --- a/src/pupil_labs/automate_custom_events/control_modules.py +++ b/src/pupil_labs/automate_custom_events/control_modules.py @@ -5,11 +5,11 @@ import logging import numpy as np from pupil_labs.automate_custom_events.cloud_interaction import download_recording -from pupil_labs.automate_custom_events.read_data import ( +from pupil_labs.automate_custom_events.video_utils import ( encode_video_as_base64, create_gaze_overlay_video, ) -from pupil_labs.automate_custom_events.process_frames import FrameProcessor +from pupil_labs.automate_custom_events.frame_processor import FrameProcessor from pupil_labs.dynamic_content_on_rim.video.read import read_video_ts @@ -19,7 +19,6 @@ async def run_modules( rec_id, cloud_api_key, download_path, - recpath, description, event_code, batch_size, @@ -35,14 +34,13 @@ async def run_modules( download_recording(rec_id, worksp_id, download_path, cloud_api_key) recpath = Path(download_path / rec_id) - files = glob.glob(str(Path(recpath, "*.mp4"))) gaze_overlay_path = os.path.join(recpath, "gaze_overlay.mp4") if os.path.exists(gaze_overlay_path): logging.debug(f"{gaze_overlay_path} exists.") else: logging.debug(f"{gaze_overlay_path} does not exist.") - raw_video_path = files[0] + raw_video_path = next(recpath.glob("*.mp4")) # Format to read timestamps oftype = {"timestamp [ns]": np.uint64} @@ -52,14 +50,11 @@ async def run_modules( # Read gaze data logging.debug("Reading gaze data...") - gaze_df = pd.read_csv(Path(recpath, "gaze.csv"), dtype=oftype) - + gaze_df = pd.read_csv(recpath / "gaze.csv", dtype=oftype) # Read the world timestamps (needed for gaze module) logging.debug("Reading world timestamps...") - world_timestamps_df = pd.read_csv( - Path(recpath, "world_timestamps.csv"), dtype=oftype - ) + world_timestamps_df = pd.read_csv(recpath / "world_timestamps.csv", dtype=oftype) # Prepare df for gaze overlay ts_world = world_timestamps_df["timestamp [ns]"] @@ -67,7 +62,7 @@ async def run_modules( video_for_gaze_module = pd.DataFrame( { "frames": np.arange(frames), - "pts": [int(pt) for pt in pts], + "pts": pts.astype(int), "timestamp [ns]": ts_world, } ) @@ -90,26 +85,23 @@ async def run_modules( create_gaze_overlay_video( merged_sc_gaze, raw_video_path, ts_world, gaze_overlay_path ) - merged_sc_gaze.to_csv( - os.path.join(recpath, "merged_sc_gaze_GM.csv"), index=False - ) + merged_sc_gaze.to_csv(recpath / "merged_sc_gaze_GM.csv", index=False) + ############################################################################# # 2. 
Read gaze_overlay_video and get baseframes
     #############################################################################
-    video_df, baseframes = encode_video_as_base64(gaze_overlay_path)
-    output_get_baseframes = pd.DataFrame(video_df)
-    output_get_baseframes.to_csv(
-        os.path.join(recpath, "output_get_baseframes.csv"), index=False
-    )
+    base64_frames, frame_metadata = encode_video_as_base64(gaze_overlay_path)
+    output_get_baseframes = pd.DataFrame(frame_metadata)
+    output_get_baseframes.to_csv(recpath / "output_get_baseframes.csv", index=False)
 
     #############################################################################
     # 3. Process Frames with GPT-4o
     #############################################################################
     logging.info("Start processing the frames..")
     frame_processor = FrameProcessor(
-        baseframes,
-        video_df,
+        base64_frames,
+        frame_metadata,
         openai_api_key,
         cloud_api_key,
         rec_id,
@@ -124,9 +116,10 @@
     async_process_frames_output_events = await frame_processor.prompting(
         recpath, int(batch_size)
     )
+    logging.debug(async_process_frames_output_events)
 
     final_output_path = pd.DataFrame(async_process_frames_output_events)
-    final_output_path.to_csv(os.path.join(recpath, "custom_events.csv"), index=False)
+    final_output_path.to_csv(recpath / "custom_events.csv", index=False)
     logging.info(
         "◎ Activity recognition completed and events sent! ⚡️[/]",
         extra={"markup": True},
diff --git a/src/pupil_labs/automate_custom_events/process_frames.py b/src/pupil_labs/automate_custom_events/frame_processor.py
similarity index 94%
rename from src/pupil_labs/automate_custom_events/process_frames.py
rename to src/pupil_labs/automate_custom_events/frame_processor.py
index b53994a..d4b392a 100644
--- a/src/pupil_labs/automate_custom_events/process_frames.py
+++ b/src/pupil_labs/automate_custom_events/frame_processor.py
@@ -10,7 +10,6 @@
 
 logger = logging.getLogger(__name__)
 
-
 class FrameProcessor:
     def __init__(
         self,
@@ -102,13 +101,13 @@ def is_within_time_range(self, timestamp):
 
     async def query_frame(self, index, session):
         # Check if the frame's timestamp is within the specified time range
-        timestamp = self.mydf.iloc[index]["timestamp [s]"]
+        timestamp = self.frame_metadata.iloc[index]["timestamp [s]"]
         if not self.is_within_time_range(timestamp):
             # print(f"Timestamp {timestamp} is not within selected timerange")
             return None
 
-        base64_frames_content = [{"image": self.base64Frames[index], "resize": 768}]
-        video_gaze_df_content = [self.mydf.iloc[index].to_dict()]
+        base64_frames_content = [{"image": self.base64_frames[index], "resize": 768}]
+        video_gaze_df_content = [self.frame_metadata.iloc[index].to_dict()]
 
         PROMPT_MESSAGES = [
             {
@@ -128,7 +127,7 @@ async def query_frame(self, index, session):
             "max_tokens": 300,
         }
         headers = {
-            "Authorization": f"Bearer {self.OPENAI_API_KEY}",
+            "Authorization": f"Bearer {self.openai_api_key}",
             "Content-Type": "application/json",
         }
 
@@ -168,8 +167,8 @@ async def query_frame(self, index, session):
                     # Activity is starting or being detected for the first time
                     self.activity_states[code] = True
                     send_event_to_cloud(
-                        self.workspaceid,
-                        self.recid,
+                        self.workspace_id,
+                        self.recording_id,
                         code,
                         timestamp,
                         self.cloud_token,
@@ -207,7 +206,6 @@ async def binary_search(self, session, start, end, identified_activities):
             return []
 
         mid = (start + end) // 2
-        # print(f"Binary search range: {start}-{end}, mid: {mid}")
         results = []
 
         # Process the mid frame and ensure both prompts are evaluated
@@ -231,12 +229,13 @@ async def binary_search(self, session, start, end, identified_activities):
     async def process_batches(self, session, batch_size):
         identified_activities = set()
         all_results = []
-        for i in range(0, len(self.base64Frames), batch_size):
-            end = min(i + batch_size, len(self.base64Frames))
+        for i in range(0, len(self.base64_frames), batch_size):
+            end = min(i + batch_size, len(self.base64_frames))
             batch_results = await self.binary_search(
                 session, i, end, identified_activities
             )
             all_results.extend(batch_results)
+
         return all_results
 
     async def prompting(self, save_path, batch_size):
         async with aiohttp.ClientSession() as session:
diff --git a/src/pupil_labs/automate_custom_events/read_data.py b/src/pupil_labs/automate_custom_events/video_utils.py
similarity index 91%
rename from src/pupil_labs/automate_custom_events/read_data.py
rename to src/pupil_labs/automate_custom_events/video_utils.py
index b82a0e0..2dce27d 100644
--- a/src/pupil_labs/automate_custom_events/read_data.py
+++ b/src/pupil_labs/automate_custom_events/video_utils.py
@@ -8,16 +8,16 @@
 from fractions import Fraction
 
 
-def isMonotonicInc(arr):
+def is_sorted(arr):
     return np.all(np.diff(arr) >= 0)
 
 
-def get_baseframes(video_path, audio=False, auto_thread_type=True):
+def encode_video_as_base64(video_path, audio=False, auto_thread_type=True):
     """
     A function to read a video, extract frames, and store them as base64 encoded strings.
    :param video_path: the path to the video
    """
-    base64Frames = []
+    base64_frames = []
     # Read the video
     with av.open(video_path) as video_container, Progress() as progress:
         if audio:
@@ -28,10 +28,10 @@ def get_baseframes(video_path, audio=False, auto_thread_type=True):
         if auto_thread_type:
             stream.thread_type = "AUTO"
-        nframes = stream.frames
+        frame_count = stream.frames
         logging.info("Extracting pts...")
         pts, dts, ts = (list() for i in range(3))
-        decode_task = progress.add_task("👓 Decoding...", total=nframes)
+        decode_task = progress.add_task("👓 Decoding...", total=frame_count)
         for packet in video_container.demux(stream):
             for frame in packet.decode():
                 if frame is not None and frame.pts is not None:
@@ -50,7 +50,7 @@ def get_baseframes(video_path, audio=False, auto_thread_type=True):
                     # Convert the frame to an image and encode it in base64
                     img = frame.to_ndarray(format='bgr24')
                     _, buffer = cv2.imencode(".jpg", img)
-                    base64Frames.append(base64.b64encode(buffer).decode("utf-8"))
+                    base64_frames.append(base64.b64encode(buffer).decode("utf-8"))
                     progress.advance(decode_task)
         progress.refresh()
@@ -59,7 +59,7 @@ def get_baseframes(video_path, audio=False, auto_thread_type=True):
             np.array(dts, dtype=np.uint64),
             np.array(ts, dtype=np.uint64),
         )
-        if not isMonotonicInc(pts):
+        if not is_sorted(pts):
             logging.warning("Pts are not monotonic increasing!.")
         if np.array_equal(pts, dts):
             logging.info("Pts and dts are equal, using pts")
@@ -68,19 +68,18 @@ def get_baseframes(video_path, audio=False, auto_thread_type=True):
             pts = pts[idc]
             ts = ts[idc]
 
-        if nframes != len(pts):
-            nframes = len(pts)
+        if frame_count != len(pts):
+            frame_count = len(pts)
         else:
-            logging.info(f"Video has {nframes} frames")
+            logging.info(f"Video has {frame_count} frames")
 
         timestamps_s = ts / 1e9
-        video_df = pd.DataFrame({
-            "frames": np.arange(nframes),
-            "pts": [int(pt) for pt in pts],
+        frame_metadata = pd.DataFrame({
+            "pts": pts.astype(int),
             "timestamp [ns]": ts,
             "timestamp [s]": timestamps_s
         })
-    return video_df, base64Frames #, fps, nframes, pts, ts
+    return base64_frames, frame_metadata #, fps, nframes, pts, ts
 
 
 def get_frame(av_container, pts, last_pts, frame, audio=False):