Ensure the full config is saved/loaded, and start cleaning

CorentinChauvin · Oct 9, 2024 · 184a696 · 184a696
1 parent 3e4520f
commit 184a696
Show file tree

Hide file tree

Showing 8 changed files with 106 additions and 259 deletions.
diff --git a/README.md b/README.md
@@ -8,21 +8,33 @@
     <img src="assets/logo_high.png" width=400 />
 </div>
 
-Attempting to read a portion of the screen and plot its data real time.
+Attempting to read portions of a screen or a video and exporting its data in a CSV file.
 
-> Blazingly slow, but it kinda works
+> *Blazingly slow, but it kinda works*
+
+**Contents:**
+
+1. [Features](#features)
+2. [Install](#install)
+3. [Run](#run)
+4. [Notes](#notes)
+5. [Dependencies](#dependencies)
 
 ## Features
 
 - Real-time recognition of numbers on the screen.
 - Offline number recognition in a video file.
 - Works on easily configurable areas, as many as one wants.
-- Easy to integrate new OCR methods (see below TODO)
+- Integrated Tesseract OCR and EasyOCR.
+- Easy to add new OCR methods (see `src/ocr.py`).
+- Dead simple to use!
 
 ## Install
 
+This has been mainly developed and tested on Ubuntu 22.04, with Python 3.10.
+
 ```bash
-# Install Python version (if necessary)
+# Install Python version >= 3.10 (necessary on lower Ubuntu versions)
 sudo add-apt-repository ppa:deadsnakes/ppa
 sudo apt update
 sudo apt install python3.13 python3.13-venv python3.13-tk
@@ -48,22 +60,31 @@ python gui.py
 
 ## Notes
 
+- Known OCR issues:
+    - Character confusion, depending on the font: `0` and `8`, `1` and `7`, `5` and `9`.
+    - Missing dot (*e.g.* `42.42` turned into `4242`).
+- Tips to improve OCR reliability:
+    - Upscale the detected area to get a better character resolution.
+    - Use min and max bounds to filter outliers out.
+    - Don't trust the OCR output too much. Potentially implement post-filtering based on knowledge of the recorded data. For example, if measuring a variable that can only evolve slowly, big jumps in the output value can be marked as outliers and discarded.
 - When processing a video, enabling the preview can induce up to 20% overhead.
-- EasyOCR requires PyTorch and Scipy, so isn't lightweight. The first time the program is started, it will download necessary model weights (stored in `~/.EasyOCR/model`). See more details on the EasyOCR GitHub ([link](https://github.com/JaidedAI/EasyOCR)).
+- EasyOCR requires PyTorch and Scipy, so isn't lightweight. The first time the program is started, it will download necessary model weights (stored in `~/.EasyOCR/model`). See more details on the EasyOCR GitHub ([link](https://github.com/JaidedAI/EasyOCR)). With this application, it seems that EasyOCR is slower than Tesseract.
 
 ## Dependencies
 
-- https://github.com/opencv/opencv-python
-- https://github.com/tomschimansky/customtkinter
-- https://github.com/tesseract-ocr/tesseract
-- https://github.com/sirfz/tesserocr
-- https://github.com/JaidedAI/EasyOCR
+This work is merely a wrapper and a graphical interface for some already existing OCR implementations. It heavily uses Tkinter and CustomTkinter for the interface.
+
+- [OpenCV](https://github.com/opencv/opencv-python) (MIT license): image processing.
+- [CustomTkinter](https://github.com/tomschimansky/customtkinter) (MIT license): beautiful interface and GUI.
+- [Tesseract](https://github.com/tesseract-ocr/tesseract) (Apache 2.0): OCR API.
+- [Tesserocr](https://github.com/sirfz/tesserocr) (MIT license): Python wrapper for Tesseract.
+- [EasyOCR](https://github.com/JaidedAI/EasyOCR) (Apache 2.0): another OCR API.
 
 ## TODO
 
 - [x] Loading and processing videos
 - [x] Saving/loading configuration
 - [x] Multi threading, for less blazing slowness
-- [ ] Make it easy to add new OCR methods, and documenting it
+- [ ] Make it easier to add new OCR methods, and document it
 - [ ] Logging not only in the Python terminal, but also in the logging text box
 - [ ] Real time graphing
diff --git a/gui.py b/gui.py
@@ -1,5 +1,10 @@
+#!/usr/bin/env python3
 """
-TODO
+Main entry point for the application
+
+Author:  CorentinChauvin
+Year:    2024
+License: Apache 2.0
 """
 
 from src.ocr import BaseOcrEngine, OcrMethod
@@ -44,8 +49,8 @@ def __init__(self):
         self.title("DemOCRatos - OCR for the people")
         # self.geometry(f"{1100}x{580}")
         self.iconphoto(True, ImageTk.PhotoImage(file="assets/logo_low.png"))
-        self.bind("<Escape>", lambda _: sys.exit())  # FIXME: for development only
-        self.bind("q", lambda _: sys.exit())  # FIXME: for development only
+        # self.bind("<Escape>", lambda _: sys.exit())  # FIXME: for development only
+        # self.bind("q", lambda _: sys.exit())  # FIXME: for development only
         self.protocol("WM_DELETE_WINDOW", self._on_closing_cb)
 
         self._rect_selec_window = None  # reference to the window to select the capture zone
@@ -85,12 +90,12 @@ def __init__(self):
         self.grid_rowconfigure(0, weight=0)
 
         # Set default values and statuses
-        self._stop_btn.configure(state="disabled")  # TODO: change state dynamically
+        self._stop_btn.configure(state="disabled")
 
         self._status_txt.configure(text="10:03 (12 FPS)")
         self._status_txt.configure(
             fg_color="green", text_color="white"
-        )  # TODO: change the colour depending on the status
+        )
 
         self._fps_settings_menu.set("10")
         self._ocr_settings_menu.set("Tesseract")
@@ -261,11 +266,23 @@ def __load_settings_cb():
             with open(path) as file:
                 try:
                     config = json.load(file)
-                except (UnicodeDecodeError, json.decoder.JSONDecodeError):
+                except (UnicodeDecodeError, json.decoder.JSONDecodeError) as e:
                     print("ERROR: coudn't parse JSON config")
+                    print(e)
                     return
 
-            self._captures.load_config(config)
+            try:
+                new_captures = Captures(self._output_frame)
+                new_captures.load_config(config["captures"])
+                self._fps_settings_menu.set(config["fps"])
+                self._ocr_settings_menu.set(config["ocr_method"])
+                self._max_threads_entry.set_value(config["max_threads"])
+            except KeyError as e:
+                print("ERROR: couldn't parse JSON config")
+                print(e)
+                return
+
+            self._captures = new_captures
             self._data_recorder.reset_fields(self._captures.get_names())
             self._selected_capture = self._captures.get_first()
             self._update_capture_options()
@@ -282,7 +299,11 @@ def __save_settings_cb():
                 return
 
             with open(path, "w") as file:
-                config = self._captures.get_config()
+                config = {}
+                config["captures"] = self._captures.get_config()
+                config["fps"] = self._fps_settings_menu.get()
+                config["ocr_method"] = self._ocr_settings_menu.get()
+                config["max_threads"] = self._max_threads_entry.get_value()
                 json.dump(config, file)
 
             print(config)
@@ -720,13 +741,15 @@ def _update_capture_options(self, selected: None | str = None):
 
         def __update_entry_text(entry: ctk.CTkEntry, text):
             entry.delete(0, tk.END)
-            entry.insert(0, text)
+            entry.insert(0, text if text is not None else "")
 
         self._selected_capture.toggle_edit(False)
         __update_entry_text(self._rect_xmin_entry, self._selected_capture.x_min)
         __update_entry_text(self._rect_xmax_entry, self._selected_capture.x_max)
         __update_entry_text(self._rect_ymin_entry, self._selected_capture.y_min)
         __update_entry_text(self._rect_ymax_entry, self._selected_capture.y_max)
+        __update_entry_text(self._min_entry, self._selected_capture.min_value)
+        __update_entry_text(self._max_entry, self._selected_capture.max_value)
         self._selected_capture.toggle_edit(True)
 
         self._pre_process_config_frame.update_elements(

diff --git a/main.py b/main.py