Merge pull request #37 from wst24365888/dev

chore: release v1.0.0
wst24365888 · Jun 6, 2022 · 642d015 · 642d015
2 parents 4b0668a + 76c319b
commit 642d015
Show file tree

Hide file tree

Showing 10 changed files with 424 additions and 115 deletions.
diff --git a/.gitignore b/.gitignore
@@ -162,3 +162,4 @@ cython_debug/
 
 voice_presentation_control/wav_files/
 *.wav
+*.ipynb
diff --git a/README.md b/README.md
@@ -73,6 +73,11 @@
 
 See [releases](https://github.com/wst24365888/voice-presentation-control/releases).
 
+> :warning: **If you encounter an error while installing** `PyAudio` (which is in our dependencies):
+>   - For Windows users, visit [here](https://www.lfd.uci.edu/~gohlke/pythonlibs/#pyaudio) to pick the appropriate `.whl` file and install it.
+>   - For OS X users, run `brew install portaudio`, then `pip install pyaudio`.
+>   - For users of Debian-derived Linux distributions (such as Ubuntu and Mint), run `sudo apt-get install libasound-dev libportaudio2 libportaudiocpp0 portaudio19-dev && pip install pyaudio`.
+
 ### Try It
 
 Just open your terminal, simply type `vpc start` and boom, it works!
@@ -124,7 +129,7 @@ For more actions you can configure, head over to [pyautogui](https://github.com/
 | Command | Description                                                                                         |
 | ------- | --------------------------------------------------------------------------------------------------- |
 | `list`  | List all audio input devices. You can check the device index you want to use by using this command. |
-| `test`  | Test audio environment. Talk and determine the threshold by using this command.                     |
+| `test`  | Test audio environment. Talk and determine the volume threshold by using this command.              |
 
 ### Usage of `vpc mic test`
 
@@ -145,15 +150,17 @@ For more actions you can configure, head over to [pyautogui](https://github.com/
 
 #### Options
 
-| Option                     | Description                                                                |
-| -------------------------- | -------------------------------------------------------------------------- |
-| `-i, --input-device-index` | Set input device index. Check your devices by `vpc mic list`. [default: 1] |
-| `-t, --threshold`          | Set threshold. Test your environment by `vpc mic test`.  [default: 3000]   |
-| `-c, --chunk`              | Set record chunk.  [default: 4096]                                         |
-| `-r, --rate`               | Set input stream rate.  [default: 44100]                                   |
-| `-s, --max-record-seconds` | Set max record seconds if your custom command is long.  [default: 2]       |
-| `-l, --language [en, zh]`  | Set language to recognize.  [default: en]                                  |
-| `--help`                   | Show help and exit.                                                        |
+| Option                     | Description                                                                    |
+| -------------------------- | ------------------------------------------------------------------------------ |
+| `-i, --input-device-index` | Set input device index. Check your devices by `vpc mic list`.  [default: 1]    |
+| `-v, --vol-threshold`      | Set volume threshold. Test your environment by `vpc mic test`.  [default: 1000] |
+| `-z, --zcr-threshold`      | Set zcr threshold.  [default: 0.075]                                           |
+| `-c, --chunk`              | Set record chunk.  [default: 4096]                                             |
+| `-r, --rate`               | Set input stream rate.  [default: 44100]                                       |
+| `-s, --max-record-seconds` | Set max record seconds if your custom command is long.  [default: 3]           |
+| `-l, --language [en, zh]`  | Set language to recognize.  [default: en]                                      |
+| `--strict`                 | Use this option for strict mode.                                               |
+| `--help`                   | Show help and exit.                                                            |
 
 <p align="right">(<a href="#top">back to top</a>)</p>
 

diff --git a/poetry.lock b/poetry.lock
diff --git a/pyproject.toml b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "voice-presentation-control"
-version = "0.4.1"
+version = "1.0.0"
 description = "voice-presentation-control is a tool that allows you to control your presentation using voice when you don't have a presentation pen or when it's inconvenient to use the keyboard."
 authors = ["Xyphuz <[email protected]>"]
 readme = "README.md"
@@ -14,12 +14,14 @@ include = [
 ]
 
 [tool.poetry.dependencies]
-python = "^3.9"
+python = ">=3.9,<3.11"
 PyAudio = "^0.2.11"
 typer = "^0.4.1"
 PyAutoGUI = "^0.9.53"
 numpy = "^1.22.3"
-vosk = "0.3.32"
+vosk = "^0.3.32"
+scipy = "^1.8.1"
+logmmse = "^1.5"
 
 [tool.poetry.dev-dependencies]
 pytest = "^7.1.2"

diff --git a/voice_presentation_control/__init__.py b/voice_presentation_control/__init__.py
@@ -1,2 +1,2 @@
 __app_name__ = "voice_presentation_control"
-__version__ = "0.4.1"
+__version__ = "1.0.0"
diff --git a/voice_presentation_control/action_matcher.py b/voice_presentation_control/action_matcher.py
@@ -1,5 +1,5 @@
 import time
-from typing import Callable, Dict
+from typing import Callable, Dict, Tuple
 
 
 class ActionMatcher:
@@ -18,13 +18,13 @@ def throttle(self, func: Callable[[], None], timeout: int) -> bool:
 
         return False
 
-    def match(self, instruction: str) -> str:
+    def match(self, instruction: str) -> Tuple[bool, str]:
         for action_name, action in self.actions.items():
             if action_name.replace(" ", "").lower() in instruction.replace(" ", "").lower():
                 executed = self.throttle(action, 1)
                 if executed:
-                    return f"HIT: {action_name}"
+                    return True, f"HIT: {action_name}"
 
-                return f"TOO FREQUENT: {action_name}"
+                return False, f"TOO FREQUENT: {action_name}"
 
-        return "NOT HIT"
+        return False, "NOT HIT"
diff --git a/voice_presentation_control/cli.py b/voice_presentation_control/cli.py
@@ -3,7 +3,7 @@
 import platform
 import subprocess
 from enum import Enum
-from typing import Callable, Dict, List, Union
+from typing import Dict, List, Optional, Union
 
 import pyautogui
 import typer
@@ -54,11 +54,17 @@ def start(
         "-i",
         help="Set input device index. Check your devices by `vpc mic list`.",
     ),
-    threshold: int = typer.Option(
-        3000,
-        "--threshold",
-        "-t",
-        help="Set threshold. Test your environment by `vpc mic test`.",
+    vol_threshold: int = typer.Option(
+        1000,
+        "--vol-threshold",
+        "-v",
+        help="Set volume threshold. Test your environment by `vpc mic test`.",
+    ),
+    zcr_threshold: float = typer.Option(
+        0.075,
+        "--zcr-threshold",
+        "-z",
+        help="Set zcr threshold.",
     ),
     chunk: int = typer.Option(
         4096,
@@ -73,7 +79,7 @@ def start(
         help="Set input stream rate.",
     ),
     max_record_seconds: int = typer.Option(
-        2,
+        3,
         "--max-record-seconds",
         "-s",
         help="Set max record seconds if your custom command is long.",
@@ -84,41 +90,71 @@ def start(
         "-l",
         help="Set language to recognize.",
     ),
+    strict: bool = typer.Option(
+        False,
+        "--strict",
+        help="Use this option for strict mode.",
+    ),
 ) -> None:
     action_matcher = ActionMatcher()
+    actions: Dict[str, Union[str, List[str]]] = {}
 
     try:
         with open(os.path.join(os.path.dirname(__file__)) + "/configs/actions.json", encoding="utf-8") as f:
-            data = json.load(f)
-
-            try:
-                actions: Dict[str, Union[str, List[str]]] = data[lang]
-                for action_name, pyautogui_instruction in actions.items():
-                    action: Callable[[Union[str, List[str]]], None]
-
-                    if isinstance(pyautogui_instruction, str):
-                        action = lambda bind_instruction=pyautogui_instruction: pyautogui.press(  # noqa: E731
-                            bind_instruction
-                        )
-                    else:
-                        action = lambda bind_instruction=pyautogui_instruction: pyautogui.hotkey(  # noqa: E731
-                            *bind_instruction
-                        )
-
-                    action_matcher.add_action(action_name=action_name, action=action)
-            except KeyError:
-                raise KeyError(f"Language '{lang}' is not set in actions.json")
+            data: dict = json.load(f)
+
+            if data.get(lang):
+                actions = data[lang]
+            else:
+                typer.echo(f"Language '{lang}' is not set in actions.json")
+                raise typer.Exit()
     except FileNotFoundError:
-        raise FileNotFoundError(f"Language '{lang}' is not supported.")
+        raise FileNotFoundError("Config file not found.")
+
+    for action_name, pyautogui_instruction in actions.items():
+        if type(pyautogui_instruction) is str:
+
+            def action(bind_instruction=pyautogui_instruction):  # type: ignore
+                return pyautogui.press(bind_instruction)
+
+        elif type(pyautogui_instruction) is list:
+
+            def action(bind_instruction=pyautogui_instruction):  # type: ignore
+                return pyautogui.hotkey(*bind_instruction)
+
+        elif type(pyautogui_instruction) is float or type(pyautogui_instruction) is int:
+
+            def action(bind_instruction=pyautogui_instruction):  # type: ignore
+                return pyautogui.scroll(bind_instruction)
+
+        else:
+
+            def action():  # type: ignore
+                return print(f"Invalid action type of '{action_name}': {type(pyautogui_instruction)}")
+
+        action_matcher.add_action(action_name=action_name, action=action)
+
+    grammar: Optional[str] = None
+
+    if not strict:
+        if lang == SupportedLanguage.en:
+            grammar = '["{}", "[unk]"]'.format('", "'.join(actions.keys()))
+        elif lang == SupportedLanguage.zh:
+            action_names: List[str] = []
+            for action_name in actions.keys():
+                for character in action_name:
+                    action_names.append(character)
+            grammar = '["{}", "[unk]"]'.format('", "'.join(action_names))
 
     controller = Controller(
         mic.Mic(input_device_index=input_device_index),
-        threshold,
+        vol_threshold,
+        zcr_threshold,
         chunk,
         rate,
         max_record_seconds,
         action_matcher,
-        Recognizer(lang=lang),
+        Recognizer(lang=lang, grammar=grammar),
     )
     controller.start()
 

diff --git a/voice_presentation_control/configs/actions.json b/voice_presentation_control/configs/actions.json
@@ -2,11 +2,17 @@
     "en": {
         "next page": "down",
         "last page": "up",
-        "presentation mode": ["alt", "f5"]
+        "close window": ["alt", "f4"],
+        "presentation mode": ["alt", "f5"],
+        "scroll up": 320,
+        "scroll down": -320
     },
     "zh": {
         "下一页": "down",
         "上一页": "up",
-        "简报模式": ["alt", "f5"]
+        "关闭": ["alt", "f4"],
+        "简报模式": ["alt", "f5"],
+        "往上": 320,
+        "往下": -320
     }
 }