Skip to content

Commit

Permalink
Merge pull request #37 from wst24365888/dev
Browse files Browse the repository at this point in the history
chore: release v1.0.0
  • Loading branch information
wst24365888 authored Jun 6, 2022
2 parents 4b0668a + 76c319b commit 642d015
Show file tree
Hide file tree
Showing 10 changed files with 424 additions and 115 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -162,3 +162,4 @@ cython_debug/

voice_presentation_control/wav_files/
*.wav
*.ipynb
27 changes: 17 additions & 10 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,11 @@

See [releases](https://github.com/wst24365888/voice-presentation-control/releases).

> :warning: **If you encounter an error while installing** `PyAudio` (which is in our dependencies):
> - For Windows users, visit [here](https://www.lfd.uci.edu/~gohlke/pythonlibs/#pyaudio) to pick the appropriate `.whl` file to install.
> - For OS X users, run `brew install portaudio`, then `pip install pyaudio`.
> - For users of Debian-derived Linux distributions (like Ubuntu and Mint), run `sudo apt-get install libasound-dev libportaudio2 libportaudiocpp0 portaudio19-dev && pip install pyaudio`.
### Try It

Just open your terminal, simply type `vpc start` and boom, it works!
Expand Down Expand Up @@ -124,7 +129,7 @@ For more actions you can configure, head over to [pyautogui](https://github.com/
| Command | Description |
| ------- | --------------------------------------------------------------------------------------------------- |
| `list` | List all audio input devices. You can check the device index you want to use by using this command. |
| `test` | Test audio environment. Talk and determine the threshold by using this command. |
| `test` | Test audio environment. Talk and determine the volume threshold by using this command. |

### Usage of `vpc mic test`

Expand All @@ -145,15 +150,17 @@ For more actions you can configure, head over to [pyautogui](https://github.com/

#### Options

| Option | Description |
| -------------------------- | -------------------------------------------------------------------------- |
| `-i, --input-device-index` | Set input device index. Check your devices by `vpc mic list`. [default: 1] |
| `-t, --threshold` | Set threshold. Test your environment by `vpc mic test`. [default: 3000] |
| `-c, --chunk` | Set record chunk. [default: 4096] |
| `-r, --rate` | Set input stream rate. [default: 44100] |
| `-s, --max-record-seconds` | Set max record seconds if your custom command is long. [default: 2] |
| `-l, --language [en, zh]` | Set language to recognize. [default: en] |
| `--help` | Show help and exit. |
| Option | Description |
| -------------------------- | ------------------------------------------------------------------------------ |
| `-i, --input-device-index` | Set input device index. Check your devices by `vpc mic list`. [default: 1] |
| `-v, --vol-threshold` | Set volume threshold. Test your environment by `vpc mic test`. [default: 1000] |
| `-z, --zcr-threshold` | Set zero-crossing rate (ZCR) threshold. [default: 0.075] |
| `-c, --chunk` | Set record chunk. [default: 4096] |
| `-r, --rate` | Set input stream rate. [default: 44100] |
| `-s, --max-record-seconds` | Set max record seconds if your custom command is long. [default: 3] |
| `-l, --language [en, zh]` | Set language to recognize. [default: en] |
| `--strict` | Use this option for strict mode. |
| `--help` | Show help and exit. |

<p align="right">(<a href="#top">back to top</a>)</p>

Expand Down
237 changes: 202 additions & 35 deletions poetry.lock

Large diffs are not rendered by default.

8 changes: 5 additions & 3 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "voice-presentation-control"
version = "0.4.1"
version = "1.0.0"
description = "voice-presentation-control is a tool that allows you to control your presentation using voice when you don't have a presentation pen or when it's inconvenient to use the keyboard."
authors = ["Xyphuz <[email protected]>"]
readme = "README.md"
Expand All @@ -14,12 +14,14 @@ include = [
]

[tool.poetry.dependencies]
python = "^3.9"
python = ">=3.9,<3.11"
PyAudio = "^0.2.11"
typer = "^0.4.1"
PyAutoGUI = "^0.9.53"
numpy = "^1.22.3"
vosk = "0.3.32"
vosk = "^0.3.32"
scipy = "^1.8.1"
logmmse = "^1.5"

[tool.poetry.dev-dependencies]
pytest = "^7.1.2"
Expand Down
2 changes: 1 addition & 1 deletion voice_presentation_control/__init__.py
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
__app_name__ = "voice_presentation_control"
__version__ = "0.4.1"
__version__ = "1.0.0"
10 changes: 5 additions & 5 deletions voice_presentation_control/action_matcher.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import time
from typing import Callable, Dict
from typing import Callable, Dict, Tuple


class ActionMatcher:
Expand All @@ -18,13 +18,13 @@ def throttle(self, func: Callable[[], None], timeout: int) -> bool:

return False

def match(self, instruction: str) -> str:
def match(self, instruction: str) -> Tuple[bool, str]:
for action_name, action in self.actions.items():
if action_name.replace(" ", "").lower() in instruction.replace(" ", "").lower():
executed = self.throttle(action, 1)
if executed:
return f"HIT: {action_name}"
return True, f"HIT: {action_name}"

return f"TOO FREQUENT: {action_name}"
return False, f"TOO FREQUENT: {action_name}"

return "NOT HIT"
return False, "NOT HIT"
94 changes: 65 additions & 29 deletions voice_presentation_control/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import platform
import subprocess
from enum import Enum
from typing import Callable, Dict, List, Union
from typing import Dict, List, Optional, Union

import pyautogui
import typer
Expand Down Expand Up @@ -54,11 +54,17 @@ def start(
"-i",
help="Set input device index. Check your devices by `vpc mic list`.",
),
threshold: int = typer.Option(
3000,
"--threshold",
"-t",
help="Set threshold. Test your environment by `vpc mic test`.",
vol_threshold: int = typer.Option(
1000,
"--vol-threshold",
"-v",
help="Set volume threshold. Test your environment by `vpc mic test`.",
),
zcr_threshold: float = typer.Option(
0.075,
"--zcr-threshold",
"-z",
help="Set zcr threshold.",
),
chunk: int = typer.Option(
4096,
Expand All @@ -73,7 +79,7 @@ def start(
help="Set input stream rate.",
),
max_record_seconds: int = typer.Option(
2,
3,
"--max-record-seconds",
"-s",
help="Set max record seconds if your custom command is long.",
Expand All @@ -84,41 +90,71 @@ def start(
"-l",
help="Set language to recognize.",
),
strict: bool = typer.Option(
False,
"--strict",
help="Use this option for strict mode.",
),
) -> None:
action_matcher = ActionMatcher()
actions: Dict[str, Union[str, List[str]]] = {}

try:
with open(os.path.join(os.path.dirname(__file__)) + "/configs/actions.json", encoding="utf-8") as f:
data = json.load(f)

try:
actions: Dict[str, Union[str, List[str]]] = data[lang]
for action_name, pyautogui_instruction in actions.items():
action: Callable[[Union[str, List[str]]], None]

if isinstance(pyautogui_instruction, str):
action = lambda bind_instruction=pyautogui_instruction: pyautogui.press( # noqa: E731
bind_instruction
)
else:
action = lambda bind_instruction=pyautogui_instruction: pyautogui.hotkey( # noqa: E731
*bind_instruction
)

action_matcher.add_action(action_name=action_name, action=action)
except KeyError:
raise KeyError(f"Language '{lang}' is not set in actions.json")
data: dict = json.load(f)

if data.get(lang):
actions = data[lang]
else:
typer.echo(f"Language '{lang}' is not set in actions.json")
raise typer.Exit()
except FileNotFoundError:
raise FileNotFoundError(f"Language '{lang}' is not supported.")
raise FileNotFoundError("Config file not found.")

for action_name, pyautogui_instruction in actions.items():
if type(pyautogui_instruction) is str:

def action(bind_instruction=pyautogui_instruction): # type: ignore
return pyautogui.press(bind_instruction)

elif type(pyautogui_instruction) is list:

def action(bind_instruction=pyautogui_instruction): # type: ignore
return pyautogui.hotkey(*bind_instruction)

elif type(pyautogui_instruction) is float or type(pyautogui_instruction) is int:

def action(bind_instruction=pyautogui_instruction): # type: ignore
return pyautogui.scroll(bind_instruction)

else:

def action(): # type: ignore
return print(f"Invalid action type of '{action_name}': {type(pyautogui_instruction)}")

action_matcher.add_action(action_name=action_name, action=action)

grammar: Optional[str] = None

if not strict:
if lang == SupportedLanguage.en:
grammar = '["{}", "[unk]"]'.format('", "'.join(actions.keys()))
elif lang == SupportedLanguage.zh:
action_names: List[str] = []
for action_name in actions.keys():
for character in action_name:
action_names.append(character)
grammar = '["{}", "[unk]"]'.format('", "'.join(action_names))

controller = Controller(
mic.Mic(input_device_index=input_device_index),
threshold,
vol_threshold,
zcr_threshold,
chunk,
rate,
max_record_seconds,
action_matcher,
Recognizer(lang=lang),
Recognizer(lang=lang, grammar=grammar),
)
controller.start()

Expand Down
10 changes: 8 additions & 2 deletions voice_presentation_control/configs/actions.json
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,17 @@
"en": {
"next page": "down",
"last page": "up",
"presentation mode": ["alt", "f5"]
"close window": ["alt", "f4"],
"presentation mode": ["alt", "f5"],
"scroll up": 320,
"scroll down": -320
},
"zh": {
"下一页": "down",
"上一页": "up",
"简报模式": ["alt", "f5"]
"关闭": ["alt", "f4"],
"简报模式": ["alt", "f5"],
"往上": 320,
"往下": -320
}
}
Loading

0 comments on commit 642d015

Please sign in to comment.