change folder name and small improvements

Picovoice · May 8, 2024 · 7019010 · 7019010
1 parent 4102949
commit 7019010
Show file tree

Hide file tree

Showing 16 changed files with 20 additions and 86 deletions.
diff --git a/.github/workflows/python-demo.yml b/.github/workflows/python-demo.yml
@@ -51,7 +51,7 @@ jobs:
           pip install wheel
           cd ../../binding/python
           python3 setup.py sdist bdist_wheel
-          python3 -m pip install dist/pvorca-0.2.0-py3-none-any.whl
+          python3 -m pip install dist/pvorca-0.2.0-py3-none-any.whl --force-reinstall
 
       - name: Install dependencies
         run: |
@@ -88,7 +88,7 @@ jobs:
           pip install wheel
           cd ../../binding/python
           python3 setup.py sdist bdist_wheel
-          python3 -m pip install dist/pvorca-0.2.0-py3-none-any.whl
+          python3 -m pip install dist/pvorca-0.2.0-py3-none-any.whl --force-reinstall
 
       - name: Install dependencies
         run: pip3 install -r requirements.txt

diff --git a/binding/python/README.md b/binding/python/README.md
@@ -53,9 +53,9 @@ To synthesize a text stream, create an `Orca.Stream` object and add text to it o
 stream = orca.stream_open()
 
 for text_chunk in text_generator():
-  pcm = stream.synthesize(text_chunk)
-  if pcm is not None:
-      # handle pcm
+    pcm = stream.synthesize(text_chunk)
+    if pcm is not None:
+        # handle pcm
 
 pcm = stream.flush()
 if pcm is not None:
@@ -135,11 +135,14 @@ and replace `${MODEL_PATH}` with the path to the model file with the desired voi
 
 ### Speech control
 
-Orca allows for keyword arguments to control the synthesized speech. They can be provided to the `stream_open` 
+Orca allows for keyword arguments to control the synthesized speech. They can be provided to the `stream_open`
 method or the single synthesis methods `synthesize` and `synthesize_to_file`:
 
 - `speech_rate`: Controls the speed of the generated speech. Valid values are within [0.7, 1.3]. A higher (lower) value
   produces speech that is faster (slower). The default is `1.0`.
+- `random_state`: Sets the random state for sampling during synthesis. This can be used to ensure that the synthesized
+  speech is deterministic across different runs. Valid values are all non-negative integers. If not provided, a random
+  seed will be chosen and the synthesis process will be non-deterministic.
 
 ### Orca properties
 
@@ -167,4 +170,3 @@ The `Orca.PhonemeAlignment` object has the following properties:
 
 [pvorcademo](https://pypi.org/project/pvorcademo/) provides command-line utilities for synthesizing audio using
 Orca.
-
diff --git a/binding/python/_orca.py b/binding/python/_orca.py
@@ -444,7 +444,8 @@ def synthesize(
         The pronunciation is expressed in ARPAbet format, e.g.: "I {live|L IH V} in {Sevilla|S EH V IY Y AH}".
         :param speech_rate: Rate of speech of the synthesized audio. Higher numbers correspond to faster speech.
         Valid values are within [0.7, 1.3].
-        :param random_state: Random seed for the synthesis process.
+        :param random_state: Random seed for the synthesis process. Valid values are all non-negative integers. If not
+        provided, a random seed will be chosen.
         :return: A tuple containing the generated audio as a sequence of 16-bit linearly-encoded integers
         and a sequence of OrcaWordAlignment objects representing the word alignments.
         """

diff --git a/demo/voice_assistant/.gitignore → demo/llm_voice_assistant/.gitignore b/demo/voice_assistant/.gitignore → demo/llm_voice_assistant/.gitignore
diff --git a/demo/voice_assistant/README.md → demo/llm_voice_assistant/README.md b/demo/voice_assistant/README.md → demo/llm_voice_assistant/README.md
@@ -2,8 +2,8 @@
 
 Made in Vancouver, Canada by [Picovoice](https://picovoice.ai)
 
-This demo showcases how [Orca Streaming Text-to-Speech](https://picovoice.ai/platform/orca/) can be seamlessly integrated into LLM-applications to drastically reduce the audio latency
-of voice assistants.
+This demo showcases how [Orca Streaming Text-to-Speech](https://picovoice.ai/platform/orca/) can be seamlessly
+integrated into LLM-applications to drastically reduce the audio latency of voice assistants.
 
 ## Technologies
 
@@ -14,12 +14,12 @@ a Text-to-Speech engine.
 The following technologies are used:
 
 - Speech-to-Text: Picovoice's [Cheetah Streaming Speech-to-Text](https://picovoice.ai/platform/cheetah/)
-- LLM: \"ChatGPT\" using `gpt-3.5-turbo`
+- LLM: "ChatGPT" using `gpt-3.5-turbo`
   with OpenAI Chat Completion API.
 - TTS:
     - Picovoice's [Orca Streaming Text-to-Speech](https://picovoice.ai/platform/orca/)
     - OpenAI TTS
-  
+
 ## Compatibility
 
 This demo has been tested on Linux (x86_64) and macOS (x86_64) using Python 3.10.
@@ -35,7 +35,7 @@ To run all features of this demo, access keys are required for:
 ## Usage
 
 ```bash
-python orca_voice_assistant_demo.py --picovoice-access-key ${PV_ACCESS_KEY} --openai-access-key ${OPEN_AI_KEY}
+python llm_voice_assistant_demo.py --picovoice-access-key ${PV_ACCESS_KEY} --openai-access-key ${OPEN_AI_KEY}
 ```
 
 Replace `${PV_ACCESS_KEY}` with your `AccessKey` obtained from Picovoice Console,

diff --git a/...ce_assistant/orca_voice_assistant_demo.py → ...ice_assistant/llm_voice_assistant_demo.py b/...ce_assistant/orca_voice_assistant_demo.py → ...ice_assistant/llm_voice_assistant_demo.py
diff --git a/demo/voice_assistant/requirements.txt → demo/llm_voice_assistant/requirements.txt b/demo/voice_assistant/requirements.txt → demo/llm_voice_assistant/requirements.txt
diff --git a/demo/voice_assistant/src/__init__.py → demo/llm_voice_assistant/src/__init__.py b/demo/voice_assistant/src/__init__.py → demo/llm_voice_assistant/src/__init__.py
diff --git a/demo/voice_assistant/src/audio_device.py → demo/llm_voice_assistant/src/audio_device.py b/demo/voice_assistant/src/audio_device.py → demo/llm_voice_assistant/src/audio_device.py
diff --git a/demo/voice_assistant/src/llm.py → demo/llm_voice_assistant/src/llm.py b/demo/voice_assistant/src/llm.py → demo/llm_voice_assistant/src/llm.py
diff --git a/demo/voice_assistant/src/synthesizer.py → demo/llm_voice_assistant/src/synthesizer.py b/demo/voice_assistant/src/synthesizer.py → demo/llm_voice_assistant/src/synthesizer.py
diff --git a/demo/voice_assistant/src/transcriber.py → demo/llm_voice_assistant/src/transcriber.py b/demo/voice_assistant/src/transcriber.py → demo/llm_voice_assistant/src/transcriber.py
diff --git a/demo/voice_assistant/src/user_input.py → demo/llm_voice_assistant/src/user_input.py b/demo/voice_assistant/src/user_input.py → demo/llm_voice_assistant/src/user_input.py
diff --git a/demo/voice_assistant/src/util.py → demo/llm_voice_assistant/src/util.py b/demo/voice_assistant/src/util.py → demo/llm_voice_assistant/src/util.py
diff --git a/demo/voice_assistant/generate_sentences.py b/demo/voice_assistant/generate_sentences.py
diff --git a/include/pv_orca.h b/include/pv_orca.h
@@ -190,7 +190,8 @@ typedef struct {
 /**
  * Generates audio from text. The returned audio contains the speech representation of the text.
  * This function returns `PV_STATUS_INVALID_STATE` if an OrcaStream object is open.
- * The memory of the returned audio is allocated by Orca and can be deleted with `pv_orca_pcm_delete()`
+ * The memory of the returned audio and the alignment metadata is allocated by Orca and can be deleted with
+ * `pv_orca_pcm_delete()` and `pv_orca_word_alignments_delete()`, respectively.
  *
  * @param object The Orca object.
  * @param text Text to be converted to audio. The maximum length can be attained by calling
@@ -219,6 +220,8 @@ PV_API pv_status_t pv_orca_synthesize(
 /**
  * Generates audio from text and saves it to a file. The file contains the speech representation of the text.
  * This function returns `PV_STATUS_INVALID_STATE` if an OrcaStream object is open.
+ * The memory of the returned alignment metadata is allocated by Orca and can be deleted with
+ * `pv_orca_word_alignments_delete()`.
  *
  * @param object The Orca object.
  * @param text Text to be converted to audio. The maximum length can be attained by calling