From 7019010f44f3c6a3ca07248e92f60eaae5fe5690 Mon Sep 17 00:00:00 2001
From: Benjamin <benjamin@picovoice.ai>
Date: Wed, 8 May 2024 11:23:51 -0700
Subject: [PATCH] change folder name and small improvements

---
 .github/workflows/python-demo.yml             |  4 +-
 binding/python/README.md                      | 12 ++--
 binding/python/_orca.py                       |  3 +-
 .../.gitignore                                |  0
 .../README.md                                 | 10 +--
 .../llm_voice_assistant_demo.py}              |  0
 .../requirements.txt                          |  0
 .../src/__init__.py                           |  0
 .../src/audio_device.py                       |  0
 .../src/llm.py                                |  0
 .../src/synthesizer.py                        |  0
 .../src/transcriber.py                        |  0
 .../src/user_input.py                         |  0
 .../src/util.py                               |  0
 demo/voice_assistant/generate_sentences.py    | 72 -------------------
 include/pv_orca.h                             |  5 +-
 16 files changed, 20 insertions(+), 86 deletions(-)
 rename demo/{voice_assistant => llm_voice_assistant}/.gitignore (100%)
 rename demo/{voice_assistant => llm_voice_assistant}/README.md (84%)
 rename demo/{voice_assistant/orca_voice_assistant_demo.py => llm_voice_assistant/llm_voice_assistant_demo.py} (100%)
 rename demo/{voice_assistant => llm_voice_assistant}/requirements.txt (100%)
 rename demo/{voice_assistant => llm_voice_assistant}/src/__init__.py (100%)
 rename demo/{voice_assistant => llm_voice_assistant}/src/audio_device.py (100%)
 rename demo/{voice_assistant => llm_voice_assistant}/src/llm.py (100%)
 rename demo/{voice_assistant => llm_voice_assistant}/src/synthesizer.py (100%)
 rename demo/{voice_assistant => llm_voice_assistant}/src/transcriber.py (100%)
 rename demo/{voice_assistant => llm_voice_assistant}/src/user_input.py (100%)
 rename demo/{voice_assistant => llm_voice_assistant}/src/util.py (100%)
 delete mode 100644 demo/voice_assistant/generate_sentences.py

diff --git a/.github/workflows/python-demo.yml b/.github/workflows/python-demo.yml
index 7bcc19f2..e35f1155 100644
--- a/.github/workflows/python-demo.yml
+++ b/.github/workflows/python-demo.yml
@@ -51,7 +51,7 @@ jobs:
           pip install wheel
           cd ../../binding/python
           python3 setup.py sdist bdist_wheel
-          python3 -m pip install dist/pvorca-0.2.0-py3-none-any.whl
+          python3 -m pip install dist/pvorca-0.2.0-py3-none-any.whl --force-reinstall
 
       - name: Install dependencies
         run: |
@@ -88,7 +88,7 @@ jobs:
           pip install wheel
           cd ../../binding/python
           python3 setup.py sdist bdist_wheel
-          python3 -m pip install dist/pvorca-0.2.0-py3-none-any.whl
+          python3 -m pip install dist/pvorca-0.2.0-py3-none-any.whl --force-reinstall
 
       - name: Install dependencies
         run: pip3 install -r requirements.txt
diff --git a/binding/python/README.md b/binding/python/README.md
index 2beaa9da..0ecf296b 100644
--- a/binding/python/README.md
+++ b/binding/python/README.md
@@ -53,9 +53,9 @@ To synthesize a text stream, create an `Orca.Stream` object and add text to it o
 stream = orca.stream_open()
 
 for text_chunk in text_generator():
-  pcm = stream.synthesize(text_chunk)
-  if pcm is not None:
-      # handle pcm
+    pcm = stream.synthesize(text_chunk)
+    if pcm is not None:
+        # handle pcm
 
 pcm = stream.flush()
 if pcm is not None:
@@ -135,11 +135,14 @@ and replace `${MODEL_PATH}` with the path to the model file with the desired voi
 
 ### Speech control
 
-Orca allows for keyword arguments to control the synthesized speech. They can be provided to the `stream_open` 
+Orca allows for keyword arguments to control the synthesized speech. They can be provided to the `stream_open`
 method or the single synthesis methods `synthesize` and `synthesize_to_file`:
 
 - `speech_rate`: Controls the speed of the generated speech. Valid values are within [0.7, 1.3]. A higher (lower) value
   produces speech that is faster (slower). The default is `1.0`.
+- `random_state`: Sets the random state for sampling during synthesis. This can be used to ensure that the synthesized
+  speech is deterministic across different runs. Valid values are all non-negative integers. If not provided, a random
+  seed will be chosen and the synthesis process will be non-deterministic.
 
 ### Orca properties
 
@@ -167,4 +170,3 @@ The `Orca.PhonemeAlignment` object has the following properties:
 
 [pvorcademo](https://pypi.org/project/pvorcademo/) provides command-line utilities for synthesizing audio using
 Orca.
-
diff --git a/binding/python/_orca.py b/binding/python/_orca.py
index 70d584da..8cf56604 100644
--- a/binding/python/_orca.py
+++ b/binding/python/_orca.py
@@ -444,7 +444,8 @@ def synthesize(
         The pronunciation is expressed in ARPAbet format, e.g.: "I {live|L IH V} in {Sevilla|S EH V IY Y AH}".
         :param speech_rate: Rate of speech of the synthesized audio. Higher numbers correspond to faster speech.
         Valid values are within [0.7, 1.3].
-        :param random_state: Random seed for the synthesis process.
+        :param random_state: Random seed for the synthesis process. Valid values are all non-negative integers. If not
+        provided, a random seed will be chosen.
         :return: A tuple containing the generated audio as a sequence of 16-bit linearly-encoded integers
         and a sequence of OrcaWordAlignment objects representing the word alignments.
         """
diff --git a/demo/voice_assistant/.gitignore b/demo/llm_voice_assistant/.gitignore
similarity index 100%
rename from demo/voice_assistant/.gitignore
rename to demo/llm_voice_assistant/.gitignore
diff --git a/demo/voice_assistant/README.md b/demo/llm_voice_assistant/README.md
similarity index 84%
rename from demo/voice_assistant/README.md
rename to demo/llm_voice_assistant/README.md
index da779519..68546e4a 100644
--- a/demo/voice_assistant/README.md
+++ b/demo/llm_voice_assistant/README.md
@@ -2,8 +2,8 @@
 
 Made in Vancouver, Canada by [Picovoice](https://picovoice.ai)
 
-This demo showcases how [Orca Streaming Text-to-Speech](https://picovoice.ai/platform/orca/) can be seamlessly integrated into LLM-applications to drastically reduce the audio latency
-of voice assistants.
+This demo showcases how [Orca Streaming Text-to-Speech](https://picovoice.ai/platform/orca/) can be seamlessly
+integrated into LLM applications to drastically reduce the audio latency of voice assistants.
 
 ## Technologies
 
@@ -14,12 +14,12 @@ a Text-to-Speech engine.
 The following technologies are used:
 
 - Speech-to-Text: Picovoice's [Cheetah Streaming Speech-to-Text](https://picovoice.ai/platform/cheetah/)
-- LLM: \"ChatGPT\" using `gpt-3.5-turbo`
+- LLM: "ChatGPT" using `gpt-3.5-turbo`
   with OpenAI Chat Completion API.
 - TTS:
     - Picovoice's [Orca Streaming Text-to-Speech](https://picovoice.ai/platform/orca/)
     - OpenAI TTS
-  
+
 ## Compatibility
 
 This demo has been tested on Linux (x86_64) and macOS (x86_64) using Python 3.10.
@@ -35,7 +35,7 @@ To run all features of this demo, access keys are required for:
 ## Usage
 
 ```bash
-python orca_voice_assistant_demo.py --picovoice-access-key ${PV_ACCESS_KEY} --openai-access-key ${OPEN_AI_KEY}
+python llm_voice_assistant_demo.py --picovoice-access-key ${PV_ACCESS_KEY} --openai-access-key ${OPEN_AI_KEY}
 ```
 
 Replace `${PV_ACCESS_KEY}` with your `AccessKey` obtained from Picovoice Console,
diff --git a/demo/voice_assistant/orca_voice_assistant_demo.py b/demo/llm_voice_assistant/llm_voice_assistant_demo.py
similarity index 100%
rename from demo/voice_assistant/orca_voice_assistant_demo.py
rename to demo/llm_voice_assistant/llm_voice_assistant_demo.py
diff --git a/demo/voice_assistant/requirements.txt b/demo/llm_voice_assistant/requirements.txt
similarity index 100%
rename from demo/voice_assistant/requirements.txt
rename to demo/llm_voice_assistant/requirements.txt
diff --git a/demo/voice_assistant/src/__init__.py b/demo/llm_voice_assistant/src/__init__.py
similarity index 100%
rename from demo/voice_assistant/src/__init__.py
rename to demo/llm_voice_assistant/src/__init__.py
diff --git a/demo/voice_assistant/src/audio_device.py b/demo/llm_voice_assistant/src/audio_device.py
similarity index 100%
rename from demo/voice_assistant/src/audio_device.py
rename to demo/llm_voice_assistant/src/audio_device.py
diff --git a/demo/voice_assistant/src/llm.py b/demo/llm_voice_assistant/src/llm.py
similarity index 100%
rename from demo/voice_assistant/src/llm.py
rename to demo/llm_voice_assistant/src/llm.py
diff --git a/demo/voice_assistant/src/synthesizer.py b/demo/llm_voice_assistant/src/synthesizer.py
similarity index 100%
rename from demo/voice_assistant/src/synthesizer.py
rename to demo/llm_voice_assistant/src/synthesizer.py
diff --git a/demo/voice_assistant/src/transcriber.py b/demo/llm_voice_assistant/src/transcriber.py
similarity index 100%
rename from demo/voice_assistant/src/transcriber.py
rename to demo/llm_voice_assistant/src/transcriber.py
diff --git a/demo/voice_assistant/src/user_input.py b/demo/llm_voice_assistant/src/user_input.py
similarity index 100%
rename from demo/voice_assistant/src/user_input.py
rename to demo/llm_voice_assistant/src/user_input.py
diff --git a/demo/voice_assistant/src/util.py b/demo/llm_voice_assistant/src/util.py
similarity index 100%
rename from demo/voice_assistant/src/util.py
rename to demo/llm_voice_assistant/src/util.py
diff --git a/demo/voice_assistant/generate_sentences.py b/demo/voice_assistant/generate_sentences.py
deleted file mode 100644
index e8ff809e..00000000
--- a/demo/voice_assistant/generate_sentences.py
+++ /dev/null
@@ -1,72 +0,0 @@
-import argparse
-
-from tqdm import tqdm
-
-from src import LLM, LLMs
-
-
-SYSTEM_PROMPT = """
-        You are a friendly voice assistant in customer service of an e-commerce platform.
-        Use natural, conversational language that are clear and easy to follow (short sentences, simple words).
-        Only use english letters and punctuation, no special characters.
-        Be verbose.
-        Keep the conversation flowing naturally.
-        Don't use lists.
-        If the customer was successful, say "Great!" and ask if they need help with anything else.
-        """
-
-
-def main(args: argparse.Namespace) -> None:
-
-    sentences = []
-
-    first_sentence = """
-    Hi, I'm trying to place an order on your webpage but I'm having trouble with the checkout process. 
-    Can you help me?"""
-    second_sentence = "The place order button isn't working."
-
-    llm = LLM.create(LLMs.OPENAI, access_key=args.openai_access_key, assistant_prompt=SYSTEM_PROMPT)
-
-    for _ in tqdm(range(50)):
-        llm_message = "".join([t for t in llm.chat(first_sentence) if t is not None])
-        sentences.append(llm_message)
-
-        #print(llm_message)
-
-        llm_message = "".join([t for t in llm.chat(second_sentence) if t is not None])
-        sentences.append(llm_message)
-
-        #print(llm_message)
-
-        # TODO: implement this method if using this script
-        llm.reset_history()
-
-    print("=============================================================================")
-
-    # print sentences like a python list in the following format
-    # [
-    #     "sentence 1",
-    #     "sentence 2",
-    # ]
-
-    print("[")
-    for sentence in sentences:
-        sentence = sentence.replace("\n", "")
-        sentence = sentence.replace("\"Place Order\"", "Place Order")
-        print(f'    "{sentence}",')
-    print("]")
-
-
-if __name__ == "__main__":
-    parser = argparse.ArgumentParser(description="Text-to-speech streaming synthesis")
-
-    parser.add_argument(
-        "--llm",
-        default=LLMs.DUMMY.value,
-        choices=[l.value for l in LLMs],
-        help="Choose LLM to use")
-    parser.add_argument(
-        "--openai-access-key",
-        help="Open AI access key. Needed when using openai models")
-
-    main(parser.parse_args())
diff --git a/include/pv_orca.h b/include/pv_orca.h
index 3c22cb17..f7f7d9d5 100644
--- a/include/pv_orca.h
+++ b/include/pv_orca.h
@@ -190,7 +190,8 @@ typedef struct {
 /**
  * Generates audio from text. The returned audio contains the speech representation of the text.
  * This function returns `PV_STATUS_INVALID_STATE` if an OrcaStream object is open.
- * The memory of the returned audio is allocated by Orca and can be deleted with `pv_orca_pcm_delete()`
+ * The memory of the returned audio and the alignment metadata is allocated by Orca and can be deleted with
+ * `pv_orca_pcm_delete()` and `pv_orca_word_alignments_delete()`, respectively.
  *
  * @param object The Orca object.
  * @param text Text to be converted to audio. The maximum length can be attained by calling
@@ -219,6 +220,8 @@ PV_API pv_status_t pv_orca_synthesize(
 /**
  * Generates audio from text and saves it to a file. The file contains the speech representation of the text.
  * This function returns `PV_STATUS_INVALID_STATE` if an OrcaStream object is open.
+ * The memory of the returned alignment metadata is allocated by Orca and can be deleted with
+ * `pv_orca_word_alignments_delete()`.
  *
  * @param object The Orca object.
  * @param text Text to be converted to audio. The maximum length can be attained by calling