Merge pull request #24 from Picovoice/v0.2.2

V0.2.1+ - Improved text normalization
Picovoice · May 24, 2024 · 2116849 · 2116849
2 parents 5413fb4 + 93a55bd
commit 2116849
Show file tree

Hide file tree

Showing 44 changed files with 38 additions and 35 deletions.
diff --git a/binding/python/setup.py b/binding/python/setup.py
@@ -49,7 +49,7 @@
 
 setuptools.setup(
     name="pvorca",
-    version="0.2.1",
+    version="0.2.2",
     author="Picovoice",
     author_email="hello@picovoice.ai",
     description="Orca Streaming Text-to-Speech Engine",

diff --git a/binding/python/test_orca.py b/binding/python/test_orca.py
@@ -48,7 +48,7 @@ def _test_audio(self, pcm: Sequence[int], ground_truth: Sequence[int]) -> None:
         pcm = pcm[:len(ground_truth)]  # compensate for discrepancies due to wav header
         self.assertEqual(len(pcm), len(ground_truth))
         for i in range(len(pcm)):
-            self.assertAlmostEqual(pcm[i], ground_truth[i], delta=500)
+            self.assertAlmostEqual(pcm[i], ground_truth[i], delta=8000)
 
     def _test_equal_timestamp(self, timestamp: float, timestamp_truth: float) -> None:
         self.assertAlmostEqual(timestamp, timestamp_truth, places=2)

diff --git a/binding/web/test/orca.test.ts b/binding/web/test/orca.test.ts
@@ -28,6 +28,7 @@ const EXPECTED_VALID_CHARACTERS = [
   'Y', 'Z', '\'', '{', '}', '|', ' ',
   '-', '1', '2', '3', '4', '5', '6',
   '7', '8', '9', '0', '@', '%', '&',
+  '\n', '_', '(', ')',
 ];
 
 const EXACT_ALIGNMENT_TEST_MODEL_IDENTIFIER = 'female';

diff --git a/demo/python/requirements.txt b/demo/python/requirements.txt
@@ -1,4 +1,4 @@
 numpy>=1.24.0
-pvorca==0.2.1
+pvorca==0.2.2
 sounddevice==0.4.6
 tiktoken==0.6.0
diff --git a/demo/python/setup.py b/demo/python/setup.py
@@ -26,15 +26,15 @@
 
 setuptools.setup(
     name="pvorcademo",
-    version="0.2.1",
+    version="0.2.2",
     author="Picovoice",
     author_email="hello@picovoice.ai",
     description="Orca Streaming Text-to-Speech Engine demos",
     long_description=long_description,
     long_description_content_type="text/markdown",
     url="https://github.com/Picovoice/orca",
     packages=["pvorcademo"],
-    install_requires=["numpy>=1.24.0", "pvorca==0.2.1", "sounddevice==0.4.6", "tiktoken==0.6.0"],
+    install_requires=["numpy>=1.24.0", "pvorca==0.2.2", "sounddevice==0.4.6", "tiktoken==0.6.0"],
     include_package_data=True,
     classifiers=[
         "Development Status :: 4 - Beta",

diff --git a/include/pv_orca.h b/include/pv_orca.h
@@ -29,11 +29,11 @@ extern "C" {
  *     1) Single synthesis: converts a given text to audio. Function `pv_orca_synthesize()` returns the raw audio data,
  *        function `pv_orca_synthesize_to_file()` saves the audio to a file.
  *     2) Streaming synthesis: Converts a stream of text to a stream of audio. An OrcaStream object can be opened with
- *        `pv_orca_stream_open()` and text can be added with `pv_orca_stream_synthesize()`. The audio is
- *        generated in chunks whenever enough text has been buffered. When the text stream is finalized,
- *        the caller needs to use `pv_orca_stream_flush()` to generate the audio for the remaining text that has
- *        not been synthesized. The stream can be closed with `pv_orca_stream_close()`.
- *        Single synthesis functions cannot be called while a stream is open.
+ *        `pv_orca_stream_open()` and text chunks can be added with `pv_orca_stream_synthesize()`.
+ *        The incoming text is buffered internally and only when enough context is available will an audio chunk
+ *        be generated. When the text stream has concluded, the caller needs to use `pv_orca_stream_flush()`
+ *        to generate the audio for the remaining buffer that has yet to be synthesized. The stream can be closed
+ *        with `pv_orca_stream_close()`. Single synthesis functions cannot be called while a stream is open.
  */
 typedef struct pv_orca pv_orca_t;
 
@@ -273,7 +273,7 @@ PV_API pv_status_t pv_orca_stream_open(
  * The caller is responsible for deleting the generated audio with `pv_orca_pcm_delete()`.
  *
  * @param object The OrcaStream object.
- * @param text A chunk of text from a text input stream, comprised of valid characters.
+ * @param text A chunk of text from a text input stream. Characters not supported by Orca will be ignored.
  * Valid characters can be retrieved by calling `pv_orca_valid_characters()`.
  * Custom pronunciations can be embedded in the text via the syntax `{word|pronunciation}`. They need to be
  * added in a single call to this function. The pronunciation is expressed in ARPAbet format,

diff --git a/lib/android/arm64-v8a/libpv_orca.so b/lib/android/arm64-v8a/libpv_orca.so
diff --git a/lib/android/armeabi-v7a/libpv_orca.so b/lib/android/armeabi-v7a/libpv_orca.so
diff --git a/lib/android/x86/libpv_orca.so b/lib/android/x86/libpv_orca.so
diff --git a/lib/android/x86_64/libpv_orca.so b/lib/android/x86_64/libpv_orca.so
diff --git a/lib/ios/PvOrca.xcframework/Info.plist b/lib/ios/PvOrca.xcframework/Info.plist
@@ -6,30 +6,30 @@
 	<array>
 		<dict>
 			<key>LibraryIdentifier</key>
-			<string>ios-arm64</string>
+			<string>ios-arm64_x86_64-simulator</string>
 			<key>LibraryPath</key>
 			<string>PvOrca.framework</string>
 			<key>SupportedArchitectures</key>
 			<array>
 				<string>arm64</string>
+				<string>x86_64</string>
 			</array>
 			<key>SupportedPlatform</key>
 			<string>ios</string>
+			<key>SupportedPlatformVariant</key>
+			<string>simulator</string>
 		</dict>
 		<dict>
 			<key>LibraryIdentifier</key>
-			<string>ios-arm64_x86_64-simulator</string>
+			<string>ios-arm64</string>
 			<key>LibraryPath</key>
 			<string>PvOrca.framework</string>
 			<key>SupportedArchitectures</key>
 			<array>
 				<string>arm64</string>
-				<string>x86_64</string>
 			</array>
 			<key>SupportedPlatform</key>
 			<string>ios</string>
-			<key>SupportedPlatformVariant</key>
-			<string>simulator</string>
 		</dict>
 	</array>
 	<key>CFBundlePackageType</key>

diff --git a/lib/ios/PvOrca.xcframework/ios-arm64/PvOrca.framework/Headers/picovoice.h b/lib/ios/PvOrca.xcframework/ios-arm64/PvOrca.framework/Headers/picovoice.h
@@ -72,8 +72,6 @@ PV_API pv_status_t pv_get_error_stack(
  */
 PV_API void pv_free_error_stack(char **message_stack);
 
-PV_API void pv_set_sdk(const char *sdk);
-
 #ifdef __cplusplus
 }
 

diff --git a/lib/ios/PvOrca.xcframework/ios-arm64/PvOrca.framework/Headers/pv_orca.h b/lib/ios/PvOrca.xcframework/ios-arm64/PvOrca.framework/Headers/pv_orca.h
@@ -29,11 +29,11 @@ extern "C" {
  *     1) Single synthesis: converts a given text to audio. Function `pv_orca_synthesize()` returns the raw audio data,
  *        function `pv_orca_synthesize_to_file()` saves the audio to a file.
  *     2) Streaming synthesis: Converts a stream of text to a stream of audio. An OrcaStream object can be opened with
- *        `pv_orca_stream_open()` and text can be added with `pv_orca_stream_synthesize()`. The audio is
- *        generated in chunks whenever enough text has been buffered. When the text stream is finalized,
- *        the caller needs to use `pv_orca_stream_flush()` to generate the audio for the remaining text that has
- *        not been synthesized. The stream can be closed with `pv_orca_stream_close()`.
- *        Single synthesis functions cannot be called while a stream is open.
+ *        `pv_orca_stream_open()` and text chunks can be added with `pv_orca_stream_synthesize()`.
+ *        The incoming text is buffered internally and only when enough context is available will an audio chunk
+ *        be generated. When the text stream has concluded, the caller needs to use `pv_orca_stream_flush()`
+ *        to generate the audio for the remaining buffer that has yet to be synthesized. The stream can be closed
+ *        with `pv_orca_stream_close()`. Single synthesis functions cannot be called while a stream is open.
  */
 typedef struct pv_orca pv_orca_t;
 
@@ -190,7 +190,8 @@ typedef struct {
 /**
  * Generates audio from text. The returned audio contains the speech representation of the text.
  * This function returns `PV_STATUS_INVALID_STATE` if an OrcaStream object is open.
- * The memory of the returned audio is allocated by Orca and can be deleted with `pv_orca_pcm_delete()`
+ * The memory of the returned audio and the alignment metadata is allocated by Orca and can be deleted with
+ * `pv_orca_pcm_delete()` and `pv_orca_word_alignments_delete()`, respectively.
  *
  * @param object The Orca object.
  * @param text Text to be converted to audio. The maximum length can be attained by calling
@@ -219,6 +220,8 @@ PV_API pv_status_t pv_orca_synthesize(
 /**
  * Generates audio from text and saves it to a file. The file contains the speech representation of the text.
  * This function returns `PV_STATUS_INVALID_STATE` if an OrcaStream object is open.
+ * The memory of the returned alignment metadata is allocated by Orca and can be deleted with
+ * `pv_orca_word_alignments_delete()`.
  *
  * @param object The Orca object.
  * @param text Text to be converted to audio. The maximum length can be attained by calling
@@ -264,7 +267,7 @@ PV_API pv_status_t pv_orca_stream_open(
 /**
  * Adds a chunk of text to the OrcaStream object and generates audio if enough text has been added.
  * This function is expected to be called multiple times with consecutive chunks of text from a text stream.
- * The incoming text is buffered as it arrives until the length is long enough to convert a chunk of the buffered
+ * The incoming text is buffered as it arrives until there is enough context to convert a chunk of the buffered
  * text into audio. The caller needs to use `pv_orca_stream_flush()` to generate the audio chunk for the remaining
  * text that has not yet been synthesized.
  * The caller is responsible for deleting the generated audio with `pv_orca_pcm_delete()`.

diff --git a/lib/ios/PvOrca.xcframework/ios-arm64/PvOrca.framework/PvOrca b/lib/ios/PvOrca.xcframework/ios-arm64/PvOrca.framework/PvOrca
diff --git a/lib/ios/PvOrca.xcframework/ios-arm64_x86_64-simulator/PvOrca.framework/Headers/picovoice.h b/lib/ios/PvOrca.xcframework/ios-arm64_x86_64-simulator/PvOrca.framework/Headers/picovoice.h
@@ -72,8 +72,6 @@ PV_API pv_status_t pv_get_error_stack(
  */
 PV_API void pv_free_error_stack(char **message_stack);
 
-PV_API void pv_set_sdk(const char *sdk);
-
 #ifdef __cplusplus
 }
 

diff --git a/lib/ios/PvOrca.xcframework/ios-arm64_x86_64-simulator/PvOrca.framework/Headers/pv_orca.h b/lib/ios/PvOrca.xcframework/ios-arm64_x86_64-simulator/PvOrca.framework/Headers/pv_orca.h
@@ -29,11 +29,11 @@ extern "C" {
  *     1) Single synthesis: converts a given text to audio. Function `pv_orca_synthesize()` returns the raw audio data,
  *        function `pv_orca_synthesize_to_file()` saves the audio to a file.
  *     2) Streaming synthesis: Converts a stream of text to a stream of audio. An OrcaStream object can be opened with
- *        `pv_orca_stream_open()` and text can be added with `pv_orca_stream_synthesize()`. The audio is
- *        generated in chunks whenever enough text has been buffered. When the text stream is finalized,
- *        the caller needs to use `pv_orca_stream_flush()` to generate the audio for the remaining text that has
- *        not been synthesized. The stream can be closed with `pv_orca_stream_close()`.
- *        Single synthesis functions cannot be called while a stream is open.
+ *        `pv_orca_stream_open()` and text chunks can be added with `pv_orca_stream_synthesize()`.
+ *        The incoming text is buffered internally and only when enough context is available will an audio chunk
+ *        be generated. When the text stream has concluded, the caller needs to use `pv_orca_stream_flush()`
+ *        to generate the audio for the remaining buffer that has yet to be synthesized. The stream can be closed
+ *        with `pv_orca_stream_close()`. Single synthesis functions cannot be called while a stream is open.
  */
 typedef struct pv_orca pv_orca_t;
 
@@ -190,7 +190,8 @@ typedef struct {
 /**
  * Generates audio from text. The returned audio contains the speech representation of the text.
  * This function returns `PV_STATUS_INVALID_STATE` if an OrcaStream object is open.
- * The memory of the returned audio is allocated by Orca and can be deleted with `pv_orca_pcm_delete()`
+ * The memory of the returned audio and the alignment metadata is allocated by Orca and can be deleted with
+ * `pv_orca_pcm_delete()` and `pv_orca_word_alignments_delete()`, respectively.
  *
  * @param object The Orca object.
  * @param text Text to be converted to audio. The maximum length can be attained by calling
@@ -219,6 +220,8 @@ PV_API pv_status_t pv_orca_synthesize(
 /**
  * Generates audio from text and saves it to a file. The file contains the speech representation of the text.
  * This function returns `PV_STATUS_INVALID_STATE` if an OrcaStream object is open.
+ * The memory of the returned alignment metadata is allocated by Orca and can be deleted with
+ * `pv_orca_word_alignments_delete()`.
  *
  * @param object The Orca object.
  * @param text Text to be converted to audio. The maximum length can be attained by calling
@@ -264,7 +267,7 @@ PV_API pv_status_t pv_orca_stream_open(
 /**
  * Adds a chunk of text to the OrcaStream object and generates audio if enough text has been added.
  * This function is expected to be called multiple times with consecutive chunks of text from a text stream.
- * The incoming text is buffered as it arrives until the length is long enough to convert a chunk of the buffered
+ * The incoming text is buffered as it arrives until there is enough context to convert a chunk of the buffered
  * text into audio. The caller needs to use `pv_orca_stream_flush()` to generate the audio chunk for the remaining
  * text that has not yet been synthesized.
  * The caller is responsible for deleting the generated audio with `pv_orca_pcm_delete()`.

diff --git a/lib/ios/PvOrca.xcframework/ios-arm64_x86_64-simulator/PvOrca.framework/PvOrca b/lib/ios/PvOrca.xcframework/ios-arm64_x86_64-simulator/PvOrca.framework/PvOrca
diff --git a/lib/java/jetson/cortex-a57-aarch64/libpv_orca_jni.so b/lib/java/jetson/cortex-a57-aarch64/libpv_orca_jni.so
diff --git a/lib/java/linux/x86_64/libpv_orca_jni.so b/lib/java/linux/x86_64/libpv_orca_jni.so
diff --git a/lib/java/mac/arm64/libpv_orca_jni.dylib b/lib/java/mac/arm64/libpv_orca_jni.dylib
diff --git a/lib/java/mac/x86_64/libpv_orca_jni.dylib b/lib/java/mac/x86_64/libpv_orca_jni.dylib
diff --git a/lib/java/raspberry-pi/cortex-a53-aarch64/libpv_orca_jni.so b/lib/java/raspberry-pi/cortex-a53-aarch64/libpv_orca_jni.so
diff --git a/lib/java/raspberry-pi/cortex-a53/libpv_orca_jni.so b/lib/java/raspberry-pi/cortex-a53/libpv_orca_jni.so
diff --git a/lib/java/raspberry-pi/cortex-a72-aarch64/libpv_orca_jni.so b/lib/java/raspberry-pi/cortex-a72-aarch64/libpv_orca_jni.so
diff --git a/lib/java/raspberry-pi/cortex-a72/libpv_orca_jni.so b/lib/java/raspberry-pi/cortex-a72/libpv_orca_jni.so
diff --git a/lib/java/raspberry-pi/cortex-a76-aarch64/libpv_orca_jni.so b/lib/java/raspberry-pi/cortex-a76-aarch64/libpv_orca_jni.so
diff --git a/lib/java/raspberry-pi/cortex-a76/libpv_orca_jni.so b/lib/java/raspberry-pi/cortex-a76/libpv_orca_jni.so
diff --git a/lib/java/windows/amd64/pv_orca_jni.dll b/lib/java/windows/amd64/pv_orca_jni.dll
diff --git a/lib/jetson/cortex-a57-aarch64/libpv_orca.so b/lib/jetson/cortex-a57-aarch64/libpv_orca.so
diff --git a/lib/linux/x86_64/libpv_orca.so b/lib/linux/x86_64/libpv_orca.so
diff --git a/lib/mac/arm64/libpv_orca.dylib b/lib/mac/arm64/libpv_orca.dylib
diff --git a/lib/mac/x86_64/libpv_orca.dylib b/lib/mac/x86_64/libpv_orca.dylib
diff --git a/lib/raspberry-pi/cortex-a53-aarch64/libpv_orca.so b/lib/raspberry-pi/cortex-a53-aarch64/libpv_orca.so
diff --git a/lib/raspberry-pi/cortex-a53/libpv_orca.so b/lib/raspberry-pi/cortex-a53/libpv_orca.so
diff --git a/lib/raspberry-pi/cortex-a72-aarch64/libpv_orca.so b/lib/raspberry-pi/cortex-a72-aarch64/libpv_orca.so
diff --git a/lib/raspberry-pi/cortex-a72/libpv_orca.so b/lib/raspberry-pi/cortex-a72/libpv_orca.so
diff --git a/lib/raspberry-pi/cortex-a76-aarch64/libpv_orca.so b/lib/raspberry-pi/cortex-a76-aarch64/libpv_orca.so
diff --git a/lib/raspberry-pi/cortex-a76/libpv_orca.so b/lib/raspberry-pi/cortex-a76/libpv_orca.so
diff --git a/lib/wasm/pv_orca.wasm b/lib/wasm/pv_orca.wasm
diff --git a/lib/wasm/pv_orca_simd.wasm b/lib/wasm/pv_orca_simd.wasm
diff --git a/lib/windows/amd64/libpv_orca.dll b/lib/windows/amd64/libpv_orca.dll
diff --git a/resources/.test/test_data.json b/resources/.test/test_data.json
@@ -6,7 +6,7 @@
     "text_alignment": "Test alignment.",
     "text_invalid": [
       "Symbols *$",
-      "Escape characters \n",
+      "Escape characters \r",
       "\"ی\", \"ء\"",
       "ॐÁ hindi and spanish",
       "Б russian",

diff --git a/resources/.test/wav/orca_params_female_stream.wav b/resources/.test/wav/orca_params_female_stream.wav
diff --git a/resources/.test/wav/orca_params_male_stream.wav b/resources/.test/wav/orca_params_male_stream.wav
-Original file line number
+Diff line change
@@ Expand Up / @@ -72,8 +72,6 @@ PV_API pv_status_t pv_get_error_stack( @@
      */
     PV_API void pv_free_error_stack(char **message_stack);
-    PV_API void pv_set_sdk(const char *sdk);
     #ifdef __cplusplus
     }
@@ Expand Down @@