Skip to content

Commit

Permalink
update demos
Browse files Browse the repository at this point in the history
  • Loading branch information
bejager committed May 2, 2024
1 parent 2347cc2 commit b97651b
Show file tree
Hide file tree
Showing 17 changed files with 370 additions and 173 deletions.
10 changes: 8 additions & 2 deletions .github/workflows/python-demo.yml
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,10 @@ jobs:
- name: Build dependencies
run: |
python -m pip install -U pip setuptools
pip install wheel && cd ../../binding/python && python3 setup.py sdist bdist_wheel && pip install dist/pvorca-0.1.4-py3-none-any.whl
pip install wheel
cd ../../binding/python
python3 setup.py sdist bdist_wheel
python3 -m pip install dist/pvorca-0.1.4-py3-none-any.whl
- name: Install dependencies
run: |
Expand Down Expand Up @@ -81,7 +84,10 @@ jobs:
# TODO: remove after release
- name: Build dependencies
run: |
pip install wheel && cd ../../binding/python && python3 setup.py sdist bdist_wheel && pip install --force-reinstall dist/pvorca-0.1.4-py3-none-any.whl
pip install wheel
cd ../../binding/python
python3 setup.py sdist bdist_wheel
python3 -m pip install --force-reinstall dist/pvorca-0.1.4-py3-none-any.whl
- name: Install dependencies
run: pip3 install -r requirements.txt
Expand Down
97 changes: 91 additions & 6 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -213,6 +213,9 @@ orca = pvorca.create(access_key='${ACCESS_KEY}')
```

Replace `${ACCESS_KEY}` with your own access key, obtained from [Picovoice Console](https://console.picovoice.ai/).

#### Streaming synthesis

To synthesize a text stream, create an Orca Stream object and add text to it chunk by chunk:

```python
Expand All @@ -235,6 +238,8 @@ When done with streaming text synthesis, the stream object needs to be closed:
stream.close()
```

#### Single synthesis

Use single synthesis mode if the complete text is known in advance:

```python
Expand Down Expand Up @@ -284,9 +289,9 @@ The header file [include/pv_orca.h](./include/pv_orca.h) contains relevant infor
Build an instance of the object:

```c
pv_orca_t *handle = NULL;
pv_orca_t *orca = NULL;
const char *model_path = "${MODEL_PATH}";
pv_status_t status = pv_orca_init("${ACCESS_KEY}", model_path, &handle);
pv_status_t status = pv_orca_init("${ACCESS_KEY}", model_path, &orca);
if (status != PV_STATUS_SUCCESS) {
// error handling logic
}
Expand All @@ -303,15 +308,73 @@ status = pv_orca_synthesize_params_init(&synthesize_params);
// change the default parameters of synthesize_params as desired
```

Now, the `handle` and `synthesize_params` object can be used to synthesize speech:
#### Streaming synthesis

To synthesize a text stream, create an `orca_stream` object using the `synthesize_params`:

```c
pv_orca_stream_t *orca_stream = NULL;
status = pv_orca_stream_open(orca, synthesize_params, &orca_stream);
if (status != PV_STATUS_SUCCESS) {
// error handling logic
}
```

Add text to `orca_stream` chunk by chunk and handle the synthesized audio after each call:

```c
extern char *get_next_text_chunk(void);

int32_t num_samples_chunk = 0;
int16_t *pcm_chunk = NULL;
status = pv_orca_stream_synthesize(
orca_stream,
get_next_text_chunk(),
&num_samples_chunk,
&pcm_chunk);
if (status != PV_STATUS_SUCCESS) {
// error handling logic
}
if (num_samples_chunk > 0) {
// handle pcm_chunk
}
```
Once the text stream is complete, call the flush method to synthesize the remaining text:
```c
status = pv_orca_stream_flush(orca_stream, &num_samples_chunk, &pcm_chunk);
if (status != PV_STATUS_SUCCESS) {
// error handling logic
}
if (num_samples_chunk > 0) {
// handle pcm_chunk
}
```

After each PCM chunk has been handled, make sure to release the resources acquired for that chunk with:

```c
pv_orca_pcm_delete(pcm_chunk);
```
Finally, when done make sure to close the stream:
```c
pv_orca_stream_close(orca_stream);
```

#### Single synthesis

If the text is known in advance, single synthesis mode can be used:

```c
int32_t num_samples = 0;
int16_t *synthesized_pcm = NULL;
int32_t num_alignments = 0;
pv_orca_word_alignment_t **alignments = NULL;
status = pv_orca_synthesize(
handle,
orca,
"${TEXT}",
synthesize_params,
&num_samples,
Expand All @@ -322,13 +385,35 @@ status = pv_orca_synthesize(

Replace `${TEXT}` with the text to be synthesized including potential [custom pronunciations](#custom-pronunciations).

Print the metadata of the synthesized audio:

```c
for (int32_t i = 0; i < num_alignments; i++) {
fprintf(
stdout,
"[%s]\t.start_sec = %.2f .end_sec = %.2f\n",
alignments[i]->word,
alignments[i]->start_sec,
alignments[i]->end_sec);
for (int32_t j = 0; j < alignments[i]->num_phonemes; j++) {
fprintf(
stdout,
"\t[%s]\t.start_sec = %.2f .end_sec = %.2f\n",
alignments[i]->phonemes[j]->phoneme,
alignments[i]->phonemes[j]->start_sec,
alignments[i]->phonemes[j]->end_sec);

}
}
```

Finally, when done make sure to release the acquired resources:

```c
pv_orca_word_alignments_delete(num_alignments, alignments);
pv_orca_delete_pcm(pcm);
pv_orca_pcm_delete(pcm);
pv_orca_synthesize_params_delete(synthesize_params);
pv_orca_delete(handle);
pv_orca_delete(orca);
```
### Web
Expand Down
4 changes: 2 additions & 2 deletions binding/python/_orca.py
Original file line number Diff line number Diff line change
Expand Up @@ -141,7 +141,7 @@ class COrcaStream(Structure):

class Stream:
"""
Orca Stream object that allows to convert a stream of text to a stream of audio.
Orca Stream object that converts a stream of text to a stream of audio.
"""

def __init__(self, handle: POINTER('Orca.COrcaStream'), orca: 'Orca') -> None:
Expand All @@ -150,7 +150,7 @@ def __init__(self, handle: POINTER('Orca.COrcaStream'), orca: 'Orca') -> None:

def synthesize(self, text: str) -> Optional[Sequence[int]]:
"""
Adds a chunk of text to the OrcaStream object and generates audio if enough text has been added.
Adds a chunk of text to the Stream object and generates audio if enough text has been added.
This function is expected to be called multiple times with consecutive chunks of text from a text stream.
The incoming text is buffered as it arrives until the length is long enough to convert a chunk of the
buffered text into audio. The caller needs to use `pv_orca_stream_flush()` to generate the audio chunk
Expand Down
2 changes: 0 additions & 2 deletions demo/c/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -18,10 +18,8 @@ Signup or Login to [Picovoice Console](https://console.picovoice.ai/) to get you

# Speech Synthesis Demos
Orca supports two modes of operation: streaming and single synthesis.

In the streaming synthesis mode, Orca processes an incoming text stream in real-time and generates audio in parallel.
This is demonstrated in the Orca streaming demo.

In the single synthesis mode, the text is synthesized in a single call to the Orca engine.

**Note**: the following commands are run from the root of the repo.
Expand Down
30 changes: 28 additions & 2 deletions demo/c/orca_demo.c
Original file line number Diff line number Diff line change
Expand Up @@ -284,7 +284,7 @@ int picovoice_main(int argc, char **argv) {
double proc_sec = 0.;
gettimeofday(&before, NULL);

fprintf(stdout, "Synthesizing text `%s` ...\n", text);
fprintf(stdout, "\nSynthesizing text `%s`\n", text);

int32_t num_alignments = 0;
pv_orca_word_alignment_t **alignments = NULL;
Expand Down Expand Up @@ -323,7 +323,33 @@ int picovoice_main(int argc, char **argv) {
((double) (after.tv_sec - before.tv_sec) +
((double) (after.tv_usec - before.tv_usec)) * 1e-6);

fprintf(stdout, "Synthesized text in %.1f sec\n", proc_sec);
if (num_alignments > 0) {
fprintf(stdout, "\nWord alignments");
if (num_alignments > 3) {
fprintf(stdout, " (only showing first 3):\n");
} else {
fprintf(stdout, ":\n");
}
int32_t num_alignments_shown = num_alignments > 3 ? 3 : num_alignments;
for (int32_t i = 0; i < num_alignments_shown; i++) {
fprintf(
stdout,
"word=\"%s\", start_sec=%.2f, end_sec=%.2f\n",
alignments[i]->word,
alignments[i]->start_sec,
alignments[i]->end_sec);
for (int32_t j = 0; j < alignments[i]->num_phonemes; j++) {
fprintf(
stdout,
"\tphoneme=\"%s\", start_sec=%.2f, end_sec=%.2f\n",
alignments[i]->phonemes[j]->phoneme,
alignments[i]->phonemes[j]->start_sec,
alignments[i]->phonemes[j]->end_sec);
}
}
}

fprintf(stdout, "\nSynthesized text in %.2f sec\n", proc_sec);
fprintf(stdout, "Saved audio to `%s`\n", output_path);

pv_status_t delete_status = pv_orca_word_alignments_delete_func(num_alignments, alignments);
Expand Down
Loading

0 comments on commit b97651b

Please sign in to comment.