From 20080e6e325cb94c885b7466907bc504c7703c12 Mon Sep 17 00:00:00 2001 From: Kwangsoo Yeo Date: Tue, 7 Nov 2023 16:52:52 -0800 Subject: [PATCH 01/11] initial v2.0 android changes --- binding/android/Leopard/leopard/build.gradle | 2 +- .../java/ai/picovoice/leopard/Leopard.java | 23 +++++++++-- .../ai/picovoice/leopard/LeopardNative.java | 9 +++-- .../picovoice/leopard/LeopardTranscript.java | 22 +++++++---- .../exception/LeopardActivationException.java | 9 ++++- .../LeopardActivationLimitException.java | 9 ++++- .../LeopardActivationRefusedException.java | 9 ++++- .../LeopardActivationThrottledException.java | 9 ++++- .../leopard/exception/LeopardException.java | 38 ++++++++++++++++++- .../leopard/exception/LeopardIOException.java | 9 ++++- .../LeopardInvalidArgumentException.java | 9 ++++- .../LeopardInvalidStateException.java | 9 ++++- .../exception/LeopardKeyException.java | 9 ++++- .../exception/LeopardMemoryException.java | 9 ++++- .../exception/LeopardRuntimeException.java | 9 ++++- .../LeopardStopIterationException.java | 9 ++++- 16 files changed, 155 insertions(+), 38 deletions(-) diff --git a/binding/android/Leopard/leopard/build.gradle b/binding/android/Leopard/leopard/build.gradle index 14e8ea80..bdf0c785 100644 --- a/binding/android/Leopard/leopard/build.gradle +++ b/binding/android/Leopard/leopard/build.gradle @@ -2,7 +2,7 @@ apply plugin: 'com.android.library' ext { PUBLISH_GROUP_ID = 'ai.picovoice' - PUBLISH_VERSION = '1.2.1' + PUBLISH_VERSION = '2.0.0' PUBLISH_ARTIFACT_ID = 'leopard-android' } diff --git a/binding/android/Leopard/leopard/src/main/java/ai/picovoice/leopard/Leopard.java b/binding/android/Leopard/leopard/src/main/java/ai/picovoice/leopard/Leopard.java index 002cddfa..8463bb91 100644 --- a/binding/android/Leopard/leopard/src/main/java/ai/picovoice/leopard/Leopard.java +++ b/binding/android/Leopard/leopard/src/main/java/ai/picovoice/leopard/Leopard.java @@ -50,16 +50,21 @@ public class Leopard { * @param accessKey AccessKey obtained from Picovoice Console * @param modelPath Absolute path to the file containing Leopard model parameters. * @param enableAutomaticPunctuation Set to `true` to enable automatic punctuation insertion. + * @param enableDiarization Set to `true` to enable speaker diarization, which allows Leopard to differentiate speakers + * as part of the transcription process. Word metadata will include a `speaker_tag` to + * identify unique speakers. * @throws LeopardException if there is an error while initializing Leopard. */ private Leopard( String accessKey, String modelPath, - boolean enableAutomaticPunctuation) throws LeopardException { + boolean enableAutomaticPunctuation, + boolean enableDiarization) throws LeopardException { handle = LeopardNative.init( accessKey, modelPath, - enableAutomaticPunctuation); + enableAutomaticPunctuation, + enableDiarization); } private static String extractResource( @@ -180,6 +185,7 @@ public static class Builder { private String accessKey = null; private String modelPath = null; private boolean enableAutomaticPunctuation = false; + private boolean enableDiarization = false; /** * Setter the AccessKey. @@ -211,6 +217,16 @@ public Builder setEnableAutomaticPunctuation(boolean enableAutomaticPunctuation) return this; } + /** + * Setter for enabling speaker diarization. + * + * @param enableDiarization Set to `true` to enable speaker diarization. + */ + public Builder setEnableDiarization(boolean enableDiarization) { + this.enableDiarization = enableDiarization; + return this; + } + /** * Creates an instance of Leopard Speech-to-Text engine. */ @@ -238,7 +254,8 @@ public Leopard build(Context context) throws LeopardException { return new Leopard( accessKey, modelPath, - enableAutomaticPunctuation); + enableAutomaticPunctuation, + enableDiarization); } } } diff --git a/binding/android/Leopard/leopard/src/main/java/ai/picovoice/leopard/LeopardNative.java b/binding/android/Leopard/leopard/src/main/java/ai/picovoice/leopard/LeopardNative.java index f10e1d9c..0d2f10fd 100644 --- a/binding/android/Leopard/leopard/src/main/java/ai/picovoice/leopard/LeopardNative.java +++ b/binding/android/Leopard/leopard/src/main/java/ai/picovoice/leopard/LeopardNative.java @@ -1,5 +1,5 @@ /* - Copyright 2022 Picovoice Inc. + Copyright 2022-2023 Picovoice Inc. You may not use this file except in compliance with the license. A copy of the license is located in the "LICENSE" file accompanying this source. @@ -14,14 +14,17 @@ class LeopardNative { + static native String getVersion(); + static native int getSampleRate(); - static native String getVersion(); + static native void setSdk(String sdk); static native long init( String accessKey, String modelPath, - boolean enableAutomaticPunctuation) throws LeopardException; + boolean enableAutomaticPunctuation, + boolean enableDiarization) throws LeopardException; static native void delete(long object); diff --git a/binding/android/Leopard/leopard/src/main/java/ai/picovoice/leopard/LeopardTranscript.java b/binding/android/Leopard/leopard/src/main/java/ai/picovoice/leopard/LeopardTranscript.java index 74eeb8d8..8212784d 100644 --- a/binding/android/Leopard/leopard/src/main/java/ai/picovoice/leopard/LeopardTranscript.java +++ b/binding/android/Leopard/leopard/src/main/java/ai/picovoice/leopard/LeopardTranscript.java @@ -1,7 +1,9 @@ /* Copyright 2022-2023 Picovoice Inc. + You may not use this file except in compliance with the license. A copy of the license is located in the "LICENSE" file accompanying this source. + Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and @@ -10,9 +12,6 @@ package ai.picovoice.leopard; -/** - * Class that contains transcription results returned from Leopard. - */ public class LeopardTranscript { private final String transcriptString; @@ -47,14 +46,12 @@ public Word[] getWordArray() { return wordArray; } - /** - * Class for storing word metadata from Leopard. - */ public static class Word { private final String word; private final float confidence; private final float startSec; private final float endSec; + private final int speakerTag; /** * Constructor. @@ -63,12 +60,14 @@ public static class Word { * @param confidence Transcription confidence. It is a number within [0, 1]. * @param startSec Start of word in seconds. * @param endSec End of word in seconds. + * @param speakerTag Speaker tag. It is set to `-1` if speaker diarization is not enabled during initialization. */ - public Word(String word, float confidence, float startSec, float endSec) { + public Word(String word, float confidence, float startSec, float endSec, int speakerTag) { this.word = word; this.confidence = confidence; this.startSec = startSec; this.endSec = endSec; + this.speakerTag = speakerTag; } /** @@ -106,5 +105,14 @@ public float getStartSec() { public float getEndSec() { return endSec; } + + /** + * Getter for the speaker tag. + * + * @return Speaker tag. + */ + public int getSpeakerTag() { + return speakerTag; + } } } diff --git a/binding/android/Leopard/leopard/src/main/java/ai/picovoice/leopard/exception/LeopardActivationException.java b/binding/android/Leopard/leopard/src/main/java/ai/picovoice/leopard/exception/LeopardActivationException.java index 83768915..6040f571 100644 --- a/binding/android/Leopard/leopard/src/main/java/ai/picovoice/leopard/exception/LeopardActivationException.java +++ b/binding/android/Leopard/leopard/src/main/java/ai/picovoice/leopard/exception/LeopardActivationException.java @@ -1,7 +1,9 @@ /* - Copyright 2022 Picovoice Inc. + Copyright 2022-2023 Picovoice Inc. + You may not use this file except in compliance with the license. A copy of the license is located in the "LICENSE" file accompanying this source. + Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and @@ -18,5 +20,8 @@ public LeopardActivationException(Throwable cause) { public LeopardActivationException(String message) { super(message); } -} + public LeopardActivationException(String message, String[] messageStack) { + super(message, messageStack); + } +} diff --git a/binding/android/Leopard/leopard/src/main/java/ai/picovoice/leopard/exception/LeopardActivationLimitException.java b/binding/android/Leopard/leopard/src/main/java/ai/picovoice/leopard/exception/LeopardActivationLimitException.java index d6a13b00..cd8cdd95 100644 --- a/binding/android/Leopard/leopard/src/main/java/ai/picovoice/leopard/exception/LeopardActivationLimitException.java +++ b/binding/android/Leopard/leopard/src/main/java/ai/picovoice/leopard/exception/LeopardActivationLimitException.java @@ -1,7 +1,9 @@ /* - Copyright 2022 Picovoice Inc. + Copyright 2022-2023 Picovoice Inc. + You may not use this file except in compliance with the license. A copy of the license is located in the "LICENSE" file accompanying this source. + Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and @@ -18,5 +20,8 @@ public LeopardActivationLimitException(Throwable cause) { public LeopardActivationLimitException(String message) { super(message); } -} + public LeopardActivationLimitException(String message, String[] messageStack) { + super(message, messageStack); + } +} diff --git a/binding/android/Leopard/leopard/src/main/java/ai/picovoice/leopard/exception/LeopardActivationRefusedException.java b/binding/android/Leopard/leopard/src/main/java/ai/picovoice/leopard/exception/LeopardActivationRefusedException.java index 943db0f2..acfd4086 100644 --- a/binding/android/Leopard/leopard/src/main/java/ai/picovoice/leopard/exception/LeopardActivationRefusedException.java +++ b/binding/android/Leopard/leopard/src/main/java/ai/picovoice/leopard/exception/LeopardActivationRefusedException.java @@ -1,7 +1,9 @@ /* - Copyright 2022 Picovoice Inc. + Copyright 2022-2023 Picovoice Inc. + You may not use this file except in compliance with the license. A copy of the license is located in the "LICENSE" file accompanying this source. + Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and @@ -18,5 +20,8 @@ public LeopardActivationRefusedException(Throwable cause) { public LeopardActivationRefusedException(String message) { super(message); } -} + public LeopardActivationRefusedException(String message, String[] messageStack) { + super(message, messageStack); + } +} diff --git a/binding/android/Leopard/leopard/src/main/java/ai/picovoice/leopard/exception/LeopardActivationThrottledException.java b/binding/android/Leopard/leopard/src/main/java/ai/picovoice/leopard/exception/LeopardActivationThrottledException.java index f4245f4e..e138679d 100644 --- a/binding/android/Leopard/leopard/src/main/java/ai/picovoice/leopard/exception/LeopardActivationThrottledException.java +++ b/binding/android/Leopard/leopard/src/main/java/ai/picovoice/leopard/exception/LeopardActivationThrottledException.java @@ -1,7 +1,9 @@ /* - Copyright 2022 Picovoice Inc. + Copyright 2022-2023 Picovoice Inc. + You may not use this file except in compliance with the license. A copy of the license is located in the "LICENSE" file accompanying this source. + Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and @@ -18,5 +20,8 @@ public LeopardActivationThrottledException(Throwable cause) { public LeopardActivationThrottledException(String message) { super(message); } -} + public LeopardActivationThrottledException(String message, String[] messageStack) { + super(message, messageStack); + } +} diff --git a/binding/android/Leopard/leopard/src/main/java/ai/picovoice/leopard/exception/LeopardException.java b/binding/android/Leopard/leopard/src/main/java/ai/picovoice/leopard/exception/LeopardException.java index c5d7985c..945c9947 100644 --- a/binding/android/Leopard/leopard/src/main/java/ai/picovoice/leopard/exception/LeopardException.java +++ b/binding/android/Leopard/leopard/src/main/java/ai/picovoice/leopard/exception/LeopardException.java @@ -1,7 +1,9 @@ /* - Copyright 2022 Picovoice Inc. + Copyright 2022-2023 Picovoice Inc. + You may not use this file except in compliance with the license. A copy of the license is located in the "LICENSE" file accompanying this source. + Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and @@ -11,12 +13,44 @@ package ai.picovoice.leopard; public class LeopardException extends Exception { + private final String message; + private final String[] messageStack; + public LeopardException(Throwable cause) { super(cause); + this.message = cause.getMessage(); + this.messageStack = null; } public LeopardException(String message) { super(message); + this.message = message; + this.messageStack = null; } -} + public LeopardException(String message, String[] messageStack) { + super(message); + this.message = message; + this.messageStack = messageStack; + } + + public String[] getMessageStack() { + return this.messageStack; + } + + @Override + public String getMessage() { + StringBuilder sb = new StringBuilder(message); + if (messageStack != null) { + if (messageStack.length > 0) { + sb.append(":"); + for (int i = 0; i < messageStack.length; i++) { + sb.append(String.format("\n [%d] %s", i, messageStack[i])); + } + } else { + sb.append("."); + } + } + return sb.toString(); + } +} diff --git a/binding/android/Leopard/leopard/src/main/java/ai/picovoice/leopard/exception/LeopardIOException.java b/binding/android/Leopard/leopard/src/main/java/ai/picovoice/leopard/exception/LeopardIOException.java index 27849b60..2559011e 100644 --- a/binding/android/Leopard/leopard/src/main/java/ai/picovoice/leopard/exception/LeopardIOException.java +++ b/binding/android/Leopard/leopard/src/main/java/ai/picovoice/leopard/exception/LeopardIOException.java @@ -1,7 +1,9 @@ /* - Copyright 2022 Picovoice Inc. + Copyright 2022-2023 Picovoice Inc. + You may not use this file except in compliance with the license. A copy of the license is located in the "LICENSE" file accompanying this source. + Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and @@ -18,5 +20,8 @@ public LeopardIOException(Throwable cause) { public LeopardIOException(String message) { super(message); } -} + public LeopardIOException(String message, String[] messageStack) { + super(message, messageStack); + } +} diff --git a/binding/android/Leopard/leopard/src/main/java/ai/picovoice/leopard/exception/LeopardInvalidArgumentException.java b/binding/android/Leopard/leopard/src/main/java/ai/picovoice/leopard/exception/LeopardInvalidArgumentException.java index 141326b1..eb934c3f 100644 --- a/binding/android/Leopard/leopard/src/main/java/ai/picovoice/leopard/exception/LeopardInvalidArgumentException.java +++ b/binding/android/Leopard/leopard/src/main/java/ai/picovoice/leopard/exception/LeopardInvalidArgumentException.java @@ -1,7 +1,9 @@ /* - Copyright 2022 Picovoice Inc. + Copyright 2022-2023 Picovoice Inc. + You may not use this file except in compliance with the license. A copy of the license is located in the "LICENSE" file accompanying this source. + Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and @@ -18,5 +20,8 @@ public LeopardInvalidArgumentException(Throwable cause) { public LeopardInvalidArgumentException(String message) { super(message); } -} + public LeopardInvalidArgumentException(String message, String[] messageStack) { + super(message, messageStack); + } +} diff --git a/binding/android/Leopard/leopard/src/main/java/ai/picovoice/leopard/exception/LeopardInvalidStateException.java b/binding/android/Leopard/leopard/src/main/java/ai/picovoice/leopard/exception/LeopardInvalidStateException.java index a8ae603e..90d56f9d 100644 --- a/binding/android/Leopard/leopard/src/main/java/ai/picovoice/leopard/exception/LeopardInvalidStateException.java +++ b/binding/android/Leopard/leopard/src/main/java/ai/picovoice/leopard/exception/LeopardInvalidStateException.java @@ -1,7 +1,9 @@ /* - Copyright 2022 Picovoice Inc. + Copyright 2022-2023 Picovoice Inc. + You may not use this file except in compliance with the license. A copy of the license is located in the "LICENSE" file accompanying this source. + Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and @@ -18,5 +20,8 @@ public LeopardInvalidStateException(Throwable cause) { public LeopardInvalidStateException(String message) { super(message); } -} + public LeopardInvalidStateException(String message, String[] messageStack) { + super(message, messageStack); + } +} diff --git a/binding/android/Leopard/leopard/src/main/java/ai/picovoice/leopard/exception/LeopardKeyException.java b/binding/android/Leopard/leopard/src/main/java/ai/picovoice/leopard/exception/LeopardKeyException.java index 61f4501f..5022a36d 100644 --- a/binding/android/Leopard/leopard/src/main/java/ai/picovoice/leopard/exception/LeopardKeyException.java +++ b/binding/android/Leopard/leopard/src/main/java/ai/picovoice/leopard/exception/LeopardKeyException.java @@ -1,7 +1,9 @@ /* - Copyright 2022 Picovoice Inc. + Copyright 2022-2023 Picovoice Inc. + You may not use this file except in compliance with the license. A copy of the license is located in the "LICENSE" file accompanying this source. + Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and @@ -18,5 +20,8 @@ public LeopardKeyException(Throwable cause) { public LeopardKeyException(String message) { super(message); } -} + public LeopardKeyException(String message, String[] messageStack) { + super(message, messageStack); + } +} diff --git a/binding/android/Leopard/leopard/src/main/java/ai/picovoice/leopard/exception/LeopardMemoryException.java b/binding/android/Leopard/leopard/src/main/java/ai/picovoice/leopard/exception/LeopardMemoryException.java index 04c53031..40815cc9 100644 --- a/binding/android/Leopard/leopard/src/main/java/ai/picovoice/leopard/exception/LeopardMemoryException.java +++ b/binding/android/Leopard/leopard/src/main/java/ai/picovoice/leopard/exception/LeopardMemoryException.java @@ -1,7 +1,9 @@ /* - Copyright 2022 Picovoice Inc. + Copyright 2022-2023 Picovoice Inc. + You may not use this file except in compliance with the license. A copy of the license is located in the "LICENSE" file accompanying this source. + Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and @@ -18,5 +20,8 @@ public LeopardMemoryException(Throwable cause) { public LeopardMemoryException(String message) { super(message); } -} + public LeopardMemoryException(String message, String[] messageStack) { + super(message, messageStack); + } +} diff --git a/binding/android/Leopard/leopard/src/main/java/ai/picovoice/leopard/exception/LeopardRuntimeException.java b/binding/android/Leopard/leopard/src/main/java/ai/picovoice/leopard/exception/LeopardRuntimeException.java index 9723df6c..d9b8a78e 100644 --- a/binding/android/Leopard/leopard/src/main/java/ai/picovoice/leopard/exception/LeopardRuntimeException.java +++ b/binding/android/Leopard/leopard/src/main/java/ai/picovoice/leopard/exception/LeopardRuntimeException.java @@ -1,7 +1,9 @@ /* - Copyright 2022 Picovoice Inc. + Copyright 2022-2023 Picovoice Inc. + You may not use this file except in compliance with the license. A copy of the license is located in the "LICENSE" file accompanying this source. + Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and @@ -18,5 +20,8 @@ public LeopardRuntimeException(Throwable cause) { public LeopardRuntimeException(String message) { super(message); } -} + public LeopardRuntimeException(String message, String[] messageStack) { + super(message, messageStack); + } +} diff --git a/binding/android/Leopard/leopard/src/main/java/ai/picovoice/leopard/exception/LeopardStopIterationException.java b/binding/android/Leopard/leopard/src/main/java/ai/picovoice/leopard/exception/LeopardStopIterationException.java index 6d82a621..8f9570a1 100644 --- a/binding/android/Leopard/leopard/src/main/java/ai/picovoice/leopard/exception/LeopardStopIterationException.java +++ b/binding/android/Leopard/leopard/src/main/java/ai/picovoice/leopard/exception/LeopardStopIterationException.java @@ -1,7 +1,9 @@ /* - Copyright 2022 Picovoice Inc. + Copyright 2022-2023 Picovoice Inc. + You may not use this file except in compliance with the license. A copy of the license is located in the "LICENSE" file accompanying this source. + Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and @@ -18,5 +20,8 @@ public LeopardStopIterationException(Throwable cause) { public LeopardStopIterationException(String message) { super(message); } -} + public LeopardStopIterationException(String message, String[] messageStack) { + super(message, messageStack); + } +} From e59ec5dd0ba2b59f24cd04f9e26013d3d3401c06 Mon Sep 17 00:00:00 2001 From: Kwangsoo Yeo Date: Thu, 9 Nov 2023 09:48:59 -0800 Subject: [PATCH 02/11] update test --- .../picovoice/leopard/testapp/BaseTest.java | 23 ++- .../leopard/testapp/LeopardTest.java | 187 ++++++++++++++---- 2 files changed, 168 insertions(+), 42 deletions(-) diff --git a/binding/android/LeopardTestApp/leopard-test-app/src/androidTest/java/ai/picovoice/leopard/testapp/BaseTest.java b/binding/android/LeopardTestApp/leopard-test-app/src/androidTest/java/ai/picovoice/leopard/testapp/BaseTest.java index 08ddbce3..cb95acdb 100644 --- a/binding/android/LeopardTestApp/leopard-test-app/src/androidTest/java/ai/picovoice/leopard/testapp/BaseTest.java +++ b/binding/android/LeopardTestApp/leopard-test-app/src/androidTest/java/ai/picovoice/leopard/testapp/BaseTest.java @@ -13,6 +13,7 @@ package ai.picovoice.leopard.testapp; import static org.junit.Assert.assertTrue; +import static org.junit.Assert.assertEquals; import android.content.Context; import android.content.res.AssetManager; @@ -26,6 +27,7 @@ import org.junit.Before; import org.junit.Rule; +import java.lang.Math; import java.io.BufferedInputStream; import java.io.BufferedOutputStream; import java.io.ByteArrayOutputStream; @@ -104,18 +106,21 @@ protected static short[] readAudioFile(String audioFile) throws Exception { return pcm; } - protected void validateMetadata(LeopardTranscript.Word[] words, String transcript, float audioLength) { - String normTranscript = transcript.toUpperCase(); + protected void validateMetadata( + LeopardTranscript.Word[] words, + LeopardTranscript.Word[] expectedWords, + boolean enableDiarization + ) { for (int i = 0; i < words.length; i++) { - assertTrue(normTranscript.contains(words[i].getWord().toUpperCase())); - assertTrue(words[i].getStartSec() > 0); - assertTrue(words[i].getStartSec() <= words[i].getEndSec()); - if (i < words.length - 1) { - assertTrue(words[i].getEndSec() <= words[i + 1].getStartSec()); + assertEquals(words[i].word, expectedWords[i].word); + assertTrue(Math.abs(words[i].confidence - expectedWords[i].confidence) <= 0.01); + assertTrue(Math.abs(words[i].startSec - expectedWords[i].startSec) <= 0.01); + assertTrue(Math.abs(words[i].endSec - expectedWords[i].endSec) <= 0.01); + if (enableDiarization) { + assertEquals(words[i].speakerTag, expectedWords[i].speakerTag); } else { - assertTrue(words[i].getEndSec() <= audioLength); + assertEquals(words[i].speakerTag, -1); } - assertTrue(words[i].getConfidence() >= 0.0f && words[i].getConfidence() <= 1.0f); } } diff --git a/binding/android/LeopardTestApp/leopard-test-app/src/androidTest/java/ai/picovoice/leopard/testapp/LeopardTest.java b/binding/android/LeopardTestApp/leopard-test-app/src/androidTest/java/ai/picovoice/leopard/testapp/LeopardTest.java index 19342dfb..23413e1d 100644 --- a/binding/android/LeopardTestApp/leopard-test-app/src/androidTest/java/ai/picovoice/leopard/testapp/LeopardTest.java +++ b/binding/android/LeopardTestApp/leopard-test-app/src/androidTest/java/ai/picovoice/leopard/testapp/LeopardTest.java @@ -136,28 +136,32 @@ public static class LanguageTests extends BaseTest { public String expectedTranscript; @Parameterized.Parameter(value = 4) - public String[] punctuations; + public String expectedTranscriptWithPunctuation; @Parameterized.Parameter(value = 5) public float errorRate; + @Parameterized.Parameter(value = 6) + public LeopardTranscript.Word[] expectedWords; + @Parameterized.Parameters(name = "{0}") public static Collection initParameters() throws IOException { String testDataJsonString = getTestDataString(); JsonParser parser = new JsonParser(); JsonObject testDataJson = parser.parse(testDataJsonString).getAsJsonObject(); - JsonArray testParameters = testDataJson.getAsJsonObject("tests").getAsJsonArray("parameters"); + JsonArray languageTests = testDataJson.getAsJsonArray("language_tests"); List parameters = new ArrayList<>(); - for (int i = 0; i < testParameters.size(); i++) { - JsonObject testData = testParameters.get(i).getAsJsonObject(); + for (int i = 0; i < languageTests.size(); i++) { + JsonObject testData = languageTests.get(i).getAsJsonObject(); String language = testData.get("language").getAsString(); String audioFile = testData.get("audio_file").getAsString(); String transcript = testData.get("transcript").getAsString(); - JsonArray punctuations = testData.get("punctuations").getAsJsonArray(); + String transcriptWithPunctuation = testData.get("transcript_with_punctuation").getAsString(); float errorRate = testData.get("error_rate").getAsFloat(); + JsonArray words = testData.get("words").getAsJsonArray(); String modelFile; if (language.equals("en")) { @@ -168,9 +172,23 @@ public static Collection initParameters() throws IOException { String testAudioFile = String.format("audio_samples/%s", audioFile); - String[] paramPunctuations = new String[punctuations.size()]; - for (int j = 0; j < punctuations.size(); j++) { - paramPunctuations[j] = punctuations.get(j).getAsString(); + LeopardTranscript.Word[] paramWords = new LeopardTranscript.Word[words.size()]; + for (int j = 0; j < words.size(); j++) { + JsonObject wordObject = words.get(j).getAsJsonObject(); + + String word = wordObject.get("word").getAsString(); + float confidence = wordObject.get("confidence").getAsFloat(); + float startSec = wordObject.get("start_sec").getAsFloat(); + float endSec = wordObject.get("end_sec").getAsFloat(); + int speakerTag = wordObject.get("speaker_tag").getAsInt(); + + paramWords[j] = new LeopardTranscript.Word( + word, + confidence, + startSec, + endSec, + speakerTag + ); } parameters.add(new Object[]{ @@ -178,8 +196,9 @@ public static Collection initParameters() throws IOException { modelFile, testAudioFile, transcript, - paramPunctuations, - errorRate + transcriptWithPunctuation, + errorRate, + paramWords }); } @@ -200,16 +219,11 @@ public void testTranscribeAudioFile() throws Exception { LeopardTranscript result = leopard.processFile(audioFile.getAbsolutePath()); - String normalizedTranscript = expectedTranscript; - for (String punctuation : punctuations) { - normalizedTranscript = normalizedTranscript.replace(punctuation, ""); - } - - assertTrue(getWordErrorRate(result.getTranscriptString(), normalizedTranscript, useCER) < errorRate); + assertTrue(getWordErrorRate(result.getTranscriptString(), expectedTranscript, useCER) < errorRate); validateMetadata( result.getWordArray(), - result.getTranscriptString(), - (float) readAudioFile(audioFile.getAbsolutePath()).length / leopard.getSampleRate() + expectedWords, + false ); leopard.delete(); @@ -228,17 +242,12 @@ public void testTranscribeAudioFileWithPunctuation() throws Exception { boolean useCER = language.equals("ja"); LeopardTranscript result = leopard.processFile(audioFile.getAbsolutePath()); - assertTrue(getWordErrorRate(result.getTranscriptString(), expectedTranscript, useCER) < errorRate); - - String normalizedTranscript = result.getTranscriptString(); - for (String punctuation : punctuations) { - normalizedTranscript = normalizedTranscript.replace(punctuation, ""); - } + assertTrue(getWordErrorRate(result.getTranscriptString(), expectedTranscriptWithPunctuation, useCER) < errorRate); validateMetadata( result.getWordArray(), - result.getTranscriptString(), - (float) readAudioFile(audioFile.getAbsolutePath()).length / leopard.getSampleRate() + expectedWords, + false ); leopard.delete(); @@ -258,19 +267,131 @@ public void testTranscribeAudioData() throws Exception { LeopardTranscript result = leopard.process(pcm); boolean useCER = language.equals("ja"); - String normalizedTranscript = result.getTranscriptString(); - for (String punctuation : punctuations) { - normalizedTranscript = normalizedTranscript.replace(punctuation, ""); - } + assertTrue(getWordErrorRate(result.getTranscriptString(), expectedTranscript, useCER) < errorRate); + validateMetadata( + result.getWordArray(), + expectedWords, + false + ); - assertTrue(getWordErrorRate(result.getTranscriptString(), normalizedTranscript, useCER) < errorRate); + leopard.delete(); + } + + @Test + public void testTranscribeAudioDataWithDiarization() throws Exception { + String modelPath = new File(testResourcesPath, modelFile).getAbsolutePath(); + Leopard leopard = new Leopard.Builder() + .setAccessKey(accessKey) + .setModelPath(modelPath) + .setEnableDiarizations(true) + .build(appContext); + + File audioFile = new File(testResourcesPath, testAudioFile); + short[] pcm = readAudioFile(audioFile.getAbsolutePath()); + + LeopardTranscript result = leopard.process(pcm); + boolean useCER = language.equals("ja"); + + assertTrue(getWordErrorRate(result.getTranscriptString(), expectedTranscript, useCER) < errorRate); validateMetadata( result.getWordArray(), - result.getTranscriptString(), - (float) pcm.length / leopard.getSampleRate() + expectedWords, + true ); leopard.delete(); } } + + @RunWith(Parameterized.class) + public static class LanguageTests extends BaseTest { + @Parameterized.Parameter(value = 0) + public String language; + + @Parameterized.Parameter(value = 1) + public String modelFile; + + @Parameterized.Parameter(value = 2) + public String testAudioFile; + + @Parameterized.Parameter(value = 3) + public LeopardTranscript.Word[] expectedWords; + + @Parameterized.Parameters(name = "{0}") + public static Collection initParameters() throws IOException { + String testDataJsonString = getTestDataString(); + + JsonParser parser = new JsonParser(); + JsonObject testDataJson = parser.parse(testDataJsonString).getAsJsonObject(); + JsonArray languageTests = testDataJson.getAsJsonArray("diarization_tests"); + + List parameters = new ArrayList<>(); + for (int i = 0; i < languageTests.size(); i++) { + JsonObject testData = languageTests.get(i).getAsJsonObject(); + + String language = testData.get("language").getAsString(); + String audioFile = testData.get("audio_file").getAsString(); + JsonArray words = testData.get("words").getAsJsonArray(); + + String modelFile; + if (language.equals("en")) { + modelFile = "model_files/leopard_params.pv"; + } else { + modelFile = String.format("model_files/leopard_params_%s.pv", language); + } + + String testAudioFile = String.format("audio_samples/%s", audioFile); + + LeopardTranscript.Word[] paramWords = new LeopardTranscript.Word[words.size()]; + for (int j = 0; j < words.size(); j++) { + JsonObject wordObject = words.get(j).getAsJsonObject(); + + String word = wordObject.get("word").getAsString(); + float confidence = wordObject.get("confidence").getAsFloat(); + float startSec = wordObject.get("start_sec").getAsFloat(); + float endSec = wordObject.get("end_sec").getAsFloat(); + int speakerTag = wordObject.get("speaker_tag").getAsInt(); + + paramWords[j] = new LeopardTranscript.Word( + word, + confidence, + startSec, + endSec, + speakerTag + ); + } + + parameters.add(new Object[]{ + language, + modelFile, + testAudioFile, + paramWords + }); + } + + return parameters; + } + + @Test + public void testDiarizationMultipleSpeakers() throws Exception { + String modelPath = new File(testResourcesPath, modelFile).getAbsolutePath(); + Leopard leopard = new Leopard.Builder() + .setAccessKey(accessKey) + .setModelPath(modelPath) + .setEnableDiarizations(true) + .build(appContext); + + File audioFile = new File(testResourcesPath, testAudioFile); + short[] pcm = readAudioFile(audioFile.getAbsolutePath()); + + LeopardTranscript result = leopard.process(pcm); + + validateMetadata( + result.getWordArray(), + expectedWords, + true + ); + leopard.delete(); + } + } } From 34edc518e183d3eca28e1911d95d376bace6dda1 Mon Sep 17 00:00:00 2001 From: Kwangsoo Yeo Date: Wed, 15 Nov 2023 14:10:04 -0800 Subject: [PATCH 03/11] update test --- .../leopard/testapp/LeopardTest.java | 27 +++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/binding/android/LeopardTestApp/leopard-test-app/src/androidTest/java/ai/picovoice/leopard/testapp/LeopardTest.java b/binding/android/LeopardTestApp/leopard-test-app/src/androidTest/java/ai/picovoice/leopard/testapp/LeopardTest.java index 23413e1d..278caf01 100644 --- a/binding/android/LeopardTestApp/leopard-test-app/src/androidTest/java/ai/picovoice/leopard/testapp/LeopardTest.java +++ b/binding/android/LeopardTestApp/leopard-test-app/src/androidTest/java/ai/picovoice/leopard/testapp/LeopardTest.java @@ -119,6 +119,33 @@ public void getSampleRate() throws LeopardException { leopard.delete(); } + + @Test + public void testErrorStack() { + String[] error = {}; + try { + new Leopard.Builder() + .setAccessKey("invalid") + .setModelPath(defaultModelPath) + .build(appContext); + } catch (LeopardException e) { + error = e.getMessageStack(); + } + + assertTrue(0 < error.length); + assertTrue(error.length <= 8); + + try { + new Leopard.Builder() + .setAccessKey("invalid") + .setModelPath(defaultModelPath) + .build(appContext); + } catch (LeopardException e) { + for (int i = 0; i < error.length; i++) { + assertEquals(e.getMessageStack()[i], error[i]); + } + } + } } @RunWith(Parameterized.class) From 855bcf1879b0037c577bdd6c39632391e4a8cfef Mon Sep 17 00:00:00 2001 From: Kwangsoo Yeo Date: Tue, 21 Nov 2023 17:50:32 -0800 Subject: [PATCH 04/11] update test + demo --- binding/android/Leopard/leopard/build.gradle | 2 +- binding/android/LeopardTestApp/.gitignore | 1 - binding/android/LeopardTestApp/build.gradle | 3 ++ .../leopard-test-app/build.gradle | 2 +- .../picovoice/leopard/testapp/BaseTest.java | 14 ++++---- .../leopard/testapp/LeopardTest.java | 34 +++++++++---------- demo/android/LeopardDemo/build.gradle | 3 ++ .../LeopardDemo/leopard-demo-app/build.gradle | 2 +- .../picovoice/leoparddemo/MainActivity.java | 8 +++-- .../src/main/res/layout/leopard_demo.xml | 21 +++++++++--- .../src/main/res/layout/recyclerview_row.xml | 15 ++++++-- 11 files changed, 67 insertions(+), 38 deletions(-) diff --git a/binding/android/Leopard/leopard/build.gradle b/binding/android/Leopard/leopard/build.gradle index bdf0c785..f48e4b93 100644 --- a/binding/android/Leopard/leopard/build.gradle +++ b/binding/android/Leopard/leopard/build.gradle @@ -38,7 +38,7 @@ dependencies { } task copyLibs(type: Copy) { - from("${rootDir}/../../lib/android") + from("${rootDir}/../../../lib/android") into("${rootDir}/leopard/src/main/jniLibs") } diff --git a/binding/android/LeopardTestApp/.gitignore b/binding/android/LeopardTestApp/.gitignore index 66b0063a..76b91577 100644 --- a/binding/android/LeopardTestApp/.gitignore +++ b/binding/android/LeopardTestApp/.gitignore @@ -12,4 +12,3 @@ test_resources *.wav *.jks -!.dummy.jks diff --git a/binding/android/LeopardTestApp/build.gradle b/binding/android/LeopardTestApp/build.gradle index d1001bd5..0a1e8246 100644 --- a/binding/android/LeopardTestApp/build.gradle +++ b/binding/android/LeopardTestApp/build.gradle @@ -16,6 +16,9 @@ allprojects { repositories { google() mavenCentral() + maven { + url 'https://s01.oss.sonatype.org/content/repositories/aipicovoice-1299/' + } } } diff --git a/binding/android/LeopardTestApp/leopard-test-app/build.gradle b/binding/android/LeopardTestApp/leopard-test-app/build.gradle index f91431ec..cb7c3dd3 100644 --- a/binding/android/LeopardTestApp/leopard-test-app/build.gradle +++ b/binding/android/LeopardTestApp/leopard-test-app/build.gradle @@ -140,7 +140,7 @@ dependencies { implementation 'androidx.appcompat:appcompat:1.6.1' implementation 'com.google.android.material:material:1.8.0' implementation 'androidx.constraintlayout:constraintlayout:2.1.4' - implementation 'ai.picovoice:leopard-android:1.2.1' + implementation 'ai.picovoice:leopard-android:2.0.0' // Espresso UI Testing androidTestImplementation 'androidx.test.ext:junit:1.1.5' diff --git a/binding/android/LeopardTestApp/leopard-test-app/src/androidTest/java/ai/picovoice/leopard/testapp/BaseTest.java b/binding/android/LeopardTestApp/leopard-test-app/src/androidTest/java/ai/picovoice/leopard/testapp/BaseTest.java index cb95acdb..73129398 100644 --- a/binding/android/LeopardTestApp/leopard-test-app/src/androidTest/java/ai/picovoice/leopard/testapp/BaseTest.java +++ b/binding/android/LeopardTestApp/leopard-test-app/src/androidTest/java/ai/picovoice/leopard/testapp/BaseTest.java @@ -101,7 +101,7 @@ protected static short[] readAudioFile(String audioFile) throws Exception { short[] pcm = new short[rawData.length / 2]; ByteBuffer pcmBuff = ByteBuffer.wrap(rawData).order(ByteOrder.LITTLE_ENDIAN); pcmBuff.asShortBuffer().get(pcm); - pcm = Arrays.copyOfRange(pcm, 44, pcm.length); + pcm = Arrays.copyOfRange(pcm, 22, pcm.length); return pcm; } @@ -112,14 +112,14 @@ protected void validateMetadata( boolean enableDiarization ) { for (int i = 0; i < words.length; i++) { - assertEquals(words[i].word, expectedWords[i].word); - assertTrue(Math.abs(words[i].confidence - expectedWords[i].confidence) <= 0.01); - assertTrue(Math.abs(words[i].startSec - expectedWords[i].startSec) <= 0.01); - assertTrue(Math.abs(words[i].endSec - expectedWords[i].endSec) <= 0.01); + assertEquals(words[i].getWord(), expectedWords[i].getWord()); + assertTrue(Math.abs(words[i].getConfidence() - expectedWords[i].getConfidence()) <= 0.01); + assertTrue(Math.abs(words[i].getStartSec() - expectedWords[i].getStartSec()) <= 0.01); + assertTrue(Math.abs(words[i].getEndSec() - expectedWords[i].getEndSec()) <= 0.01); if (enableDiarization) { - assertEquals(words[i].speakerTag, expectedWords[i].speakerTag); + assertEquals(words[i].getSpeakerTag(), expectedWords[i].getSpeakerTag()); } else { - assertEquals(words[i].speakerTag, -1); + assertEquals(words[i].getSpeakerTag(), -1); } } } diff --git a/binding/android/LeopardTestApp/leopard-test-app/src/androidTest/java/ai/picovoice/leopard/testapp/LeopardTest.java b/binding/android/LeopardTestApp/leopard-test-app/src/androidTest/java/ai/picovoice/leopard/testapp/LeopardTest.java index 278caf01..8f27fec6 100644 --- a/binding/android/LeopardTestApp/leopard-test-app/src/androidTest/java/ai/picovoice/leopard/testapp/LeopardTest.java +++ b/binding/android/LeopardTestApp/leopard-test-app/src/androidTest/java/ai/picovoice/leopard/testapp/LeopardTest.java @@ -12,7 +12,7 @@ package ai.picovoice.leopard.testapp; -import static org.junit.Assert.assertTrue; +import static org.junit.Assert.*; import com.google.gson.JsonArray; import com.google.gson.JsonObject; @@ -177,7 +177,9 @@ public static Collection initParameters() throws IOException { JsonParser parser = new JsonParser(); JsonObject testDataJson = parser.parse(testDataJsonString).getAsJsonObject(); - JsonArray languageTests = testDataJson.getAsJsonArray("language_tests"); + JsonArray languageTests = testDataJson + .getAsJsonObject("tests") + .getAsJsonArray("language_tests"); List parameters = new ArrayList<>(); for (int i = 0; i < languageTests.size(); i++) { @@ -310,7 +312,7 @@ public void testTranscribeAudioDataWithDiarization() throws Exception { Leopard leopard = new Leopard.Builder() .setAccessKey(accessKey) .setModelPath(modelPath) - .setEnableDiarizations(true) + .setEnableDiarization(true) .build(appContext); File audioFile = new File(testResourcesPath, testAudioFile); @@ -331,7 +333,7 @@ public void testTranscribeAudioDataWithDiarization() throws Exception { } @RunWith(Parameterized.class) - public static class LanguageTests extends BaseTest { + public static class DiarizationTests extends BaseTest { @Parameterized.Parameter(value = 0) public String language; @@ -350,7 +352,9 @@ public static Collection initParameters() throws IOException { JsonParser parser = new JsonParser(); JsonObject testDataJson = parser.parse(testDataJsonString).getAsJsonObject(); - JsonArray languageTests = testDataJson.getAsJsonArray("diarization_tests"); + JsonArray languageTests = testDataJson + .getAsJsonObject("tests") + .getAsJsonArray("diarization_tests"); List parameters = new ArrayList<>(); for (int i = 0; i < languageTests.size(); i++) { @@ -374,16 +378,13 @@ public static Collection initParameters() throws IOException { JsonObject wordObject = words.get(j).getAsJsonObject(); String word = wordObject.get("word").getAsString(); - float confidence = wordObject.get("confidence").getAsFloat(); - float startSec = wordObject.get("start_sec").getAsFloat(); - float endSec = wordObject.get("end_sec").getAsFloat(); int speakerTag = wordObject.get("speaker_tag").getAsInt(); paramWords[j] = new LeopardTranscript.Word( word, - confidence, - startSec, - endSec, + 0.f, + 0.f, + 0.f, speakerTag ); } @@ -405,7 +406,7 @@ public void testDiarizationMultipleSpeakers() throws Exception { Leopard leopard = new Leopard.Builder() .setAccessKey(accessKey) .setModelPath(modelPath) - .setEnableDiarizations(true) + .setEnableDiarization(true) .build(appContext); File audioFile = new File(testResourcesPath, testAudioFile); @@ -413,11 +414,10 @@ public void testDiarizationMultipleSpeakers() throws Exception { LeopardTranscript result = leopard.process(pcm); - validateMetadata( - result.getWordArray(), - expectedWords, - true - ); + for (int i = 0; i < result.getWordArray().length; i++) { + assertEquals(result.getWordArray()[i].getWord(), expectedWords[i].getWord()); + assertEquals(result.getWordArray()[i].getSpeakerTag(), expectedWords[i].getSpeakerTag()); + } leopard.delete(); } } diff --git a/demo/android/LeopardDemo/build.gradle b/demo/android/LeopardDemo/build.gradle index d1001bd5..0a1e8246 100644 --- a/demo/android/LeopardDemo/build.gradle +++ b/demo/android/LeopardDemo/build.gradle @@ -16,6 +16,9 @@ allprojects { repositories { google() mavenCentral() + maven { + url 'https://s01.oss.sonatype.org/content/repositories/aipicovoice-1299/' + } } } diff --git a/demo/android/LeopardDemo/leopard-demo-app/build.gradle b/demo/android/LeopardDemo/leopard-demo-app/build.gradle index ebcfe30c..a9187e36 100644 --- a/demo/android/LeopardDemo/leopard-demo-app/build.gradle +++ b/demo/android/LeopardDemo/leopard-demo-app/build.gradle @@ -70,7 +70,7 @@ dependencies { implementation 'androidx.appcompat:appcompat:1.6.1' implementation 'com.google.android.material:material:1.8.0' implementation 'androidx.constraintlayout:constraintlayout:2.1.4' - implementation 'ai.picovoice:leopard-android:1.2.1' + implementation 'ai.picovoice:leopard-android:2.0.0' implementation 'ai.picovoice:android-voice-processor:1.0.2' } diff --git a/demo/android/LeopardDemo/leopard-demo-app/src/main/java/ai/picovoice/leoparddemo/MainActivity.java b/demo/android/LeopardDemo/leopard-demo-app/src/main/java/ai/picovoice/leoparddemo/MainActivity.java index 56d910f5..aafa26b0 100644 --- a/demo/android/LeopardDemo/leopard-demo-app/src/main/java/ai/picovoice/leoparddemo/MainActivity.java +++ b/demo/android/LeopardDemo/leopard-demo-app/src/main/java/ai/picovoice/leoparddemo/MainActivity.java @@ -108,7 +108,8 @@ protected void onCreate(Bundle savedInstanceState) { try { Leopard.Builder builder = new Leopard.Builder() .setAccessKey(ACCESS_KEY) - .setEnableAutomaticPunctuation(true); + .setEnableAutomaticPunctuation(true) + .setEnableDiarization(true); String model; if (Objects.equals(BuildConfig.FLAVOR, "en")) { @@ -120,7 +121,7 @@ protected void onCreate(Bundle savedInstanceState) { leopard = builder.build(getApplicationContext()); } catch (LeopardInvalidArgumentException e) { - displayError(String.format("%s\nEnsure your AccessKey '%s' is valid", e.getMessage(), ACCESS_KEY)); + displayError(e.getMessage()); } catch (LeopardActivationException e) { displayError("AccessKey activation error"); } catch (LeopardActivationLimitException e) { @@ -321,6 +322,7 @@ public void onBindViewHolder(ViewHolder holder, int position) { holder.startSec.setText(String.format("%.2fs", word.getStartSec())); holder.endSec.setText(String.format("%.2fs", word.getEndSec())); holder.confidence.setText(String.format("%.0f%%", word.getConfidence() * 100)); + holder.speakerTag.setText(String.format("%d", word.getSpeakerTag())); } @Override @@ -333,6 +335,7 @@ public static class ViewHolder extends RecyclerView.ViewHolder { TextView startSec; TextView endSec; TextView confidence; + TextView speakerTag; ViewHolder(View itemView) { super(itemView); @@ -340,6 +343,7 @@ public static class ViewHolder extends RecyclerView.ViewHolder { startSec = itemView.findViewById(R.id.startSec); endSec = itemView.findViewById(R.id.endSec); confidence = itemView.findViewById(R.id.confidence); + speakerTag = itemView.findViewById(R.id.speakerTag); } } } diff --git a/demo/android/LeopardDemo/leopard-demo-app/src/main/res/layout/leopard_demo.xml b/demo/android/LeopardDemo/leopard-demo-app/src/main/res/layout/leopard_demo.xml index 036a3f88..0db5c6b7 100644 --- a/demo/android/LeopardDemo/leopard-demo-app/src/main/res/layout/leopard_demo.xml +++ b/demo/android/LeopardDemo/leopard-demo-app/src/main/res/layout/leopard_demo.xml @@ -70,11 +70,11 @@ android:id="@+id/verboseResultsHeader" android:layout_width="match_parent" android:layout_height="wrap_content" - android:layout_marginBottom="5dp" + android:layout_margin="5dp" android:orientation="horizontal"> + + + + Date: Tue, 21 Nov 2023 17:55:27 -0800 Subject: [PATCH 05/11] fix codestyle --- .../leopard/src/main/java/ai/picovoice/leopard/Leopard.java | 6 +++--- .../main/java/ai/picovoice/leopard/LeopardTranscript.java | 6 ++++++ .../java/ai/picovoice/leopard/testapp/BaseTest.java | 6 +++--- .../java/ai/picovoice/leopard/testapp/LeopardTest.java | 3 ++- resources/.lint/java/suppress.xml | 2 +- 5 files changed, 15 insertions(+), 8 deletions(-) diff --git a/binding/android/Leopard/leopard/src/main/java/ai/picovoice/leopard/Leopard.java b/binding/android/Leopard/leopard/src/main/java/ai/picovoice/leopard/Leopard.java index 8463bb91..913ffe29 100644 --- a/binding/android/Leopard/leopard/src/main/java/ai/picovoice/leopard/Leopard.java +++ b/binding/android/Leopard/leopard/src/main/java/ai/picovoice/leopard/Leopard.java @@ -50,9 +50,9 @@ public class Leopard { * @param accessKey AccessKey obtained from Picovoice Console * @param modelPath Absolute path to the file containing Leopard model parameters. * @param enableAutomaticPunctuation Set to `true` to enable automatic punctuation insertion. - * @param enableDiarization Set to `true` to enable speaker diarization, which allows Leopard to differentiate speakers - * as part of the transcription process. Word metadata will include a `speaker_tag` to - * identify unique speakers. + * @param enableDiarization Set to `true` to enable speaker diarization, which allows Leopard to + * differentiate speakers as part of the transcription process. Word + * metadata will include a `speaker_tag` to identify unique speakers. * @throws LeopardException if there is an error while initializing Leopard. */ private Leopard( diff --git a/binding/android/Leopard/leopard/src/main/java/ai/picovoice/leopard/LeopardTranscript.java b/binding/android/Leopard/leopard/src/main/java/ai/picovoice/leopard/LeopardTranscript.java index 8212784d..36477b69 100644 --- a/binding/android/Leopard/leopard/src/main/java/ai/picovoice/leopard/LeopardTranscript.java +++ b/binding/android/Leopard/leopard/src/main/java/ai/picovoice/leopard/LeopardTranscript.java @@ -12,6 +12,9 @@ package ai.picovoice.leopard; +/** + * LeopardTranscript Class. + */ public class LeopardTranscript { private final String transcriptString; @@ -46,6 +49,9 @@ public Word[] getWordArray() { return wordArray; } + /** + * LeopardTranscript.Word class + */ public static class Word { private final String word; private final float confidence; diff --git a/binding/android/LeopardTestApp/leopard-test-app/src/androidTest/java/ai/picovoice/leopard/testapp/BaseTest.java b/binding/android/LeopardTestApp/leopard-test-app/src/androidTest/java/ai/picovoice/leopard/testapp/BaseTest.java index 73129398..5caf49b1 100644 --- a/binding/android/LeopardTestApp/leopard-test-app/src/androidTest/java/ai/picovoice/leopard/testapp/BaseTest.java +++ b/binding/android/LeopardTestApp/leopard-test-app/src/androidTest/java/ai/picovoice/leopard/testapp/BaseTest.java @@ -107,9 +107,9 @@ protected static short[] readAudioFile(String audioFile) throws Exception { } protected void validateMetadata( - LeopardTranscript.Word[] words, - LeopardTranscript.Word[] expectedWords, - boolean enableDiarization + LeopardTranscript.Word[] words, + LeopardTranscript.Word[] expectedWords, + boolean enableDiarization ) { for (int i = 0; i < words.length; i++) { assertEquals(words[i].getWord(), expectedWords[i].getWord()); diff --git a/binding/android/LeopardTestApp/leopard-test-app/src/androidTest/java/ai/picovoice/leopard/testapp/LeopardTest.java b/binding/android/LeopardTestApp/leopard-test-app/src/androidTest/java/ai/picovoice/leopard/testapp/LeopardTest.java index 8f27fec6..c5920af7 100644 --- a/binding/android/LeopardTestApp/leopard-test-app/src/androidTest/java/ai/picovoice/leopard/testapp/LeopardTest.java +++ b/binding/android/LeopardTestApp/leopard-test-app/src/androidTest/java/ai/picovoice/leopard/testapp/LeopardTest.java @@ -271,7 +271,8 @@ public void testTranscribeAudioFileWithPunctuation() throws Exception { boolean useCER = language.equals("ja"); LeopardTranscript result = leopard.processFile(audioFile.getAbsolutePath()); - assertTrue(getWordErrorRate(result.getTranscriptString(), expectedTranscriptWithPunctuation, useCER) < errorRate); + assertTrue(getWordErrorRate( + result.getTranscriptString(), expectedTranscriptWithPunctuation, useCER) < errorRate); validateMetadata( result.getWordArray(), diff --git a/resources/.lint/java/suppress.xml b/resources/.lint/java/suppress.xml index 6cef7966..82463156 100644 --- a/resources/.lint/java/suppress.xml +++ b/resources/.lint/java/suppress.xml @@ -9,7 +9,7 @@ - + From 22333c8594f8d2a1fb6c52ec3f5b1faa52749e2b Mon Sep 17 00:00:00 2001 From: Kwangsoo Yeo Date: Tue, 21 Nov 2023 17:57:11 -0800 Subject: [PATCH 06/11] fix indent --- .../java/ai/picovoice/leopard/testapp/LeopardTest.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/binding/android/LeopardTestApp/leopard-test-app/src/androidTest/java/ai/picovoice/leopard/testapp/LeopardTest.java b/binding/android/LeopardTestApp/leopard-test-app/src/androidTest/java/ai/picovoice/leopard/testapp/LeopardTest.java index c5920af7..7d2efd95 100644 --- a/binding/android/LeopardTestApp/leopard-test-app/src/androidTest/java/ai/picovoice/leopard/testapp/LeopardTest.java +++ b/binding/android/LeopardTestApp/leopard-test-app/src/androidTest/java/ai/picovoice/leopard/testapp/LeopardTest.java @@ -272,7 +272,7 @@ public void testTranscribeAudioFileWithPunctuation() throws Exception { LeopardTranscript result = leopard.processFile(audioFile.getAbsolutePath()); assertTrue(getWordErrorRate( - result.getTranscriptString(), expectedTranscriptWithPunctuation, useCER) < errorRate); + result.getTranscriptString(), expectedTranscriptWithPunctuation, useCER) < errorRate); validateMetadata( result.getWordArray(), From 469f1dd39b4593644c0172ab9fb47587462b08c0 Mon Sep 17 00:00:00 2001 From: Kwangsoo Yeo Date: Wed, 22 Nov 2023 15:57:49 -0800 Subject: [PATCH 07/11] Rev --- .../src/main/java/ai/picovoice/leopard/Leopard.java | 9 ++++++++- .../java/ai/picovoice/leopard/LeopardTranscript.java | 12 ++++++++++-- .../leopard/exception/LeopardException.java | 2 -- .../java/ai/picovoice/leopard/testapp/BaseTest.java | 8 ++++---- .../ai/picovoice/leopard/testapp/LeopardTest.java | 3 ++- 5 files changed, 24 insertions(+), 10 deletions(-) diff --git a/binding/android/Leopard/leopard/src/main/java/ai/picovoice/leopard/Leopard.java b/binding/android/Leopard/leopard/src/main/java/ai/picovoice/leopard/Leopard.java index 913ffe29..a2719ab7 100644 --- a/binding/android/Leopard/leopard/src/main/java/ai/picovoice/leopard/Leopard.java +++ b/binding/android/Leopard/leopard/src/main/java/ai/picovoice/leopard/Leopard.java @@ -43,6 +43,11 @@ public class Leopard { } private long handle; + private static String _sdk = "android"; + + public static void setSdk(String sdk) { + Leopard._sdk = sdk; + } /** * Constructor. @@ -220,7 +225,9 @@ public Builder setEnableAutomaticPunctuation(boolean enableAutomaticPunctuation) /** * Setter for enabling speaker diarization. * - * @param enableDiarization Set to `true` to enable speaker diarization. + * @param enableDiarization Set to `true` to enable speaker diarization, which allows Leopard to + * differentiate speakers as part of the transcription process. Word + * metadata will include a `speaker_tag` to identify unique speakers. */ public Builder setEnableDiarization(boolean enableDiarization) { this.enableDiarization = enableDiarization; diff --git a/binding/android/Leopard/leopard/src/main/java/ai/picovoice/leopard/LeopardTranscript.java b/binding/android/Leopard/leopard/src/main/java/ai/picovoice/leopard/LeopardTranscript.java index 36477b69..43c4c6d5 100644 --- a/binding/android/Leopard/leopard/src/main/java/ai/picovoice/leopard/LeopardTranscript.java +++ b/binding/android/Leopard/leopard/src/main/java/ai/picovoice/leopard/LeopardTranscript.java @@ -66,9 +66,17 @@ public static class Word { * @param confidence Transcription confidence. It is a number within [0, 1]. * @param startSec Start of word in seconds. * @param endSec End of word in seconds. - * @param speakerTag Speaker tag. It is set to `-1` if speaker diarization is not enabled during initialization. + * @param speakerTag The speaker tag is `-1` if diarization is not enabled during initialization; + * otherwise, it's a non-negative integer identifying unique speakers, with `0` reserved for + * unknown speakers. */ - public Word(String word, float confidence, float startSec, float endSec, int speakerTag) { + public Word( + String word, + float confidence, + float startSec, + float endSec, + int speakerTag + ) { this.word = word; this.confidence = confidence; this.startSec = startSec; diff --git a/binding/android/Leopard/leopard/src/main/java/ai/picovoice/leopard/exception/LeopardException.java b/binding/android/Leopard/leopard/src/main/java/ai/picovoice/leopard/exception/LeopardException.java index 945c9947..1828ed5e 100644 --- a/binding/android/Leopard/leopard/src/main/java/ai/picovoice/leopard/exception/LeopardException.java +++ b/binding/android/Leopard/leopard/src/main/java/ai/picovoice/leopard/exception/LeopardException.java @@ -47,8 +47,6 @@ public String getMessage() { for (int i = 0; i < messageStack.length; i++) { sb.append(String.format("\n [%d] %s", i, messageStack[i])); } - } else { - sb.append("."); } } return sb.toString(); diff --git a/binding/android/LeopardTestApp/leopard-test-app/src/androidTest/java/ai/picovoice/leopard/testapp/BaseTest.java b/binding/android/LeopardTestApp/leopard-test-app/src/androidTest/java/ai/picovoice/leopard/testapp/BaseTest.java index 5caf49b1..c4a04845 100644 --- a/binding/android/LeopardTestApp/leopard-test-app/src/androidTest/java/ai/picovoice/leopard/testapp/BaseTest.java +++ b/binding/android/LeopardTestApp/leopard-test-app/src/androidTest/java/ai/picovoice/leopard/testapp/BaseTest.java @@ -27,7 +27,6 @@ import org.junit.Before; import org.junit.Rule; -import java.lang.Math; import java.io.BufferedInputStream; import java.io.BufferedOutputStream; import java.io.ByteArrayOutputStream; @@ -111,11 +110,12 @@ protected void validateMetadata( LeopardTranscript.Word[] expectedWords, boolean enableDiarization ) { + assertEquals(words.length, expectedWords.length); for (int i = 0; i < words.length; i++) { assertEquals(words[i].getWord(), expectedWords[i].getWord()); - assertTrue(Math.abs(words[i].getConfidence() - expectedWords[i].getConfidence()) <= 0.01); - assertTrue(Math.abs(words[i].getStartSec() - expectedWords[i].getStartSec()) <= 0.01); - assertTrue(Math.abs(words[i].getEndSec() - expectedWords[i].getEndSec()) <= 0.01); + assertEquals(words[i].getConfidence(), expectedWords[i].getConfidence(), 0.01); + assertEquals(words[i].getStartSec(), expectedWords[i].getStartSec(), 0.01); + assertEquals(words[i].getEndSec(), expectedWords[i].getEndSec(), 0.01); if (enableDiarization) { assertEquals(words[i].getSpeakerTag(), expectedWords[i].getSpeakerTag()); } else { diff --git a/binding/android/LeopardTestApp/leopard-test-app/src/androidTest/java/ai/picovoice/leopard/testapp/LeopardTest.java b/binding/android/LeopardTestApp/leopard-test-app/src/androidTest/java/ai/picovoice/leopard/testapp/LeopardTest.java index 7d2efd95..893a3e84 100644 --- a/binding/android/LeopardTestApp/leopard-test-app/src/androidTest/java/ai/picovoice/leopard/testapp/LeopardTest.java +++ b/binding/android/LeopardTestApp/leopard-test-app/src/androidTest/java/ai/picovoice/leopard/testapp/LeopardTest.java @@ -384,7 +384,7 @@ public static Collection initParameters() throws IOException { paramWords[j] = new LeopardTranscript.Word( word, 0.f, - 0.f, + 0.f, 0.f, speakerTag ); @@ -415,6 +415,7 @@ public void testDiarizationMultipleSpeakers() throws Exception { LeopardTranscript result = leopard.process(pcm); + assertEquals(result.getWordArray().length, expectedWords.length); for (int i = 0; i < result.getWordArray().length; i++) { assertEquals(result.getWordArray()[i].getWord(), expectedWords[i].getWord()); assertEquals(result.getWordArray()[i].getSpeakerTag(), expectedWords[i].getSpeakerTag()); From 75008fd38beb0abc6e98a86239393622825ce900 Mon Sep 17 00:00:00 2001 From: Kwangsoo Yeo Date: Wed, 22 Nov 2023 15:59:02 -0800 Subject: [PATCH 08/11] fix lint --- .../java/ai/picovoice/leopard/LeopardTranscript.java | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/binding/android/Leopard/leopard/src/main/java/ai/picovoice/leopard/LeopardTranscript.java b/binding/android/Leopard/leopard/src/main/java/ai/picovoice/leopard/LeopardTranscript.java index 43c4c6d5..5bb58231 100644 --- a/binding/android/Leopard/leopard/src/main/java/ai/picovoice/leopard/LeopardTranscript.java +++ b/binding/android/Leopard/leopard/src/main/java/ai/picovoice/leopard/LeopardTranscript.java @@ -71,11 +71,11 @@ public static class Word { * unknown speakers. */ public Word( - String word, - float confidence, - float startSec, - float endSec, - int speakerTag + String word, + float confidence, + float startSec, + float endSec, + int speakerTag ) { this.word = word; this.confidence = confidence; From 771717dd3bd084236bc4cfbb9d6b35e4664e7597 Mon Sep 17 00:00:00 2001 From: Kwangsoo Yeo Date: Wed, 22 Nov 2023 16:07:52 -0800 Subject: [PATCH 09/11] update package --- binding/android/LeopardTestApp/build.gradle | 2 +- demo/android/LeopardDemo/build.gradle | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/binding/android/LeopardTestApp/build.gradle b/binding/android/LeopardTestApp/build.gradle index 0a1e8246..2dcb93ac 100644 --- a/binding/android/LeopardTestApp/build.gradle +++ b/binding/android/LeopardTestApp/build.gradle @@ -17,7 +17,7 @@ allprojects { google() mavenCentral() maven { - url 'https://s01.oss.sonatype.org/content/repositories/aipicovoice-1299/' + url 'https://s01.oss.sonatype.org/content/repositories/aipicovoice-1303/' } } } diff --git a/demo/android/LeopardDemo/build.gradle b/demo/android/LeopardDemo/build.gradle index 0a1e8246..2dcb93ac 100644 --- a/demo/android/LeopardDemo/build.gradle +++ b/demo/android/LeopardDemo/build.gradle @@ -17,7 +17,7 @@ allprojects { google() mavenCentral() maven { - url 'https://s01.oss.sonatype.org/content/repositories/aipicovoice-1299/' + url 'https://s01.oss.sonatype.org/content/repositories/aipicovoice-1303/' } } } From 3fba4f96016afb59084d30fe6bf54cc687539c69 Mon Sep 17 00:00:00 2001 From: Kwangsoo Yeo Date: Wed, 22 Nov 2023 17:32:05 -0800 Subject: [PATCH 10/11] rev --- .../leopard/src/main/java/ai/picovoice/leopard/Leopard.java | 2 ++ 1 file changed, 2 insertions(+) diff --git a/binding/android/Leopard/leopard/src/main/java/ai/picovoice/leopard/Leopard.java b/binding/android/Leopard/leopard/src/main/java/ai/picovoice/leopard/Leopard.java index a2719ab7..d13eb457 100644 --- a/binding/android/Leopard/leopard/src/main/java/ai/picovoice/leopard/Leopard.java +++ b/binding/android/Leopard/leopard/src/main/java/ai/picovoice/leopard/Leopard.java @@ -65,6 +65,8 @@ private Leopard( String modelPath, boolean enableAutomaticPunctuation, boolean enableDiarization) throws LeopardException { + LeopardNative.setSdk(Leopard._sdk); + handle = LeopardNative.init( accessKey, modelPath, From f09309fc30913844eabbe11feaaced6fd8f330ee Mon Sep 17 00:00:00 2001 From: Kwangsoo Yeo Date: Wed, 22 Nov 2023 17:34:03 -0800 Subject: [PATCH 11/11] update gradle --- binding/android/LeopardTestApp/build.gradle | 2 +- demo/android/LeopardDemo/build.gradle | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/binding/android/LeopardTestApp/build.gradle b/binding/android/LeopardTestApp/build.gradle index 2dcb93ac..1262a8f1 100644 --- a/binding/android/LeopardTestApp/build.gradle +++ b/binding/android/LeopardTestApp/build.gradle @@ -17,7 +17,7 @@ allprojects { google() mavenCentral() maven { - url 'https://s01.oss.sonatype.org/content/repositories/aipicovoice-1303/' + url 'https://s01.oss.sonatype.org/content/repositories/aipicovoice-1305/' } } } diff --git a/demo/android/LeopardDemo/build.gradle b/demo/android/LeopardDemo/build.gradle index 2dcb93ac..1262a8f1 100644 --- a/demo/android/LeopardDemo/build.gradle +++ b/demo/android/LeopardDemo/build.gradle @@ -17,7 +17,7 @@ allprojects { google() mavenCentral() maven { - url 'https://s01.oss.sonatype.org/content/repositories/aipicovoice-1303/' + url 'https://s01.oss.sonatype.org/content/repositories/aipicovoice-1305/' } } }