forked from youtube/cobalt_sandbox
-
Notifications
You must be signed in to change notification settings - Fork 0
/
speech_recognizer.h
202 lines (176 loc) · 8.46 KB
/
speech_recognizer.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
// Copyright 2017 The Cobalt Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Module Overview: Starboard speech recognizer module
//
// Defines a streaming speech recognizer API. It provides access to the platform
// speech recognition service.
//
// Note that there can be only one speech recognizer. Attempting to create a
// second speech recognizer without destroying the first one will result in
// undefined behavior.
//
// |SbSpeechRecognizerCreate|, |SbSpeechRecognizerStart|,
// |SbSpeechRecognizerStop|, |SbSpeechRecognizerCancel| and
// |SbSpeechRecognizerDestroy| should be called from a single thread. Callbacks
// defined in |SbSpeechRecognizerHandler| will happen on another thread, so
// calls back into the SbSpeechRecognizer from the callback thread are
// disallowed.
#ifndef STARBOARD_SPEECH_RECOGNIZER_H_
#define STARBOARD_SPEECH_RECOGNIZER_H_
#include "starboard/configuration.h"
#include "starboard/export.h"
#include "starboard/types.h"
#if SB_API_VERSION >= 13
#error Speech Recognizer is deprecated, switch to microphone implementation.
#endif
#ifdef __cplusplus
extern "C" {
#endif
// An opaque handle to an implementation-private structure that represents a
// speech recognizer. Handles are returned by |SbSpeechRecognizerCreate| and
// released with |SbSpeechRecognizerDestroy|. The value
// |kSbSpeechRecognizerInvalid| denotes an invalid handle.
typedef struct SbSpeechRecognizerPrivate* SbSpeechRecognizer;
// Well-defined value for an invalid speech recognizer handle: the null pointer
// cast to |SbSpeechRecognizer|. |SbSpeechRecognizerCreate| returns this value
// when no speech recognition service is available on the device.
#define kSbSpeechRecognizerInvalid ((SbSpeechRecognizer)NULL)
// Reports whether |recognizer| is a valid speech recognizer handle, that is,
// whether it differs from |kSbSpeechRecognizerInvalid|.
//
// |recognizer|: the handle to check.
static SB_C_INLINE bool SbSpeechRecognizerIsValid(
    SbSpeechRecognizer recognizer) {
  const bool is_invalid = (recognizer == kSbSpeechRecognizerInvalid);
  return !is_invalid;
}
// Enumerates the reasons a recognition attempt can fail. The enumerator values
// are spelled out explicitly; they are the same values the compiler assigns
// when they are left implicit.
typedef enum SbSpeechRecognizerError {
  // Speech timed out: no speech was detected.
  kSbNoSpeechError = 0,

  // Speech input was aborted somehow.
  kSbAborted = 1,

  // Capturing audio failed.
  kSbAudioCaptureError = 2,

  // Network communication that was required to complete the recognition
  // failed.
  kSbNetworkError = 3,

  // The implementation does not allow any speech input to occur, for reasons
  // of security, privacy or user preference.
  kSbNotAllowed = 4,

  // The implementation does not allow the speech service requested by the
  // application, but would allow some other speech service. This is either
  // because the implementation doesn't support the selected service or because
  // of reasons of security, privacy or user preference.
  kSbServiceNotAllowed = 5,

  // There was an error in the speech recognition grammar or semantic tags, or
  // the grammar format or semantic tag format is unsupported.
  kSbBadGrammar = 6,

  // The language was not supported.
  kSbLanguageNotSupported = 7,
} SbSpeechRecognizerError;
// A recognition response delivered by the recognizer.
typedef struct SbSpeechResult {
  // Raw words spoken by the user.
  char* transcript;

  // Estimate, between 0 and 1, of how confident the recognition system is that
  // the recognition is correct; larger values mean more confidence. NaN means
  // that no confidence score is available.
  float confidence;
} SbSpeechResult;
// The speech configuration passed to |SbSpeechRecognizerStart|.
typedef struct SbSpeechConfiguration {
  // false: the implementation MUST return no more than one final result in
  // response to starting recognition.
  // true: the implementation MUST return zero or more final results
  // representing multiple consecutive recognitions in response to starting
  // recognition.
  // This setting does not affect interim results.
  bool continuous;

  // true: interim results SHOULD be returned.
  // false: interim results MUST NOT be returned.
  // This setting does not affect final results.
  bool interim_results;

  // Upper bound on the number of SbSpeechResult entries delivered in one call
  // of the results callback (|SbSpeechRecognizerResultsFunction|).
  int max_alternatives;
} SbSpeechConfiguration;
// Callback type used to notify that the user has started or stopped speaking.
// Per the module overview, handler callbacks happen on a different thread than
// the one calling the SbSpeechRecognizer functions, and must not call back
// into the SbSpeechRecognizer.
//
// |context|: the |context| value stored in the |SbSpeechRecognizerHandler|.
// |detected|: true if the user has started to speak, and false if the user
// has stopped speaking.
typedef void (*SbSpeechRecognizerSpeechDetectedFunction)(void* context,
bool detected);
// Callback type used to notify that a speech recognition error occurred.
// Per the module overview, handler callbacks happen on a different thread than
// the one calling the SbSpeechRecognizer functions, and must not call back
// into the SbSpeechRecognizer.
//
// |context|: the |context| value stored in the |SbSpeechRecognizerHandler|.
// |error|: the speech recognition error that occurred.
typedef void (*SbSpeechRecognizerErrorFunction)(void* context,
SbSpeechRecognizerError error);
// Callback type used to notify that recognition results are ready.
// Per the module overview, handler callbacks happen on a different thread than
// the one calling the SbSpeechRecognizer functions, and must not call back
// into the SbSpeechRecognizer.
//
// |context|: the |context| value stored in the |SbSpeechRecognizerHandler|.
// |results|: the list of recognition results.
// |results_size|: the number of entries in |results|.
// |is_final|: indicates whether |results| is final.
typedef void (*SbSpeechRecognizerResultsFunction)(void* context,
SbSpeechResult* results,
int results_size,
bool is_final);
// Allows receiving notifications from the device when recognition related
// events occur.
//
// The void* context is passed to every function.
//
// The struct is typedef'd to its own tag name so that C translation units can
// refer to it as plain |SbSpeechRecognizerHandler|, which is the spelling used
// by |SbSpeechRecognizerCreate|. Without the typedef that spelling is only
// accepted by C++ compilers.
typedef struct SbSpeechRecognizerHandler {
  // Function to notify the beginning/end of the speech.
  SbSpeechRecognizerSpeechDetectedFunction on_speech_detected;

  // Function to notify the speech error.
  SbSpeechRecognizerErrorFunction on_error;

  // Function to notify that the recognition results are available.
  SbSpeechRecognizerResultsFunction on_results;

  // This is passed to handler functions as first argument.
  void* context;
} SbSpeechRecognizerHandler;
// Returns whether the platform supports SbSpeechRecognizer.
//
// Takes no arguments. The parameter list is written as |void| so that the
// declaration is a real prototype when compiled as C, where an empty list
// leaves the parameters unspecified. The meaning in C++ is unchanged.
SB_EXPORT bool SbSpeechRecognizerIsSupported(void);
// Creates a speech recognizer with a speech recognizer handler.
//
// If the system has a speech recognition service available, this function
// returns the newly created handle.
//
// If no speech recognition service is available on the device, this function
// returns |kSbSpeechRecognizerInvalid|.
//
// There can be only one speech recognizer: creating a second one without
// destroying the first one results in undefined behavior.
//
// |handler|: the callbacks, and the context passed to them, that receive
// recognition events. |SbSpeechRecognizerCreate| does not expect the passed
// SbSpeechRecognizerHandler structure to live after |SbSpeechRecognizerCreate|
// is called, so the implementation must copy the contents if necessary.
SB_EXPORT SbSpeechRecognizer
SbSpeechRecognizerCreate(const SbSpeechRecognizerHandler* handler);
// Starts listening to audio and recognizing speech with the specified speech
// configuration. If |SbSpeechRecognizerStart| is called on an already
// started speech recognizer, the implementation MUST ignore the call and return
// false.
//
// Returns whether the speech recognizer is started successfully.
//
// |recognizer|: the speech recognizer to start.
// |configuration|: the speech configuration to recognize with.
SB_EXPORT bool SbSpeechRecognizerStart(
SbSpeechRecognizer recognizer,
const SbSpeechConfiguration* configuration);
// Stops listening to audio and returns a result using just the audio that it
// has already received. Once |SbSpeechRecognizerStop| is called, the
// implementation MUST NOT collect additional audio and MUST NOT continue to
// listen to the user. This is important for privacy reasons. If
// |SbSpeechRecognizerStop| is called on a speech recognizer which is already
// stopped or being stopped, the implementation MUST ignore the call.
//
// |recognizer|: the speech recognizer to stop.
SB_EXPORT void SbSpeechRecognizerStop(SbSpeechRecognizer recognizer);
// Cancels speech recognition. The speech recognizer stops listening to
// audio and does not return any information. When |SbSpeechRecognizerCancel| is
// called, the implementation MUST NOT collect additional audio, MUST NOT
// continue to listen to the user, and MUST stop recognizing. This is important
// for privacy reasons. If |SbSpeechRecognizerCancel| is called on a speech
// recognizer which is already stopped or cancelled, the implementation MUST
// ignore the call.
//
// |recognizer|: the speech recognizer whose recognition is cancelled.
SB_EXPORT void SbSpeechRecognizerCancel(SbSpeechRecognizer recognizer);
// Destroys the given speech recognizer. If the speech recognizer is in the
// started state, it is first stopped and then destroyed.
//
// |recognizer|: the speech recognizer to destroy.
SB_EXPORT void SbSpeechRecognizerDestroy(SbSpeechRecognizer recognizer);
#ifdef __cplusplus
} // extern "C"
#endif
#endif // STARBOARD_SPEECH_RECOGNIZER_H_