// stt_service.proto: gRPC contract for the streaming speech-to-text service.
syntax = "proto3";
package sk.stt.v1;
import "google/protobuf/duration.proto";
// Speech-to-text (STT) gRPC service.
service SttService {
  // Bidirectional streaming recognition: the client sends a stream of
  // StreamingRecognitionRequest messages and the server answers with a stream
  // of StreamingRecognitionResponse messages.
  rpc StreamingRecognize(stream StreamingRecognitionRequest)
      returns (stream StreamingRecognitionResponse);
}
// One client-to-server message of the StreamingRecognize stream.
// At most one member of `streaming_request` is set per message.
message StreamingRecognitionRequest {
  oneof streaming_request {
    // Recognition settings.
    // NOTE(review): presumably sent once, as the first message of the
    // stream, before any audio -- confirm against the server implementation.
    RecognitionConfig config = 1;

    // Raw audio bytes.
    // NOTE(review): presumably encoded as declared in the config's
    // RecognitionSpec -- confirm.
    bytes audio_content = 2;
  }
}
// One server-to-client message of the StreamingRecognize stream.
message StreamingRecognitionResponse {
  // Field number 2 and the name "end_of_single_utterance" are retired;
  // keeping them reserved prevents accidental reuse.
  reserved 2;
  reserved "end_of_single_utterance";

  // Recognition results carried by this message.
  repeated SpeechRecognitionChunk chunks = 1;
}
// Configuration message carried in the `config` member of
// StreamingRecognitionRequest.
message RecognitionConfig {
  // Audio format, language and result options for recognition.
  RecognitionSpec specification = 1;
}
// Parameters describing the incoming audio and the kind of results the
// client wants back.
message RecognitionSpec {
  // Supported encodings of the audio payload.
  enum AudioEncoding {
    // Encoding was not specified (proto3 default value).
    AUDIO_ENCODING_UNSPECIFIED = 0;
    // 16-bit signed little-endian linear PCM.
    LINEAR16_PCM = 1;
  }

  // Field numbers 6 and 9 are not assigned to any field of this message.
  // They are reserved so that a future field cannot silently take a number
  // that another revision of this schema may already use on the wire.
  // NOTE(review): confirm whether these numbers belonged to removed fields
  // and, if so, reserve the old field names as well.
  reserved 6, 9;

  // Encoding of the audio sent by the client.
  AudioEncoding audio_encoding = 1;

  // Sample rate of the audio in hertz.
  // For PCM audio only 8000, 16000 and 48000 are supported.
  int64 sample_rate_hertz = 2;

  // Language of the speech, as a BCP-47 code.
  string language_code = 3;

  // NOTE(review): presumably enables filtering of profanity in the returned
  // text -- exact behavior is not visible in this file; confirm.
  bool profanity_filter = 4;

  // NOTE(review): presumably selects the recognition model by name --
  // accepted values are not visible in this file; confirm.
  string model = 5;

  // If true, tentative hypotheses may be returned as they become available
  // (chunks with final=false). If false or omitted, only final=true results
  // are returned. Meaningful only for StreamingRecognize requests.
  bool partial_results = 7;

  // NOTE(review): presumably limits recognition to a single utterance --
  // exact behavior is not visible in this file; confirm.
  bool single_utterance = 8;

  // If true, text normalization is disabled and raw results are returned.
  bool raw_results = 10;
}
// A portion of the recognition result delivered in a
// StreamingRecognitionResponse.
message SpeechRecognitionChunk {
  // Alternative hypotheses for the text of this chunk.
  repeated SpeechRecognitionAlternative alternatives = 1;

  // True when this chunk holds a part of the recognized text that will not
  // be changed by later chunks.
  bool final = 2;

  // True when this chunk marks the end of an utterance.
  bool end_of_utterance = 3;
}
// A single recognition hypothesis.
message SpeechRecognitionAlternative {
  // Recognized text.
  string text = 1;

  // Confidence of this hypothesis.
  // NOTE(review): the value range is not stated in this file (presumably
  // 0.0 to 1.0) -- confirm.
  float confidence = 2;

  // Per-word details for the recognized text.
  repeated WordInfo words = 3;
}
// Timing and confidence information for one recognized word.
message WordInfo {
  // Time at which the word starts.
  // NOTE(review): presumably an offset from the beginning of the audio
  // stream -- confirm.
  google.protobuf.Duration start_time = 1;

  // Time at which the word ends, measured the same way as start_time.
  google.protobuf.Duration end_time = 2;

  // The recognized word.
  string word = 3;

  // Confidence of this word.
  // NOTE(review): the value range is not stated in this file -- confirm.
  float confidence = 4;
}