Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add support for logging timestamps while transcribing #32

Merged
merged 1 commit into from
Oct 24, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions src/Hear.h
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@
onDevice:(BOOL)useOnDeviceRecognition
singleLineMode:(BOOL)singleLine
addPunctuation:(BOOL)addPunctuation
addTimestamps:(BOOL)addTimestamps
exitWord:(NSString *)exitWord
timeout:(CGFloat)timeout;

Expand Down
25 changes: 22 additions & 3 deletions src/Hear.m
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ @interface Hear()
@property (nonatomic) BOOL useOnDeviceRecognition;
@property (nonatomic) BOOL singleLineMode;
@property (nonatomic) BOOL addPunctuation;
@property (nonatomic) BOOL addTimestamps;
@property (nonatomic, retain) NSString *exitWord;
@property (nonatomic) CGFloat timeout;

Expand All @@ -61,6 +62,7 @@ - (instancetype)initWithLocale:(NSString *)loc
onDevice:(BOOL)onDevice
singleLineMode:(BOOL)singleLine
addPunctuation:(BOOL)punctuation
addTimestamps:(BOOL)timestamps
exitWord:(NSString *)exitWord
timeout:(CGFloat)timeout {
self = [super init];
Expand All @@ -76,6 +78,7 @@ - (instancetype)initWithLocale:(NSString *)loc
self.singleLineMode = singleLine;
self.useDeviceInput = (input == nil);
self.addPunctuation = punctuation;
self.addTimestamps = timestamps;
self.exitWord = exitWord;
self.timeout = timeout;
}
Expand Down Expand Up @@ -193,13 +196,21 @@ - (void)processFile {
if (result == nil) {
return;
}


if (@available(macOS 13, *)) {
if (self.addTimestamps) {
SFSpeechRecognitionMetadata* meta = result.speechRecognitionMetadata;
NSString *timestamp = [[NSDateComponentsFormatter new] stringFromTimeInterval:meta.speechStartTimestamp];
NSDump([NSString stringWithFormat:@"\n%@ -> \n", timestamp]);
}
}

// Make sure there's a space between the incoming result strings
NSString *s = result.bestTranscription.formattedString;
if ([s hasSuffix:@" "] == FALSE && !result.isFinal) {
s = [NSString stringWithFormat:@"%@ ", s];
}

// Print to stdout without newline and flush
NSDump(s);

Expand Down Expand Up @@ -246,7 +257,15 @@ - (void)startListening {
if (self.timeout > 0) {
[self startTimer:self];
}


if (@available(macOS 13, *)) {
if (self.addTimestamps) {
SFSpeechRecognitionMetadata* meta = result.speechRecognitionMetadata;
NSString *timestamp = [[NSDateComponentsFormatter new] stringFromTimeInterval:meta.speechStartTimestamp];
NSDump([NSString stringWithFormat:@"\n%@ -> \n", timestamp]);
}
}

// Print to stdout
NSString *transcript = result.bestTranscription.formattedString;
if (self.singleLineMode) {
Expand Down
13 changes: 12 additions & 1 deletion src/main.m
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@
static inline void PrintHelp(void);

// Command line options
static const char optstring[] = "sl:i:dpmx:t:hv";
static const char optstring[] = "sl:i:dpmx:t:Thv";

static struct option long_options[] = {
// List supported locales for speech to text
Expand All @@ -56,6 +56,8 @@
{"device", no_argument, 0, 'd'},
// Whether to add punctuation to speech recognition results
{"punctuation", no_argument, 0, 'p'},
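// Whether to add timestamps to speech recognition results (file or microphone input; ignored before macOS 13)
// NOTE(review): the help text advertises --timestamps, but the long option registered below is "timestamp" — confirm which spelling is intended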
{"timestamp", no_argument, 0, 'T'},
// Enable single-line output mode (for mic)
{"mode", no_argument, 0, 'm'},
// Exit word
Expand Down Expand Up @@ -84,6 +86,7 @@ int main(int argc, const char * argv[]) { @autoreleasepool {
BOOL useOnDeviceRecognition = NO;
BOOL singleLineMode = NO;
BOOL addsPunctuation = NO;
BOOL addsTimestamps = NO;
CGFloat timeout = 0.0f;

// Parse arguments
Expand Down Expand Up @@ -124,6 +127,12 @@ int main(int argc, const char * argv[]) { @autoreleasepool {
case 'p':
addsPunctuation = YES;
break;

// Whether to add timestamps to speech recognition results
// This option is ignored on macOS versions prior to Ventura
case 'T':
addsTimestamps = YES;
break;

// Set exit word (causes app to exit when word detected in speech)
case 'x':
Expand Down Expand Up @@ -155,6 +164,7 @@ int main(int argc, const char * argv[]) { @autoreleasepool {
onDevice:useOnDeviceRecognition
singleLineMode:singleLineMode
addPunctuation:addsPunctuation
addTimestamps:addsTimestamps
exitWord:exitWord
timeout:timeout];
[[NSApplication sharedApplication] setDelegate:hear];
Expand Down Expand Up @@ -194,6 +204,7 @@ static inline void PrintHelp(void) {
-p --punctuation Add punctuation to speech recognition results (macOS 13+)\n\
-x --exit-word Set exit word that causes program to quit\n\
-t --timeout Set silence timeout (in seconds)\n\
-T --timestamps Write timestamps as transcription occurs\n\
\n\
-h --help Prints help\n\
-v --version Prints program name and version\n\
Expand Down