fix: issue with isModelGenerating when switching between multiple models #73
Android native module (Kotlin):
@@ -64,6 +64,7 @@ class LLM(reactContext: ReactApplicationContext) :
   private fun initializeLlamaModule(modelPath: String, tokenizerPath: String, promise: Promise) {
     llamaModule = LlamaModule(1, modelPath, tokenizerPath, 0.7f)
     isFetching = false
+    this.tempLlamaResponse.clear()
     promise.resolve("Model loaded successfully")
   }
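The new clear() is the core of the fix on the Android side: streamed tokens accumulate in tempLlamaResponse, so without resetting it a response that was only partly generated before the user switched models would be prepended to the new model's first answer. A minimal Kotlin sketch of that accumulation pattern, assuming tempLlamaResponse is a StringBuilder-like buffer (only the field name and clear() come from the diff; everything else is illustrative):

    // Sketch only: names other than tempLlamaResponse/clear() are assumed.
    class ResponseBuffer {
      private val tempLlamaResponse = StringBuilder()

      // Called for every streamed token while the current model generates.
      fun onToken(token: String) {
        tempLlamaResponse.append(token)
      }

      // Called when a new LlamaModule is initialized: drop any partial output
      // left over from the previous model so it cannot leak into the next chat.
      fun onModelInitialized() {
        tempLlamaResponse.clear()
      }

      fun snapshot(): String = tempLlamaResponse.toString()
    }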
@@ -74,8 +75,8 @@ class LLM(reactContext: ReactApplicationContext) :
     contextWindowLength: Double,
     promise: Promise
   ) {
-    if (llamaModule != null || isFetching) {
-      promise.reject("Model already loaded", "Model is already loaded or fetching")
+    if (isFetching) {
+      promise.reject("Model already loaded", "Model is already fetching")
       return
     }
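Reviewers on both platforms flag the same mismatch: the guard now only checks isFetching, but the rejection still uses the code "Model already loaded". A hedged Kotlin sketch of a guard whose code and message describe the condition actually being checked (the MODEL_FETCHING code is an assumption for illustration, not something this PR adds):

    import com.facebook.react.bridge.Promise

    // Sketch only: a previously loaded model is simply replaced by the next load,
    // so the only thing worth rejecting is a download that is still in flight.
    fun rejectIfFetching(isFetching: Boolean, promise: Promise): Boolean {
      if (isFetching) {
        // Hypothetical code/message pair; the PR keeps "Model already loaded".
        promise.reject("MODEL_FETCHING", "A model download is already in progress")
        return true
      }
      return false
    }

The iOS guard in the second file below would take the same shape, rejecting with a fetching-specific code instead of model_already_loaded.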
@@ -148,10 +149,6 @@ class LLM(reactContext: ReactApplicationContext) :
     llamaModule!!.stop()
   }

-  override fun deleteModule() {
-    llamaModule = null
-  }
-
   companion object {
     const val NAME = "LLM"
   }

Review comment (on the removed deleteModule()): Why are we getting rid of this?
Reply: I noticed that it doesn't really change memory usage, but let's leave it for now as it's not breaking anything.
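Taken together, the Kotlin changes mean switching models no longer requires tearing anything down first: a second load call passes the guard, overwrites llamaModule, and resets the per-model state. A rough sketch of that post-PR flow, with the download step and module construction stubbed out (only the identifiers that appear in the diff are taken from the PR):

    import com.facebook.react.bridge.Promise

    // Rough sketch of the post-PR load path; the module is stubbed as Any
    // because the real LlamaModule construction and fetching are not shown here.
    class LlmLoaderSketch {
      private var llamaModule: Any? = null
      private var isFetching = false
      private val tempLlamaResponse = StringBuilder()

      fun loadLLM(modelPath: String, tokenizerPath: String, promise: Promise) {
        if (isFetching) {                           // only an in-flight download is rejected
          promise.reject("Model already loaded", "Model is already fetching")
          return
        }
        isFetching = true
        // ... download / resolve model and tokenizer files ...
        llamaModule = Pair(modelPath, tokenizerPath) // stands in for LlamaModule(1, modelPath, tokenizerPath, 0.7f)
        isFetching = false
        tempLlamaResponse.clear()                    // drop partial output from the previous model
        promise.resolve("Model loaded successfully")
      }
    }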
iOS native module (Objective-C++):
@@ -28,7 +28,7 @@ - (instancetype)init {
     isFetching = NO;
     tempLlamaResponse = [[NSMutableString alloc] init];
   }

   return self;
 }
@@ -38,7 +38,7 @@ - (void)onResult:(NSString *)token prompt:(NSString *)prompt {
   if ([token isEqualToString:prompt]) {
     return;
   }

   dispatch_async(dispatch_get_main_queue(), ^{
     [self emitOnToken:token];
     [self->tempLlamaResponse appendString:token];
@@ -54,8 +54,8 @@ - (void)updateDownloadProgress:(NSNumber *)progress {
 - (void)loadLLM:(NSString *)modelSource tokenizerSource:(NSString *)tokenizerSource systemPrompt:(NSString *)systemPrompt contextWindowLength:(double)contextWindowLength resolve:(RCTPromiseResolveBlock)resolve reject:(RCTPromiseRejectBlock)reject {
   NSURL *modelURL = [NSURL URLWithString:modelSource];
   NSURL *tokenizerURL = [NSURL URLWithString:tokenizerSource];
-  if(self->runner || isFetching){
+  if(isFetching){
     reject(@"model_already_loaded", @"Model and tokenizer already loaded", nil);
     return;
   }

Review comment (on the reject call): Is this error message correct? You're not checking if it's loaded.
@@ -78,10 +78,11 @@ - (void)loadLLM:(NSString *)modelSource tokenizerSource:(NSString *)tokenizerSou

   modelFetcher.onFinish = ^(NSString *modelFilePath) {
     self->runner = [[LLaMARunner alloc] initWithModelPath:modelFilePath tokenizerPath:tokenizerFilePath];
     NSUInteger contextWindowLengthUInt = (NSUInteger)round(contextWindowLength);

     self->conversationManager = [[ConversationManager alloc] initWithNumMessagesContextWindow: contextWindowLengthUInt systemPrompt: systemPrompt];
     self->isFetching = NO;
+    self->tempLlamaResponse = [NSMutableString string];
     resolve(@"Model and tokenizer loaded successfully");
     return;
   };
@@ -94,23 +95,23 @@ - (void)loadLLM:(NSString *)modelSource tokenizerSource:(NSString *)tokenizerSou
 - (void) runInference:(NSString *)input resolve:(RCTPromiseResolveBlock)resolve reject:(RCTPromiseRejectBlock)reject {
   [conversationManager addResponse:input senderRole:ChatRole::USER];
   NSString *prompt = [conversationManager getConversation];

   dispatch_async(dispatch_get_global_queue(DISPATCH_QUEUE_PRIORITY_DEFAULT, 0), ^{
     NSError *error = nil;
     [self->runner generate:prompt withTokenCallback:^(NSString *token) {
       [self onResult:token prompt:prompt];
     } error:&error];

     // make sure to add eot token once generation is done
     if (![self->tempLlamaResponse hasSuffix:END_OF_TEXT_TOKEN_NS]) {
       [self onResult:END_OF_TEXT_TOKEN_NS prompt:prompt];
     }

     if (self->tempLlamaResponse) {
       [self->conversationManager addResponse:self->tempLlamaResponse senderRole:ChatRole::ASSISTANT];
       self->tempLlamaResponse = [NSMutableString string];
     }

     if (error) {
       reject(@"error_in_generation", error.localizedDescription, nil);
       return;
@@ -125,10 +126,6 @@ -(void)interrupt {
   [self->runner stop];
 }

--(void)deleteModule {
-  self->runner = nil;
-}
-
 - (std::shared_ptr<facebook::react::TurboModule>)getTurboModule:(const facebook::react::ObjCTurboModule::InitParams &)params
 {
   return std::make_shared<facebook::react::NativeLLMSpecJSI>(params);
Review comment: The first argument seems wrong.