Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions app/client/api.ts
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ export interface LLMConfig {
stream?: boolean;
presence_penalty?: number;
frequency_penalty?: number;
enable_thinking?: boolean;
}

export interface ChatOptions {
Expand Down
48 changes: 46 additions & 2 deletions app/client/webllm.ts
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,20 @@ export class WebLLMApi implements LLMApi {
async chat(options: ChatOptions): Promise<void> {
if (!this.initialized || this.isDifferentConfig(options.config)) {
this.llmConfig = { ...(this.llmConfig || {}), ...options.config };
// Check if this is a Qwen3 model with thinking mode enabled
const isQwen3Model = this.llmConfig?.model
?.toLowerCase()
.startsWith("qwen3");
const isThinkingEnabled = this.llmConfig?.enable_thinking === true;

// Apply special config for Qwen3 models with thinking mode enabled
if (isQwen3Model && isThinkingEnabled && this.llmConfig) {
this.llmConfig = {
...this.llmConfig,
temperature: 0.6,
top_p: 0.95,
};
}
try {
await this.initModel(options.onUpdate);
} catch (err: any) {
Expand Down Expand Up @@ -160,13 +174,14 @@ export class WebLLMApi implements LLMApi {
"stream",
"presence_penalty",
"frequency_penalty",
"enable_thinking",
];

for (const field of optionalFields) {
if (
this.llmConfig[field] !== undefined &&
config[field] !== undefined &&
config[field] !== config[field]
this.llmConfig[field] !== config[field]
) {
return true;
}
Expand All @@ -184,10 +199,39 @@ export class WebLLMApi implements LLMApi {
usage?: CompletionUsage,
) => void,
) {
// For Qwen3 models, we need to filter out the <think>...</think> content
// Do not do it inplace, create a new messages array
let newMessages: RequestMessage[] | undefined;
const isQwen3Model = this.llmConfig?.model
?.toLowerCase()
.startsWith("qwen3");
if (isQwen3Model) {
newMessages = messages.map((message) => {
const newMessage = { ...message };
if (
message.role === "assistant" &&
typeof message.content === "string"
) {
newMessage.content = message.content.replace(
/^<think>[\s\S]*?<\/think>\n?\n?/,
"",
);
}
return newMessage;
});
}

// Prepare extra_body with enable_thinking option for Qwen3 models
const extraBody: Record<string, any> = {};
if (isQwen3Model) {
extraBody.enable_thinking = this.llmConfig?.enable_thinking ?? false;
}

const completion = await this.webllm.engine.chatCompletion({
stream: stream,
messages: messages as ChatCompletionMessageParam[],
messages: (newMessages || messages) as ChatCompletionMessageParam[],
...(stream ? { stream_options: { include_usage: true } } : {}),
...(Object.keys(extraBody).length > 0 ? { extra_body: extraBody } : {}),
});

if (stream) {
Expand Down
4 changes: 4 additions & 0 deletions app/components/chat.module.scss
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,10 @@
width: var(--icon-width);
overflow: hidden;

&.selected {
background-color: var(--second);
}

&:not(:last-child) {
margin-right: 5px;
}
Expand Down
18 changes: 16 additions & 2 deletions app/components/chat.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ import DeleteIcon from "../icons/clear.svg";
import EditIcon from "../icons/rename.svg";
import ConfirmIcon from "../icons/confirm.svg";
import ImageIcon from "../icons/image.svg";
import BrainIcon from "../icons/brain.svg";

import BottomIcon from "../icons/bottom.svg";
import StopIcon from "../icons/pause.svg";
Expand Down Expand Up @@ -385,6 +386,7 @@ function ChatAction(props: {
icon: JSX.Element;
onClick: () => void;
fullWidth?: boolean;
selected?: boolean;
}) {
const iconRef = useRef<HTMLDivElement>(null);
const textRef = useRef<HTMLDivElement>(null);
Expand All @@ -406,7 +408,7 @@ function ChatAction(props: {

return props.fullWidth ? (
<div
className={`${styles["chat-input-action"]} clickable ${styles["full-width"]}`}
className={`${styles["chat-input-action"]} clickable ${styles["full-width"]} ${props.selected ? styles["selected"] : ""}`}
onClick={props.onClick}
>
<div ref={iconRef} className={styles["icon"]}>
Expand All @@ -418,7 +420,7 @@ function ChatAction(props: {
</div>
) : (
<div
className={`${styles["chat-input-action"]} clickable`}
className={`${styles["chat-input-action"]} clickable ${props.selected ? styles["selected"] : ""}`}
onClick={() => {
props.onClick();
setTimeout(updateWidth, 1);
Expand Down Expand Up @@ -535,6 +537,18 @@ export function ChatActions(props: {
});
}}
/>
{config.modelConfig.model.toLowerCase().startsWith("qwen3") && (
<ChatAction
onClick={() =>
config.update(
(config) => (config.enableThinking = !config.enableThinking),
)
}
text={Locale.Settings.THINKING}
icon={<BrainIcon />}
selected={config.enableThinking}
/>
)}
<ChatAction
onClick={() => setShowModelSelector(true)}
text={currentModel}
Expand Down
18 changes: 18 additions & 0 deletions app/components/model-config.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,24 @@ export function ModelConfigList() {
</Select>
</ListItem>

{config.modelConfig.model.toLowerCase().startsWith("qwen3") && (
<ListItem
title={Locale.Settings.EnableThinking.Title}
subTitle={Locale.Settings.EnableThinking.SubTitle}
>
<input
type="checkbox"
checked={config.enableThinking}
onChange={(e) =>
config.update(
(config) =>
(config.enableThinking = e.currentTarget.checked),
)
}
></input>
</ListItem>
)}

{/* New setting item for LLM model context window length */}
<ListItem
title={Locale.Settings.ContextWindowLength.Title}
Expand Down
75 changes: 63 additions & 12 deletions app/constant.ts
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,20 @@ export enum ModelFamily {
DEEPSEEK = "DeepSeek",
}

// Shared metadata and sampling defaults for every Qwen3 model entry in
// DEFAULT_MODEL_BASES; each record spreads this object after its `name`.
const qwen3_common_configs = {
  display_name: "Qwen",
  provider: "Alibaba",
  family: ModelFamily.QWEN,
  // Recommended config is for non-thinking mode.
  // For thinking mode, see webllm.ts where temperature=0.6 and top_p=0.95
  // are applied instead of these values.
  recommended_config: {
    temperature: 0.7,
    presence_penalty: 0,
    frequency_penalty: 0,
    top_p: 0.8,
  },
};

const DEFAULT_MODEL_BASES: ModelRecord[] = [
// Phi-3.5 Vision
{
Expand Down Expand Up @@ -410,6 +424,7 @@ const DEFAULT_MODEL_BASES: ModelRecord[] = [
top_p: 1,
},
},
// Mistral
{
name: "Mistral-7B-Instruct-v0.3-q4f16_1-MLC",
display_name: "Mistral",
Expand Down Expand Up @@ -464,6 +479,7 @@ const DEFAULT_MODEL_BASES: ModelRecord[] = [
top_p: 0.95,
},
},
// WizardMath
{
name: "WizardMath-7B-V1.1-q4f16_1-MLC",
display_name: "WizardMath",
Expand Down Expand Up @@ -571,20 +587,50 @@ const DEFAULT_MODEL_BASES: ModelRecord[] = [
top_p: 1,
},
},
// Qwen3
{
name: "Qwen2.5-0.5B-Instruct-q4f16_1-MLC",
display_name: "Qwen",
provider: "Alibaba",
family: ModelFamily.QWEN,
recommended_config: {
temperature: 0.7,
presence_penalty: 0,
frequency_penalty: 0,
top_p: 0.8,
},
name: "Qwen3-0.6B-q4f16_1-MLC",
...qwen3_common_configs,
},
{
name: "Qwen2.5-0.5B-Instruct-q4f32_1-MLC",
name: "Qwen3-0.6B-q4f32_1-MLC",
...qwen3_common_configs,
},
{
name: "Qwen3-0.6B-q0f16-MLC",
...qwen3_common_configs,
},
{
name: "Qwen3-0.6B-q0f32-MLC",
...qwen3_common_configs,
},
{
name: "Qwen3-1.7B-q4f16_1-MLC",
...qwen3_common_configs,
},
{
name: "Qwen3-1.7B-q4f32_1-MLC",
...qwen3_common_configs,
},
{
name: "Qwen3-4B-q4f16_1-MLC",
...qwen3_common_configs,
},
{
name: "Qwen3-4B-q4f32_1-MLC",
...qwen3_common_configs,
},
{
name: "Qwen3-8B-q4f16_1-MLC",
...qwen3_common_configs,
},
{
name: "Qwen3-8B-q4f32_1-MLC",
...qwen3_common_configs,
},
// Qwen2.5
{
name: "Qwen2.5-0.5B-Instruct-q4f16_1-MLC",
display_name: "Qwen",
provider: "Alibaba",
family: ModelFamily.QWEN,
Expand All @@ -596,7 +642,7 @@ const DEFAULT_MODEL_BASES: ModelRecord[] = [
},
},
{
name: "Qwen2.5-0.5B-Instruct-q4f16_1-MLC",
name: "Qwen2.5-0.5B-Instruct-q4f32_1-MLC",
display_name: "Qwen",
provider: "Alibaba",
family: ModelFamily.QWEN,
Expand Down Expand Up @@ -873,6 +919,7 @@ const DEFAULT_MODEL_BASES: ModelRecord[] = [
top_p: 0.8,
},
},
// Gemma 2
{
name: "gemma-2-2b-it-q4f16_1-MLC",
display_name: "Gemma",
Expand Down Expand Up @@ -969,6 +1016,7 @@ const DEFAULT_MODEL_BASES: ModelRecord[] = [
top_p: 0.9,
},
},
// StableLM
{
name: "stablelm-2-zephyr-1_6b-q4f16_1-MLC",
display_name: "StableLM",
Expand Down Expand Up @@ -1017,6 +1065,7 @@ const DEFAULT_MODEL_BASES: ModelRecord[] = [
top_p: 0.95,
},
},
// RedPajama
{
name: "RedPajama-INCITE-Chat-3B-v1-q4f16_1-MLC",
display_name: "RedPajama",
Expand Down Expand Up @@ -1057,6 +1106,7 @@ const DEFAULT_MODEL_BASES: ModelRecord[] = [
top_p: 0.95,
},
},
// TinyLlama
{
name: "TinyLlama-1.1B-Chat-v1.0-q4f16_1-MLC",
display_name: "TinyLlama",
Expand Down Expand Up @@ -1105,6 +1155,7 @@ const DEFAULT_MODEL_BASES: ModelRecord[] = [
top_p: 1,
},
},
// Older models
{
name: "Llama-3.1-70B-Instruct-q3f16_1-MLC",
display_name: "Llama",
Expand Down
4 changes: 4 additions & 0 deletions app/locales/cn.ts
Original file line number Diff line number Diff line change
Expand Up @@ -228,6 +228,10 @@ const cn = {
Title: "频率惩罚度 (frequency_penalty)",
SubTitle: "值越大,越有可能降低重复字词",
},
EnableThinking: {
Title: "深度思考",
SubTitle: "允许模型在回答时进行深度思考",
},
},
Store: {
DefaultTopic: "新的聊天",
Expand Down
5 changes: 5 additions & 0 deletions app/locales/en.ts
Original file line number Diff line number Diff line change
Expand Up @@ -206,6 +206,7 @@ const en = {
"Will compress if uncompressed messages length exceeds the value",
},

THINKING: "Thinking",
Usage: {
Title: "Account Balance",
SubTitle(used: any, total: any) {
Expand Down Expand Up @@ -264,6 +265,10 @@ const en = {
Title: "Logging Level",
SubTitle: "Adjust how much detail should be printed to console",
},
EnableThinking: {
Title: "Enable Thinking",
SubTitle: "Allow reasoning models to think step-by-step",
},
},
Store: {
DefaultTopic: "New Conversation",
Expand Down
6 changes: 6 additions & 0 deletions app/store/chat.ts
Original file line number Diff line number Diff line change
Expand Up @@ -347,6 +347,7 @@ export const useChatStore = createPersistStore(
...modelConfig,
cache: useAppConfig.getState().cacheType,
stream: true,
enable_thinking: useAppConfig.getState().enableThinking,
},
onUpdate(message) {
botMessage.streaming = true;
Expand All @@ -362,6 +363,9 @@ export const useChatStore = createPersistStore(
botMessage.usage = usage;
botMessage.stopReason = stopReason;
if (message) {
if (!this.config.enable_thinking) {
message = message.replace(/<think>\s*<\/think>/g, "");
}
botMessage.content = message;
get().onNewMessage(botMessage, llm);
}
Expand Down Expand Up @@ -532,6 +536,7 @@ export const useChatStore = createPersistStore(
model: modelConfig.model,
cache: useAppConfig.getState().cacheType,
stream: false,
enable_thinking: false, // never think for topic
},
onFinish(message) {
get().updateCurrentSession(
Expand Down Expand Up @@ -615,6 +620,7 @@ export const useChatStore = createPersistStore(
stream: true,
model: modelConfig.model,
cache: useAppConfig.getState().cacheType,
enable_thinking: false, // never think for summarization
},
onUpdate(message) {
session.memoryPrompt = message;
Expand Down
Loading