
Commit e417f83

feat(self-host): ollama support (#1219)
1 parent 192f003 commit e417f83

9 files changed: +30 −55 lines


SELF_HOST.md  (+5)

@@ -51,6 +51,11 @@ USE_DB_AUTHENTICATION=false
 # Provide your OpenAI API key here to enable AI features
 # OPENAI_API_KEY=
 
+# Experimental: Use Ollama
+# OPENAI_API_KEY=ollama
+# OPENAI_BASE_URL=http://localhost:11434/v1
+# MODEL_NAME=deepseek-r1:7b
+
 ## === Proxy ===
 # PROXY_SERVER can be a full URL (e.g. http://0.1.2.3:1234) or just an IP and port combo (e.g. 0.1.2.3:1234)
 # Do not uncomment PROXY_USERNAME and PROXY_PASSWORD if your proxy is unauthenticated
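
These variables work because the OpenAI Node client used throughout apps/api reads OPENAI_API_KEY and OPENAI_BASE_URL from the environment, so pointing OPENAI_BASE_URL at Ollama's OpenAI-compatible endpoint reroutes every completion call. A minimal sketch of what the configured client ends up doing, assuming Ollama is running locally and the deepseek-r1:7b model has already been pulled:

```ts
import OpenAI from "openai";

// Sketch only. Assumes the env vars from SELF_HOST.md above:
//   OPENAI_API_KEY=ollama
//   OPENAI_BASE_URL=http://localhost:11434/v1
//   MODEL_NAME=deepseek-r1:7b   (must already be pulled into Ollama)
const openai = new OpenAI(); // picks up OPENAI_API_KEY and OPENAI_BASE_URL from the environment

async function main() {
  const completion = await openai.chat.completions.create({
    model: process.env.MODEL_NAME || "gpt-4o-mini",
    temperature: 0,
    messages: [{ role: "user", content: "Reply with a single word: pong" }],
  });
  console.log(completion.choices[0].message.content);
}

main();
```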

apps/api/src/lib/LLM-extraction/index.ts  (+1 −1)

@@ -67,7 +67,7 @@ export async function generateCompletions(
 
 export async function generateBasicCompletion(prompt: string) {
   const openai = new OpenAI();
-  const model = "gpt-4o";
+  const model = process.env.MODEL_NAME || "gpt-4o";
 
   const completion = await openai.chat.completions.create({
     temperature: 0,

apps/api/src/lib/extract/completions.ts  (+3 −3)

@@ -27,7 +27,7 @@
 // export async function generateBasicCompletion(prompt: string) {
 //   const openai = new OpenAI();
 //   const model: TiktokenModel =
-//     (process.env.MODEL_NAME as TiktokenModel) ?? "gpt-4o-mini";
+//     (process.env.MODEL_NAME as TiktokenModel) || "gpt-4o-mini";
 
 //   const completion = await openai.chat.completions.create({
 //     model,
@@ -48,7 +48,7 @@
 // }> {
 //   const openai = new OpenAI();
 //   const model: TiktokenModel =
-//     (process.env.MODEL_NAME as TiktokenModel) ?? "gpt-4o-mini";
+//     (process.env.MODEL_NAME as TiktokenModel) || "gpt-4o-mini";
 
 //   let extractionContent = pagesContent;
 //   let numTokens = 0;
@@ -109,7 +109,7 @@
 //       : { type: "json_object" },
 //   });
 
-//   if (jsonCompletion.choices[0].message.refusal !== null) {
+//   if (jsonCompletion.choices[0].message.refusal !== null && jsonCompletion.choices[0].message.refusal !== undefined) {
 //     throw new LLMRefusalError(jsonCompletion.choices[0].message.refusal);
 //   }
 
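
The `??` → `||` swaps in this commit (here and in llmExtract.ts below) matter when MODEL_NAME is present but empty: `??` only falls back on null or undefined, while `||` also falls back on the empty string. A standalone illustration, not taken from the codebase:

```ts
// Illustration only: how the two operators treat an empty env var.
process.env.MODEL_NAME = ""; // e.g. a blank `MODEL_NAME=` line left in .env

const withNullish = process.env.MODEL_NAME ?? "gpt-4o-mini"; // "" — empty string is not nullish
const withLogicalOr = process.env.MODEL_NAME || "gpt-4o-mini"; // "gpt-4o-mini" — "" is falsy

console.log({ withNullish, withLogicalOr });
```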

apps/api/src/lib/extract/completions/analyzeSchemaAndPrompt.ts  (+2 −2)

@@ -37,11 +37,11 @@ export async function analyzeSchemaAndPrompt(
     "isMultiEntity was true, but no multiEntityKeys",
   );
 
-  const model = "gpt-4o";
+  const model = process.env.MODEL_NAME || "gpt-4o";
 
   const openai = new OpenAI();
   const result = await openai.beta.chat.completions.parse({
-    model: model,
+    model,
     messages: [
       {
         role: "system",

apps/api/src/lib/extract/index/pinecone.ts  (+1 −1)

@@ -28,7 +28,7 @@ async function getEmbedding(text: string) {
   });
 
   const embedding = await openai.embeddings.create({
-    model: "text-embedding-3-small",
+    model: process.env.EMBEDDING_MODEL_NAME || "text-embedding-3-small",
     input: text,
     encoding_format: "float",
   });
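
Embeddings get their own EMBEDDING_MODEL_NAME override because a chat model set via MODEL_NAME generally cannot serve embeddings. A hedged sketch of using it; the model name nomic-embed-text is only an example of an embedding model one might pull into Ollama and is not part of this commit:

```ts
import OpenAI from "openai";

// Sketch only. With OPENAI_BASE_URL pointing at Ollama, EMBEDDING_MODEL_NAME could be
// set to a locally pulled embedding model (e.g. EMBEDDING_MODEL_NAME=nomic-embed-text,
// a hypothetical choice, not something this commit configures).
const openai = new OpenAI();

async function getEmbedding(text: string): Promise<number[]> {
  const res = await openai.embeddings.create({
    model: process.env.EMBEDDING_MODEL_NAME || "text-embedding-3-small",
    input: text,
    encoding_format: "float",
  });
  return res.data[0].embedding;
}
```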

apps/api/src/lib/extract/usage/llm-cost.ts  (+1 −1)

@@ -26,7 +26,7 @@ export function estimateTotalCost(tokenUsage: TokenUsage[]): number {
 export function estimateCost(tokenUsage: TokenUsage): number {
   let totalCost = 0;
   try {
-    let model = tokenUsage.model ?? process.env.MODEL_NAME ?? "gpt-4o-mini";
+    let model = tokenUsage.model ?? (process.env.MODEL_NAME || "gpt-4o-mini");
     const pricing = modelPrices[model] as ModelPricing;
 
     if (!pricing) {
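
The added parentheses are required, not cosmetic: JavaScript rejects an unparenthesized mix of `??` with `||` in the same expression as a SyntaxError, and the grouping also keeps the precedence explicit. A standalone restatement of the new line:

```ts
// Illustration: tokenUsage.model wins whenever it is not null/undefined; otherwise fall
// back to MODEL_NAME, and to "gpt-4o-mini" only when MODEL_NAME is unset or empty.
// Writing `a ?? b || c` without parentheses would not even parse.
const tokenUsageModel: string | undefined = undefined; // stand-in for tokenUsage.model
const model = tokenUsageModel ?? (process.env.MODEL_NAME || "gpt-4o-mini");
console.log(model);
```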

apps/api/src/lib/generate-llmstxt/generate-llmstxt-service.ts  (+1 −1)

@@ -132,7 +132,7 @@ export async function performGenerateLlmsTxt(options: GenerateLLMsTextServiceOpt
       _logger.debug(`Generating description for ${document.metadata?.url}`);
 
       const completion = await openai.beta.chat.completions.parse({
-        model: "gpt-4o-mini",
+        model: process.env.MODEL_NAME || "gpt-4o-mini",
         messages: [
           {
             role: "user",

apps/api/src/lib/llm/generate.ts  (−33)

This file was deleted.

apps/api/src/scraper/scrapeURL/transformers/llmExtract.ts  (+16 −13)

@@ -124,7 +124,7 @@ export async function generateOpenAICompletions(
   markdown?: string,
   previousWarning?: string,
   isExtractEndpoint?: boolean,
-  model: TiktokenModel = (process.env.MODEL_NAME as TiktokenModel) ??
+  model: TiktokenModel = (process.env.MODEL_NAME as TiktokenModel) ||
     "gpt-4o-mini",
 ): Promise<{
   extract: any;
@@ -151,15 +151,21 @@ export async function generateOpenAICompletions(
 
   // count number of tokens
   let numTokens = 0;
-  const encoder = encoding_for_model(model as TiktokenModel);
   try {
     // Encode the message into tokens
-    const tokens = encoder.encode(markdown);
-
-    // Return the number of tokens
-    numTokens = tokens.length;
+    const encoder = encoding_for_model(model as TiktokenModel);
+
+    try {
+      const tokens = encoder.encode(markdown);
+      numTokens = tokens.length;
+    } catch (e) {
+      throw e;
+    } finally {
+      // Free the encoder resources after use
+      encoder.free();
+    }
   } catch (error) {
-    logger.warn("Calculating num tokens of string failed", { error, markdown });
+    logger.warn("Calculating num tokens of string failed", { error });
 
     markdown = markdown.slice(0, maxTokensSafe * modifier);
 
@@ -168,9 +174,6 @@ export async function generateOpenAICompletions(
       maxTokensSafe +
       ") we support.";
     warning = previousWarning === undefined ? w : w + " " + previousWarning;
-  } finally {
-    // Free the encoder resources after use
-    encoder.free();
   }
 
   if (numTokens > maxTokensSafe) {
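
The restructuring above is what keeps token counting from blowing up on non-OpenAI model names. Previously encoding_for_model ran outside any try block, so an unrecognized model (an Ollama name like deepseek-r1:7b, which Tiktoken does not know) threw straight out of generateOpenAICompletions. With the call moved inside the outer try, the existing catch falls back to truncating the markdown, and the nested try/finally frees the encoder only when it was actually created. A compressed sketch of the same flow, assuming the tiktoken package this file imports from and simplifying the fallback to a rough estimate:

```ts
import { encoding_for_model, type TiktokenModel } from "tiktoken";

// Sketch of the new control flow (not the exact code above).
function countTokens(markdown: string, model: string): number {
  try {
    // Throws for model names Tiktoken does not know, e.g. "deepseek-r1:7b".
    const encoder = encoding_for_model(model as TiktokenModel);
    try {
      return encoder.encode(markdown).length;
    } finally {
      encoder.free(); // only reached when the encoder exists
    }
  } catch {
    // The real code truncates `markdown` and records a warning instead;
    // a crude character-based estimate stands in for that here.
    return Math.ceil(markdown.length / 4);
  }
}
```
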
@@ -247,7 +250,7 @@
       : { type: "json_object" },
   });
 
-  if (jsonCompletion.choices[0].message.refusal !== null) {
+  if (jsonCompletion.choices[0].message.refusal !== null && jsonCompletion.choices[0].message.refusal !== undefined) {
     throw new LLMRefusalError(jsonCompletion.choices[0].message.refusal);
   }
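
The added `!== undefined` check makes the refusal handling safe against OpenAI-compatible backends: the official API reports refusal: null when the model did not refuse, but other servers may omit the field entirely, and an absent field passes a plain `!== null` test, which previously surfaced as a spurious LLMRefusalError. A tiny standalone illustration:

```ts
// Illustration: why checking only `!== null` misfires when the field is absent.
const message = { content: "ok" } as { content: string; refusal?: string | null };

console.log(message.refusal !== null); // true  — undefined is not null; the old check treats this as a refusal
console.log(message.refusal !== null && message.refusal !== undefined); // false — no refusal present
```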

@@ -351,7 +354,7 @@ export async function generateSchemaFromPrompt(prompt: string): Promise<any> {
   for (const temp of temperatures) {
     try {
       const result = await openai.beta.chat.completions.parse({
-        model: "gpt-4o",
+        model: process.env.MODEL_NAME || "gpt-4o",
         temperature: temp,
         messages: [
           {
@@ -392,7 +395,7 @@ Return a valid JSON schema object with properties that would capture the informa
         },
       });
 
-      if (result.choices[0].message.refusal !== null) {
+      if (result.choices[0].message.refusal !== null && result.choices[0].message.refusal !== undefined) {
        throw new Error("LLM refused to generate schema");
      }
 