From a787cbf9d3e82ceece440756132975d9323d66cf Mon Sep 17 00:00:00 2001 From: Andris Reinman Date: Mon, 18 Sep 2023 14:53:17 +0300 Subject: [PATCH] Generate embeddings for emails processed with LLM (#364) --- lib/mailbox.js | 31 +++++++++++++++++++++++++++++++ lib/routes-ui.js | 29 ++++++++++++++++++++++++++++- lib/schemas.js | 6 +++++- package.json | 2 +- server.js | 15 ++++++++++++++- views/config/ai.hbs | 19 ++++++++++++++++++- 6 files changed, 97 insertions(+), 5 deletions(-) diff --git a/lib/mailbox.js b/lib/mailbox.js index b24d79d1..71f596d5 100644 --- a/lib/mailbox.js +++ b/lib/mailbox.js @@ -1135,6 +1135,37 @@ class Mailbox { ); this.logger.error({ msg: 'Failed to fetch summary from OpenAI', err }); } + + let openAiGenerateEmbeddings = await settings.get('openAiGenerateEmbeddings'); + if (openAiGenerateEmbeddings) { + try { + messageInfo.embeddings = await this.connection.call({ + cmd: 'generateEmbeddings', + data: { + message: { + headers: Object.keys(messageInfo.headers || {}).map(key => ({ key, value: [].concat(messageInfo.headers[key] || []) })), + attachments: messageInfo.attachments, + from: messageInfo.from, + subject: messageInfo.subject, + text: messageInfo.text.plain, + html: messageInfo.text.html + } + }, + timeout: 2 * 60 * 1000 + }); + } catch (err) { + await this.connection.redis.set( + `${REDIS_PREFIX}:openai:error`, + JSON.stringify({ + message: err.message, + code: err.code, + statusCode: err.statusCode, + time: Date.now() + }) + ); + this.logger.error({ msg: 'Failed to fetch embeddings OpenAI', err }); + } + } } } diff --git a/lib/routes-ui.js b/lib/routes-ui.js index 62dba627..c45fddf5 100644 --- a/lib/routes-ui.js +++ b/lib/routes-ui.js @@ -1330,6 +1330,7 @@ function applyRoutes(server, call) { const values = { generateEmailSummary: (await settings.get('generateEmailSummary')) || false, + openAiGenerateEmbeddings: (await settings.get('openAiGenerateEmbeddings')) || false, openAiPrompt: ((await settings.get('openAiPrompt')) || 
'').toString(), @@ -1407,6 +1408,7 @@ return true;` let data = { generateEmailSummary: request.payload.generateEmailSummary, + openAiGenerateEmbeddings: request.payload.openAiGenerateEmbeddings, openAiModel: request.payload.openAiModel, openAiPrompt: (request.payload.openAiPrompt || '').toString(), openAiPreProcessingFn: contentFn, @@ -1551,6 +1553,7 @@ return true;` payload: Joi.object({ generateEmailSummary: settingsSchema.generateEmailSummary.default(false), + openAiGenerateEmbeddings: settingsSchema.openAiGenerateEmbeddings.default(false), openAiAPIKey: settingsSchema.openAiAPIKey.empty(''), openAiModel: settingsSchema.openAiModel.empty(''), @@ -1599,6 +1602,29 @@ return true;` timeout: 2 * 60 * 1000 }); + if (request.payload.openAiGenerateEmbeddings) { + try { + response.embeddings = await call({ + cmd: 'generateEmbeddings', + data: { + message: { + headers: parsed.headerLines.map(header => libmime.decodeHeader(header.line)), + attachments: parsed.attachments, + html: parsed.html, + text: parsed.text + }, + openAiAPIKey: request.payload.openAiAPIKey + }, + timeout: 2 * 60 * 1000 + }); + } catch (err) { + response.embeddings = { + error: err.message + }; + logger.error({ msg: 'Failed to fetch embeddings', err }); + } + } + // crux from olden times for (let key of Object.keys(response.summary)) { // remove meta keys from output @@ -1636,7 +1662,8 @@ return true;` openAiModel: settingsSchema.openAiModel.empty(''), openAiPrompt: settingsSchema.openAiPrompt.default(''), openAiTemperature: settingsSchema.openAiTemperature.empty(''), - openAiTopP: settingsSchema.openAiTopP.empty('') + openAiTopP: settingsSchema.openAiTopP.empty(''), + openAiGenerateEmbeddings: settingsSchema.openAiGenerateEmbeddings }) } } diff --git a/lib/schemas.js b/lib/schemas.js index 820c0f33..7b351e43 100644 --- a/lib/schemas.js +++ b/lib/schemas.js @@ -84,7 +84,6 @@ const settingsSchema = { .truthy('Y', 'true', '1', 'on') .falsy('N', 'false', 0, '') .description('If true, then extracts reply 
text using OpenAI ChatGPT'), - generateRiskAssessment: Joi.boolean().truthy('Y', 'true', '1', 'on').falsy('N', 'false', 0, '').description('(deprecated, not used)'), openAiAPIKey: Joi.string().allow('').example('verysecr8t').description('OpenAI API key').label('OpenAiAPIKey'), @@ -100,6 +99,11 @@ const settingsSchema = { .description('Prompt to send to LLM for analyzing emails') .label('OpenAiPrompt'), + openAiGenerateEmbeddings: Joi.boolean() + .truthy('Y', 'true', '1', 'on') + .falsy('N', 'false', 0, '') + .description('If true, then generates vector embeddings for the email'), + inboxNewOnly: Joi.boolean() .truthy('Y', 'true', '1', 'on') .falsy('N', 'false', 0, '') diff --git a/package.json b/package.json index df78f426..fce7a9b3 100644 --- a/package.json +++ b/package.json @@ -50,7 +50,7 @@ "@hapi/vision": "7.0.3", "@phc/pbkdf2": "1.1.14", "@postalsys/certs": "1.0.5", - "@postalsys/email-ai-tools": "1.2.1", + "@postalsys/email-ai-tools": "1.3.2", "@postalsys/email-text-tools": "2.1.1", "@postalsys/hecks": "3.0.0-fork.3", "@postalsys/templates": "1.0.5", diff --git a/server.js b/server.js index 237efe4d..3f308d4f 100644 --- a/server.js +++ b/server.js @@ -55,7 +55,7 @@ const { } = require('./lib/consts'); const { webhooks: Webhooks } = require('./lib/webhooks'); -const { generateSummary, DEFAULT_USER_PROMPT: openAiDefaultPrompt } = require('@postalsys/email-ai-tools'); +const { generateSummary, generateEmbeddings, DEFAULT_USER_PROMPT: openAiDefaultPrompt } = require('@postalsys/email-ai-tools'); const { fetch: fetchCmd, Agent } = require('undici'); const fetchAgent = new Agent({ connect: { timeout: FETCH_TIMEOUT } }); @@ -1414,6 +1414,19 @@ async function onCommand(worker, message) { return await generateSummary(message.data.message, openAiAPIKey, requestOpts); } + // run these in main process to avoid polluting RAM with the memory hungry tokenization library + case 'generateEmbeddings': { + let requestOpts = {}; + + let openAiAPIKey = message.data.openAiAPIKey 
|| (await settings.get('openAiAPIKey')); + + if (!openAiAPIKey) { + throw new Error(`OpenAI API key is not set`); + } + + return { chunks: await generateEmbeddings(message.data.message, openAiAPIKey, requestOpts) }; + } + case 'openAiDefaultPrompt': { return openAiDefaultPrompt; } diff --git a/views/config/ai.hbs b/views/config/ai.hbs index b14586e1..33730e3e 100644 --- a/views/config/ai.hbs +++ b/views/config/ai.hbs @@ -99,6 +99,22 @@ OpenAI API key. +
+ + + + + {{#if errors.openAiGenerateEmbeddings}} {{errors.openAiGenerateEmbeddings}} {{/if}} EmailEngine uses the OpenAI API to generate text embeddings for the + email. To decrease vector size, EmailEngine splits emails into chunks of up to 600 tokens and + generates embeddings for each chunk separately. +
@@ -581,7 +597,8 @@ openAiModel: document.getElementById('settingsServiceOpenAiModel').value, openAiAPIKey: document.getElementById('openAiAPIKey').value, openAiTemperature: document.getElementById('settingsOpenAiTemperature').value, - openAiTopP: document.getElementById('settingsOpenAiTopP').value + openAiTopP: document.getElementById('settingsOpenAiTopP').value, + openAiGenerateEmbeddings: document.getElementById('openAiGenerateEmbeddings').value, }; let res;