Generate embeddings for emails processed with LLM (#364)
andris9 authored Sep 18, 2023
1 parent b382569 commit a787cbf
Showing 6 changed files with 97 additions and 5 deletions.
31 changes: 31 additions & 0 deletions lib/mailbox.js
@@ -1135,6 +1135,37 @@ class Mailbox {
);
this.logger.error({ msg: 'Failed to fetch summary from OpenAI', err });
}

let openAiGenerateEmbeddings = await settings.get('openAiGenerateEmbeddings');
if (openAiGenerateEmbeddings) {
try {
messageInfo.embeddings = await this.connection.call({
cmd: 'generateEmbeddings',
data: {
message: {
headers: Object.keys(messageInfo.headers || {}).map(key => ({ key, value: [].concat(messageInfo.headers[key] || []) })),
attachments: messageInfo.attachments,
from: messageInfo.from,
subject: messageInfo.subject,
text: messageInfo.text.plain,
html: messageInfo.text.html
}
},
timeout: 2 * 60 * 1000
});
} catch (err) {
await this.connection.redis.set(
`${REDIS_PREFIX}:openai:error`,
JSON.stringify({
message: err.message,
code: err.code,
statusCode: err.statusCode,
time: Date.now()
})
);
this.logger.error({ msg: 'Failed to fetch embeddings from OpenAI', err });
}
}
}
}
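
Note on the hunk above: when the openAiGenerateEmbeddings setting is enabled, the mailbox worker asks the parent process (via connection.call with the 'generateEmbeddings' command) to embed the processed message and stores the result on messageInfo.embeddings; failures are logged and recorded under the `${REDIS_PREFIX}:openai:error` key, mirroring the existing summary error handling. A minimal consumer-side sketch of how the returned chunks could be used for similarity ranking follows; it is not part of this commit, and the chunk field names (chunk, embedding) are assumptions about the @postalsys/email-ai-tools output.

    // Hypothetical consumer-side helper (not part of this commit): rank stored chunks
    // against a query embedding using cosine similarity. Assumes each entry in
    // embeddings.chunks looks roughly like { chunk, embedding }.
    function cosineSimilarity(a, b) {
        let dot = 0;
        let normA = 0;
        let normB = 0;
        for (let i = 0; i < a.length; i++) {
            dot += a[i] * b[i];
            normA += a[i] * a[i];
            normB += b[i] * b[i];
        }
        return dot / (Math.sqrt(normA) * Math.sqrt(normB));
    }

    function rankChunks(queryEmbedding, embeddings) {
        // messageInfo.embeddings is expected to be the { chunks: [...] } object
        // returned by the 'generateEmbeddings' command
        return (embeddings.chunks || [])
            .map(entry => ({ chunk: entry.chunk, score: cosineSimilarity(queryEmbedding, entry.embedding) }))
            .sort((a, b) => b.score - a.score);
    }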

29 changes: 28 additions & 1 deletion lib/routes-ui.js
@@ -1330,6 +1330,7 @@ function applyRoutes(server, call) {

const values = {
generateEmailSummary: (await settings.get('generateEmailSummary')) || false,
openAiGenerateEmbeddings: (await settings.get('openAiGenerateEmbeddings')) || false,

openAiPrompt: ((await settings.get('openAiPrompt')) || '').toString(),

@@ -1407,6 +1408,7 @@ return true;`

let data = {
generateEmailSummary: request.payload.generateEmailSummary,
openAiGenerateEmbeddings: request.payload.openAiGenerateEmbeddings,
openAiModel: request.payload.openAiModel,
openAiPrompt: (request.payload.openAiPrompt || '').toString(),
openAiPreProcessingFn: contentFn,
@@ -1551,6 +1553,7 @@ return true;`

payload: Joi.object({
generateEmailSummary: settingsSchema.generateEmailSummary.default(false),
openAiGenerateEmbeddings: settingsSchema.openAiGenerateEmbeddings.default(false),

openAiAPIKey: settingsSchema.openAiAPIKey.empty(''),
openAiModel: settingsSchema.openAiModel.empty(''),
@@ -1599,6 +1602,29 @@ return true;`
timeout: 2 * 60 * 1000
});

if (request.payload.openAiGenerateEmbeddings) {
try {
response.embeddings = await call({
cmd: 'generateEmbeddings',
data: {
message: {
headers: parsed.headerLines.map(header => libmime.decodeHeader(header.line)),
attachments: parsed.attachments,
html: parsed.html,
text: parsed.text
},
openAiAPIKey: request.payload.openAiAPIKey
},
timeout: 2 * 60 * 1000
});
} catch (err) {
response.embeddings = {
error: err.message
};
logger.error({ msg: 'Failed to fetch embeddings', err });
}
}

// crux from olden times
for (let key of Object.keys(response.summary)) {
// remove meta keys from output
@@ -1636,7 +1662,8 @@ return true;`
openAiModel: settingsSchema.openAiModel.empty(''),
openAiPrompt: settingsSchema.openAiPrompt.default(''),
openAiTemperature: settingsSchema.openAiTemperature.empty(''),
- openAiTopP: settingsSchema.openAiTopP.empty('')
+ openAiTopP: settingsSchema.openAiTopP.empty(''),
+ openAiGenerateEmbeddings: settingsSchema.openAiGenerateEmbeddings
})
}
}
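
For the settings test route above: when openAiGenerateEmbeddings is set in the payload, the handler additionally calls the 'generateEmbeddings' command and attaches the result to the response, falling back to an error object if the call fails. A rough sketch of the resulting response shape (field contents are illustrative, not asserted by this commit):

    // Illustrative only: the summary fields come from the existing generateSummary
    // call and are not changed here; chunk field names are assumptions.
    const exampleResponse = {
        summary: {
            // ...fields produced by generateSummary (meta keys are stripped by the loop above)
        },
        embeddings: {
            // success: the object returned by the 'generateEmbeddings' command, i.e. { chunks: [...] }
            chunks: [{ chunk: 'First slice of the email text…', embedding: [/* vector values */] }]
        }
        // failure instead: embeddings: { error: err.message }
    };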
6 changes: 5 additions & 1 deletion lib/schemas.js
@@ -84,7 +84,6 @@ const settingsSchema = {
.truthy('Y', 'true', '1', 'on')
.falsy('N', 'false', 0, '')
.description('If true, then extracts reply text using OpenAI ChatGPT'),

generateRiskAssessment: Joi.boolean().truthy('Y', 'true', '1', 'on').falsy('N', 'false', 0, '').description('(deprecated, not used)'),

openAiAPIKey: Joi.string().allow('').example('verysecr8t').description('OpenAI API key').label('OpenAiAPIKey'),
@@ -100,6 +99,11 @@ const settingsSchema = {
.description('Prompt to send to LLM for analyzing emails')
.label('OpenAiPrompt'),

openAiGenerateEmbeddings: Joi.boolean()
.truthy('Y', 'true', '1', 'on')
.falsy('N', 'false', 0, '')
.description('If true, then generates vector embeddings for the email'),

inboxNewOnly: Joi.boolean()
.truthy('Y', 'true', '1', 'on')
.falsy('N', 'false', 0, '')
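
The new openAiGenerateEmbeddings entry follows the same pattern as the other boolean settings, so form-style values ('on', '', 'Y', 'N', …) are coerced to real booleans during validation. A quick standalone illustration (standard Joi behaviour, assuming the joi package already used in this file):

    const Joi = require('joi');

    const openAiGenerateEmbeddings = Joi.boolean()
        .truthy('Y', 'true', '1', 'on')
        .falsy('N', 'false', 0, '');

    console.log(openAiGenerateEmbeddings.validate('on').value); // true
    console.log(openAiGenerateEmbeddings.validate('').value);   // false
    console.log(openAiGenerateEmbeddings.validate('N').value);  // false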
2 changes: 1 addition & 1 deletion package.json
@@ -50,7 +50,7 @@
"@hapi/vision": "7.0.3",
"@phc/pbkdf2": "1.1.14",
"@postalsys/certs": "1.0.5",
"@postalsys/email-ai-tools": "1.2.1",
"@postalsys/email-ai-tools": "1.3.2",
"@postalsys/email-text-tools": "2.1.1",
"@postalsys/hecks": "3.0.0-fork.3",
"@postalsys/templates": "1.0.5",
15 changes: 14 additions & 1 deletion server.js
@@ -55,7 +55,7 @@ const {
} = require('./lib/consts');

const { webhooks: Webhooks } = require('./lib/webhooks');
- const { generateSummary, DEFAULT_USER_PROMPT: openAiDefaultPrompt } = require('@postalsys/email-ai-tools');
+ const { generateSummary, generateEmbeddings, DEFAULT_USER_PROMPT: openAiDefaultPrompt } = require('@postalsys/email-ai-tools');
const { fetch: fetchCmd, Agent } = require('undici');
const fetchAgent = new Agent({ connect: { timeout: FETCH_TIMEOUT } });

@@ -1414,6 +1414,19 @@ async function onCommand(worker, message) {
return await generateSummary(message.data.message, openAiAPIKey, requestOpts);
}

// run these in main process to avoid polluting RAM with the memory hungry tokenization library
case 'generateEmbeddings': {
let requestOpts = {};

let openAiAPIKey = message.data.openAiAPIKey || (await settings.get('openAiAPIKey'));

if (!openAiAPIKey) {
throw new Error(`OpenAI API key is not set`);
}

return { chunks: await generateEmbeddings(message.data.message, openAiAPIKey, requestOpts) };
}

case 'openAiDefaultPrompt': {
return openAiDefaultPrompt;
}
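
The new 'generateEmbeddings' command runs in the main process (per the comment above, to keep the tokenization library's memory use out of the workers), resolves the API key from the request payload or settings, and wraps the library result as { chunks }. A minimal direct-use sketch of the underlying library call, using the same signature as the handler above; the exact shape of the returned chunk entries is not asserted here:

    const { generateEmbeddings } = require('@postalsys/email-ai-tools');

    async function embedMessage(message, openAiAPIKey) {
        // same call the command handler makes; an empty options object mirrors requestOpts above
        const chunks = await generateEmbeddings(message, openAiAPIKey, {});
        // the command handler returns this wrapped as { chunks }
        return { chunks };
    }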
19 changes: 18 additions & 1 deletion views/config/ai.hbs
@@ -99,6 +99,22 @@
<small class="form-text text-muted">OpenAI API key.</small>
</div>

<div class="form-group form-check">
<div class="text-muted float-right code-link">[<a href="/admin/iframe/docs#/settings/postV1Settings"
target="_blank">openAiGenerateEmbeddings</a>]</div>

<input type="checkbox" class="form-check-input {{#if errors.openAiGenerateEmbeddings}}is-invalid{{/if}}"
id="openAiGenerateEmbeddings" name="openAiGenerateEmbeddings" {{#if
values.openAiGenerateEmbeddings}}checked{{/if}} />
<label class="form-check-label" for="openAiGenerateEmbeddings">Generate vector embeddings for the
email</label>
{{#if errors.openAiGenerateEmbeddings}}
<span class="invalid-feedback">{{errors.openAiGenerateEmbeddings}}</span>
{{/if}}
<small class="form-text text-muted">EmailEngine uses the OpenAI API to generate text embeddings for the
email. To decrease vector size, EmailEngine splits emails into chunks of up to 600 tokens and
generates embeddings for each chunk separately.</small>
</div>
</div>
</div>
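
The help text above notes that emails are split into chunks of up to 600 tokens, each embedded separately. A rough sketch of that idea follows; the real splitting happens inside @postalsys/email-ai-tools with a proper tokenizer, and the ~4 characters-per-token ratio used here is only an illustrative assumption:

    // Illustrative only: approximate token-based chunking using a crude
    // characters-per-token estimate instead of a real tokenizer.
    function splitIntoChunks(text, maxTokens = 600, approxCharsPerToken = 4) {
        const maxChars = maxTokens * approxCharsPerToken;
        const chunks = [];
        for (let i = 0; i < text.length; i += maxChars) {
            chunks.push(text.slice(i, i + maxChars));
        }
        return chunks; // each chunk would get its own embedding vector
    }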

@@ -581,7 +597,8 @@
openAiModel: document.getElementById('settingsServiceOpenAiModel').value,
openAiAPIKey: document.getElementById('openAiAPIKey').value,
openAiTemperature: document.getElementById('settingsOpenAiTemperature').value,
- openAiTopP: document.getElementById('settingsOpenAiTopP').value
+ openAiTopP: document.getElementById('settingsOpenAiTopP').value,
+ openAiGenerateEmbeddings: document.getElementById('openAiGenerateEmbeddings').checked
};
let res;
