Merge pull request #8 from n4ze3m/next
cohere, huggingface embedding
n4ze3m authored Jun 9, 2023
2 parents 2ee1e7b + 14f7c24 commit 102b577
Showing 15 changed files with 173 additions and 143 deletions.
5 changes: 2 additions & 3 deletions README.md
@@ -101,14 +101,13 @@ and more...

- [x] OpenAI
- [ ] Anthropic
- [ ] Falcon-7B

### Embedding models

- [X] OpenAI
- [X] TensorFlow
- [ ] HuggingFace
- [ ] Cohere
- [X] HuggingFace
- [X] Cohere


### Application
3 changes: 2 additions & 1 deletion app/ui/src/utils/embeddings.ts
@@ -1,6 +1,7 @@
export const availableEmbeddingTypes = [
{ value: "openai", label: "OpenAI" },
{ value: "tensorflow", label: "Tensorflow" },
// { value: "cohere", label: "Cohere"}
{ value: "cohere", label: "Cohere"},
{ value: "huggingface-api", label: "HuggingFace (Inference)"}
];
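
Since these dropdown values must line up with the server-side `createBotSchema` enum, a small guard like the sketch below can catch drift between the two lists; `isSupported` is a hypothetical helper, not part of the repo:

import { availableEmbeddingTypes } from "./embeddings";

// Hypothetical guard: checks a value against the UI's supported list
// before it is sent to the server.
const isSupported = (value: string): boolean =>
  availableEmbeddingTypes.some((t) => t.value === value);

console.log(isSupported("huggingface-api")); // true after this change
console.log(isSupported("anthropic"));       // false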

4 changes: 3 additions & 1 deletion docker/imp.env
@@ -4,4 +4,6 @@ OPENAI_API_KEY=""
# DB_SECRET_KEY is used for JWT token generation; please change it to your own secret key
DB_SECRET_KEY="super-secret-key"
# Cohere API key -> https://dashboard.cohere.ai/api-keys
# COHERE_API_KEY=""
COHERE_API_KEY=""
# Huggingface Hub API key -> https://huggingface.co/settings/token
HUGGINGFACEHUB_API_KEY=""
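
For context, LangChain's Cohere and HuggingFace embedding clients can take these keys explicitly; a minimal sketch, assuming the env file above has been loaded into `process.env` (when omitted, the constructors can also pick the same variables up from the environment on their own):

import { CohereEmbeddings } from "langchain/embeddings/cohere";
import { HuggingFaceInferenceEmbeddings } from "langchain/embeddings/hf";

// Wire the keys from docker/imp.env explicitly rather than relying on
// the constructors' environment-variable fallback.
const cohere = new CohereEmbeddings({ apiKey: process.env.COHERE_API_KEY });
const hf = new HuggingFaceInferenceEmbeddings({
  apiKey: process.env.HUGGINGFACEHUB_API_KEY,
});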
2 changes: 1 addition & 1 deletion package.json
@@ -1,6 +1,6 @@
{
"name": "dialoqbase",
"version": "0.0.3",
"version": "0.0.4",
"description": "Create chatbots with ease",
"scripts": {
"ui:dev": "pnpm run --filter ui dev",
1 change: 1 addition & 0 deletions server/package.json
@@ -28,6 +28,7 @@
"@fastify/multipart": "^7.6.0",
"@fastify/sensible": "^5.0.0",
"@fastify/static": "^6.10.2",
"@huggingface/inference": "1",
"@prisma/client": "4.15.0",
"@tensorflow-models/universal-sentence-encoder": "^1.3.3",
"@tensorflow/tfjs-backend-cpu": "^4.7.0",
2 changes: 2 additions & 0 deletions server/prisma/schema.prisma
@@ -1,10 +1,12 @@
generator client {
provider = "prisma-client-js"
previewFeatures = ["postgresqlExtensions"]
}

datasource db {
provider = "postgresql"
url = env("DATABASE_URL")
extensions = [pgvector(map: "vector", schema: "extensions")]
}

model Bot {
1 change: 1 addition & 0 deletions server/src/app.ts
@@ -4,6 +4,7 @@ import { FastifyPluginAsync } from "fastify";
import cors from "@fastify/cors";
import fastifyStatic from "@fastify/static";
import fastifyMultipart from "@fastify/multipart";

export type AppOptions = {} & Partial<AutoloadPluginOptions>;

const options: AppOptions = {};
248 changes: 125 additions & 123 deletions server/src/queue/index.ts
@@ -18,145 +18,147 @@ export const queue = new Queue("vector", process.env.DB_REDIS_URL!, {});
export const queueHandler = async (job: Job, done: DoneCallback) => {
  const data = job.data as QSource[];

  console.log("Processing queue");
  try {
    for (const source of data) {
      try {
        if (source.type.toLowerCase() === "website") {
          await prisma.botSource.update({
            where: {
              id: source.id,
            },
            data: {
              status: "PROCESSING",
            },
          });

          const loader = new CheerioWebBaseLoader(source.content!);
          const docs = await loader.load();

          const textSplitter = new RecursiveCharacterTextSplitter({
            chunkSize: 1000,
            chunkOverlap: 200,
          });
          const chunks = await textSplitter.splitDocuments(docs);

          await DialoqbaseVectorStore.fromDocuments(
            chunks,
            embeddings(source.embedding),
            {
              botId: source.botId,
              sourceId: source.id,
            },
          );

          await prisma.botSource.update({
            where: {
              id: source.id,
            },
            data: {
              status: "FINISHED",
              isPending: false,
            },
          });
        } else if (source.type.toLowerCase() === "text") {
          await prisma.botSource.update({
            where: {
              id: source.id,
            },
            data: {
              status: "PROCESSING",
            },
          });

          const textSplitter = new RecursiveCharacterTextSplitter({
            chunkSize: 1000,
            chunkOverlap: 200,
          });
          const chunks = await textSplitter.splitDocuments([
            {
              pageContent: source.content!,
              metadata: {
                source: `text-${source.id}`,
              },
            },
          ]);

          await DialoqbaseVectorStore.fromDocuments(
            chunks,
            embeddings(source.embedding),
            {
              botId: source.botId,
              sourceId: source.id,
            },
          );

          await prisma.botSource.update({
            where: {
              id: source.id,
            },
            data: {
              status: "FINISHED",
              isPending: false,
            },
          });
        } else if (source.type.toLowerCase() === "pdf") {
          console.log("loading pdf");
          await prisma.botSource.update({
            where: {
              id: source.id,
            },
            data: {
              status: "PROCESSING",
            },
          });

          const location = source.location!;
          const loader = new PDFLoader(location);
          const docs = await loader.load();

          const textSplitter = new RecursiveCharacterTextSplitter({
            chunkSize: 1000,
            chunkOverlap: 200,
          });
          const chunks = await textSplitter.splitDocuments(docs);

          await DialoqbaseVectorStore.fromDocuments(
            chunks,
            embeddings(source.embedding),
            {
              botId: source.botId,
              sourceId: source.id,
            },
          );

          await prisma.botSource.update({
            where: {
              id: source.id,
            },
            data: {
              status: "FINISHED",
              isPending: false,
            },
          });
        }
      } catch (e) {
        console.log(e);

        await prisma.botSource.update({
          where: {
            id: source.id,
          },
          data: {
            status: "FAILED",
            isPending: false,
          },
        });
      }
    }
  } catch (e) {
    console.log(e);
  } finally {
    done();
  }
};

queue.process(queueHandler);
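
The substantive change above is that `done()` now runs in a `finally` block and failed sources are marked FAILED instead of FINISHED, so a throwing source can no longer leave the Bull job unacknowledged. A minimal sketch of the same pattern, with `doWork` as a hypothetical stand-in for the per-source processing:

import Queue, { Job, DoneCallback } from "bull";

const exampleQueue = new Queue("example");

// Hypothetical stand-in for the per-source processing, which may throw.
const doWork = async (data: unknown) => {};

exampleQueue.process(async (job: Job, done: DoneCallback) => {
  try {
    await doWork(job.data);
  } catch (e) {
    console.log(e); // log, but don't rethrow past the worker
  } finally {
    done(); // always acknowledge so the queue keeps draining
  }
});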

5 changes: 3 additions & 2 deletions server/src/routes/api/v1/bot/handlers/index.ts
@@ -129,9 +129,10 @@ export const createBotPDFHandler = async (
...botSource,
embedding: bot.embedding,
}]);
return {
  id: bot.id,
};

return reply.status(200).send({
  id: bot.id,
});
} catch (err) {
return reply.status(500).send({
message: "Upload failed due to internal server error",
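
Fastify would also serialize a plain object returned from an async handler, but routing the response through `reply` makes the status code explicit and keeps the success and error paths symmetrical; a minimal sketch of the shape (route path and payload are illustrative):

fastify.post("/api/v1/bot/pdf", async (request, reply) => {
  try {
    // ... create the bot and queue its sources ...
    return reply.status(200).send({ id: "bot-id" });
  } catch (err) {
    return reply.status(500).send({
      message: "Upload failed due to internal server error",
    });
  }
});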
2 changes: 1 addition & 1 deletion server/src/routes/api/v1/bot/handlers/schema.ts
@@ -17,7 +17,7 @@ export const createBotSchema: FastifySchema = {
},
embedding: {
type: "string",
enum: ["tensorflow", "openai", "cohere"],
enum: ["tensorflow", "openai", "cohere", "huggingface-api"],
}
},
},
2 changes: 2 additions & 0 deletions server/src/routes/bot/root.ts
@@ -11,6 +11,8 @@ const root: FastifyPluginAsync = async (fastify, _): Promise<void> => {
fastify.get("/:id", async (request, reply) => {
return reply.sendFile('bot.html')
});


};

export default root;
8 changes: 7 additions & 1 deletion server/src/utils/embeddings.ts
@@ -1,14 +1,20 @@
import "@tensorflow/tfjs-backend-cpu";
import { TensorFlowEmbeddings } from "langchain/embeddings/tensorflow";
import { OpenAIEmbeddings } from "langchain/embeddings/openai";
import { CohereEmbeddings } from "langchain/embeddings/cohere";
import { HuggingFaceInferenceEmbeddings } from "langchain/embeddings/hf";

export const embeddings = (embeddingsType: string) => {
switch (embeddingsType) {
case "tensorflow":
return new TensorFlowEmbeddings();
case "openai":
return new OpenAIEmbeddings();
case "cohere":
return new CohereEmbeddings();
case "huggingface-api":
return new HuggingFaceInferenceEmbeddings();
default:
return new OpenAIEmbeddings();
}
};
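
With this switch in place, callers select an embedder by its string key and get back a LangChain Embeddings instance; `embedDocuments` and `embedQuery` below are standard methods on that interface (the sample strings are illustrative):

import { embeddings } from "./embeddings";

const run = async () => {
  // Unknown keys fall back to OpenAI, per the default branch above.
  const model = embeddings("huggingface-api");

  const vectors = await model.embedDocuments(["hello", "world"]); // number[][]
  const query = await model.embedQuery("hello"); // number[]
  console.log(vectors.length, query.length);
};

run();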