Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Adding support for Pgvector #74

Merged
merged 2 commits into from
Dec 5, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
package io.quarkiverse.langchain4j.samples;

import static dev.langchain4j.data.document.splitter.DocumentSplitters.recursive;

import java.util.List;

import jakarta.enterprise.context.ApplicationScoped;
import jakarta.inject.Inject;

import dev.langchain4j.data.document.Document;
import dev.langchain4j.model.embedding.EmbeddingModel;
import dev.langchain4j.store.embedding.EmbeddingStoreIngestor;
import io.quarkiverse.langchain4j.pgvector.PgVectorEmbeddingStore;

@ApplicationScoped
public class IngestorExampleWithPgvector {

    /**
     * The embedding store, i.e. the database that holds the embeddings.
     * This bean is provided by the quarkus-langchain4j-pgvector extension.
     */
    @Inject
    PgVectorEmbeddingStore store;

    /**
     * The embedding model, i.e. how the vector of a document is computed.
     * This bean is provided by the LLM extension in use (for example openai).
     */
    @Inject
    EmbeddingModel embeddingModel;

    /**
     * Ingests the given documents into the pgvector embedding store.
     * <p>
     * The documents are split with the recursive splitter configured as
     * {@code recursive(500, 0)}, embedded with {@link #embeddingModel} and
     * stored in {@link #store}.
     *
     * @param documents the documents to ingest
     */
    public void ingest(List<Document> documents) {
        // Warning - this can take a long time...
        EmbeddingStoreIngestor.builder()
                .documentSplitter(recursive(500, 0))
                .embeddingModel(embeddingModel)
                .embeddingStore(store)
                .build()
                .ingest(documents);
    }
}
114 changes: 114 additions & 0 deletions docs/modules/ROOT/pages/includes/quarkus-langchain4j-pgvector.adoc
Original file line number Diff line number Diff line change
@@ -0,0 +1,114 @@

:summaryTableId: quarkus-langchain4j-pgvector
[.configuration-legend]
icon:lock[title=Fixed at build time] Configuration property fixed at build time - All other configuration properties are overridable at runtime
[.configuration-reference.searchable, cols="80,.^10,.^10"]
|===

h|[[quarkus-langchain4j-pgvector_configuration]]link:#quarkus-langchain4j-pgvector_configuration[Configuration property]

h|Type
h|Default

a| [[quarkus-langchain4j-pgvector_quarkus.langchain4j.pgvector.table]]`link:#quarkus-langchain4j-pgvector_quarkus.langchain4j.pgvector.table[quarkus.langchain4j.pgvector.table]`


[.description]
--
The table name for storing embeddings

ifdef::add-copy-button-to-env-var[]
Environment variable: env_var_with_copy_button:+++QUARKUS_LANGCHAIN4J_PGVECTOR_TABLE+++[]
endif::add-copy-button-to-env-var[]
ifndef::add-copy-button-to-env-var[]
Environment variable: `+++QUARKUS_LANGCHAIN4J_PGVECTOR_TABLE+++`
endif::add-copy-button-to-env-var[]
--|string
|`embeddings`


a| [[quarkus-langchain4j-pgvector_quarkus.langchain4j.pgvector.dimension]]`link:#quarkus-langchain4j-pgvector_quarkus.langchain4j.pgvector.dimension[quarkus.langchain4j.pgvector.dimension]`


[.description]
--
The dimension of the embedding vectors. This has to be the same as the dimension of vectors produced by the embedding model that you use. For example, AllMiniLmL6V2QuantizedEmbeddingModel produces vectors of dimension 384. OpenAI's text-embedding-ada-002 produces vectors of dimension 1536.

ifdef::add-copy-button-to-env-var[]
Environment variable: env_var_with_copy_button:+++QUARKUS_LANGCHAIN4J_PGVECTOR_DIMENSION+++[]
endif::add-copy-button-to-env-var[]
ifndef::add-copy-button-to-env-var[]
Environment variable: `+++QUARKUS_LANGCHAIN4J_PGVECTOR_DIMENSION+++`
endif::add-copy-button-to-env-var[]
--|int
|required icon:exclamation-circle[title=Configuration property is required]


a| [[quarkus-langchain4j-pgvector_quarkus.langchain4j.pgvector.use-index]]`link:#quarkus-langchain4j-pgvector_quarkus.langchain4j.pgvector.use-index[quarkus.langchain4j.pgvector.use-index]`


[.description]
--
Use index or not

ifdef::add-copy-button-to-env-var[]
Environment variable: env_var_with_copy_button:+++QUARKUS_LANGCHAIN4J_PGVECTOR_USE_INDEX+++[]
endif::add-copy-button-to-env-var[]
ifndef::add-copy-button-to-env-var[]
Environment variable: `+++QUARKUS_LANGCHAIN4J_PGVECTOR_USE_INDEX+++`
endif::add-copy-button-to-env-var[]
--|boolean
|`false`


a| [[quarkus-langchain4j-pgvector_quarkus.langchain4j.pgvector.index-list-size]]`link:#quarkus-langchain4j-pgvector_quarkus.langchain4j.pgvector.index-list-size[quarkus.langchain4j.pgvector.index-list-size]`


[.description]
--
index size

ifdef::add-copy-button-to-env-var[]
Environment variable: env_var_with_copy_button:+++QUARKUS_LANGCHAIN4J_PGVECTOR_INDEX_LIST_SIZE+++[]
endif::add-copy-button-to-env-var[]
ifndef::add-copy-button-to-env-var[]
Environment variable: `+++QUARKUS_LANGCHAIN4J_PGVECTOR_INDEX_LIST_SIZE+++`
endif::add-copy-button-to-env-var[]
--|int
|`0`


a| [[quarkus-langchain4j-pgvector_quarkus.langchain4j.pgvector.create-table]]`link:#quarkus-langchain4j-pgvector_quarkus.langchain4j.pgvector.create-table[quarkus.langchain4j.pgvector.create-table]`


[.description]
--
Create table or not

ifdef::add-copy-button-to-env-var[]
Environment variable: env_var_with_copy_button:+++QUARKUS_LANGCHAIN4J_PGVECTOR_CREATE_TABLE+++[]
endif::add-copy-button-to-env-var[]
ifndef::add-copy-button-to-env-var[]
Environment variable: `+++QUARKUS_LANGCHAIN4J_PGVECTOR_CREATE_TABLE+++`
endif::add-copy-button-to-env-var[]
--|boolean
|`true`


a| [[quarkus-langchain4j-pgvector_quarkus.langchain4j.pgvector.drop-table-first]]`link:#quarkus-langchain4j-pgvector_quarkus.langchain4j.pgvector.drop-table-first[quarkus.langchain4j.pgvector.drop-table-first]`


[.description]
--
Drop table or not

ifdef::add-copy-button-to-env-var[]
Environment variable: env_var_with_copy_button:+++QUARKUS_LANGCHAIN4J_PGVECTOR_DROP_TABLE_FIRST+++[]
endif::add-copy-button-to-env-var[]
ifndef::add-copy-button-to-env-var[]
Environment variable: `+++QUARKUS_LANGCHAIN4J_PGVECTOR_DROP_TABLE_FIRST+++`
endif::add-copy-button-to-env-var[]
--|boolean
|`false`

|===
42 changes: 42 additions & 0 deletions docs/modules/ROOT/pages/pgvector-store.adoc
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
= Pgvector Document Store for Retrieval Augmented Generation (RAG)

include::./includes/attributes.adoc[]

When implementing Retrieval Augmented Generation (RAG), a capable document store is necessary. This guide will explain how to leverage a pgvector database as the document store.

== Leveraging the pgvector Document Store

To utilize the pgvector document store, you'll need to include the following dependency:

[source,xml,subs=attributes+]
----
<dependency>
<groupId>io.quarkiverse.langchain4j</groupId>
<artifactId>quarkus-langchain4j-pgvector</artifactId>
<version>{project-version}</version>
</dependency>
----

This extension checks for a default datasource, so ensure you have defined at least one datasource. For detailed guidance, refer to the link:https://quarkus.io/guides/datasource[Configure Data Sources in Quarkus] guide.

IMPORTANT: If you plan to use `devservices`, be sure to set this property: `quarkus.datasource.devservices.image-name=ankane/pgvector:v0.5.1`.

IMPORTANT: The pgvector store requires the dimension of the vector to be set. Add the `quarkus.langchain4j.pgvector.dimension` property to your `application.properties` file and set it to the dimension of the vector. The dimension depends on the embedding model you use.
For example, `AllMiniLmL6V2QuantizedEmbeddingModel` produces vectors of dimension 384. OpenAI’s `text-embedding-ada-002` produces vectors of dimension 1536.

Upon installing the extension, you can utilize the pgvector store using the following code:

[source,java]
----
include::{examples-dir}/io/quarkiverse/langchain4j/samples/IngestorExampleWithPgvector.java[]
----

== Configuration Settings

Customize the behavior of the extension by exploring various configuration options:

include::includes/quarkus-langchain4j-pgvector.adoc[leveloffset=+1,opts=optional]

== Under the Hood

Each ingested document is saved as a row in a Postgres table, containing the embedding column stored as a vector.
14 changes: 0 additions & 14 deletions docs/modules/ROOT/pages/pinecone-store.adoc
Original file line number Diff line number Diff line change
Expand Up @@ -16,20 +16,6 @@ To make use of the Pinecone document store, you'll need to include the following
</dependency>
----

The required configuration properties to make the extension work are
`quarkus.langchain4j.pinecone.api-key`,
`quarkus.langchain4j.pinecone.environment`,
`quarkus.langchain4j.pinecone.index-name`, and
`quarkus.langchain4j.pinecone.project-id`. The specified index will be
created if it doesn't exist yet.

Upon installing the extension, you can utilize the Pinecone embedding store using the following code:

[source,java]
----
include::{examples-dir}/io/quarkiverse/langchain4j/samples/IngestorExampleWithPinecone.java[]
----

== Configuration Settings

Customize the behavior of the extension by exploring various configuration options:
Expand Down
6 changes: 6 additions & 0 deletions docs/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,11 @@
<artifactId>quarkus-langchain4j-pinecone-deployment</artifactId>
<version>${project.version}</version>
</dependency>
<dependency>
<groupId>io.quarkiverse.langchain4j</groupId>
<artifactId>quarkus-langchain4j-pgvector-deployment</artifactId>
<version>${project.version}</version>
</dependency>
<dependency>
<groupId>io.quarkiverse.langchain4j</groupId>
<artifactId>quarkus-langchain4j-hugging-face-deployment</artifactId>
Expand Down Expand Up @@ -121,6 +126,7 @@
<include>quarkus-langchain4j-redis.adoc</include>
<include>quarkus-langchain4j-chroma.adoc</include>
<include>quarkus-langchain4j-pinecone.adoc</include>
<include>quarkus-langchain4j-pgvector.adoc</include>
<filtering>false</filtering>
</resource>
<resource>
Expand Down
79 changes: 79 additions & 0 deletions pgvector/deployment/pom.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
<?xml version="1.0" encoding="UTF-8"?>
<!-- Maven descriptor for the deployment module of the pgvector extension. -->
<project xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 https://maven.apache.org/xsd/maven-4.0.0.xsd" xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
<modelVersion>4.0.0</modelVersion>
<parent>
<groupId>io.quarkiverse.langchain4j</groupId>
<artifactId>quarkus-langchain4j-pgvector-parent</artifactId>
<version>999-SNAPSHOT</version>
</parent>
<artifactId>quarkus-langchain4j-pgvector-deployment</artifactId>
<name>Quarkus langchain4j-pgvector - Deployment</name>
<dependencies>
<!-- The pgvector extension artifact this module builds on (presumably the runtime module; confirm against the parent POM). -->
<dependency>
<groupId>io.quarkiverse.langchain4j</groupId>
<artifactId>quarkus-langchain4j-pgvector</artifactId>
<version>${project.version}</version>
</dependency>
<!-- Quarkus deployment artifacts: ArC, Jackson, Agroal and the PostgreSQL JDBC driver. No versions are declared here, so they are expected to be managed elsewhere. -->
<dependency>
<groupId>io.quarkus</groupId>
<artifactId>quarkus-arc-deployment</artifactId>
</dependency>
<dependency>
<groupId>io.quarkus</groupId>
<artifactId>quarkus-jackson-deployment</artifactId>
</dependency>
<dependency>
<groupId>io.quarkus</groupId>
<artifactId>quarkus-agroal-deployment</artifactId>
</dependency>
<dependency>
<groupId>io.quarkus</groupId>
<artifactId>quarkus-jdbc-postgresql-deployment</artifactId>
</dependency>
<!-- Deployment artifact of the core langchain4j extension from this project. -->
<dependency>
<groupId>io.quarkiverse.langchain4j</groupId>
<artifactId>quarkus-langchain4j-core-deployment</artifactId>
<version>${project.version}</version>
</dependency>
<!-- Test-scoped dependencies. The version properties used below are not defined in this file. -->
<dependency>
<groupId>io.quarkus</groupId>
<artifactId>quarkus-junit5-internal</artifactId>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.assertj</groupId>
<artifactId>assertj-core</artifactId>
<version>${assertj.version}</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.wiremock</groupId>
<artifactId>wiremock-standalone</artifactId>
<version>${wiremock.version}</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>dev.langchain4j</groupId>
<artifactId>langchain4j-embeddings-all-minilm-l6-v2-q</artifactId>
<version>${langchain4j.version}</version>
<scope>test</scope>
</dependency>
</dependencies>
<build>
<plugins>
<!-- Adds the Quarkus extension annotation processor to the compiler's annotation processor path. -->
<plugin>
<artifactId>maven-compiler-plugin</artifactId>
<configuration>
<annotationProcessorPaths>
<path>
<groupId>io.quarkus</groupId>
<artifactId>quarkus-extension-processor</artifactId>
<version>${quarkus.version}</version>
</path>
</annotationProcessorPaths>
</configuration>
</plugin>
</plugins>
</build>
</project>
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
package io.quarkiverse.langchain4j.pgvector.deployment;

import jakarta.enterprise.context.ApplicationScoped;

import org.jboss.jandex.ClassType;
import org.jboss.jandex.DotName;
import org.jboss.jandex.ParameterizedType;

import com.pgvector.PGvector;

import dev.langchain4j.data.segment.TextSegment;
import dev.langchain4j.store.embedding.EmbeddingStore;
import io.agroal.api.AgroalDataSource;
import io.quarkiverse.langchain4j.pgvector.PgVectorEmbeddingStore;
import io.quarkiverse.langchain4j.pgvector.runtime.PgVectorEmbeddingStoreConfig;
import io.quarkiverse.langchain4j.pgvector.runtime.PgVectorEmbeddingStoreRecorder;
import io.quarkus.arc.deployment.SyntheticBeanBuildItem;
import io.quarkus.deployment.annotations.BuildProducer;
import io.quarkus.deployment.annotations.BuildStep;
import io.quarkus.deployment.annotations.ExecutionTime;
import io.quarkus.deployment.annotations.Record;
import io.quarkus.deployment.builditem.FeatureBuildItem;
import io.quarkus.deployment.builditem.nativeimage.ReflectiveClassBuildItem;

/**
 * Build steps of the pgvector extension: declares the feature, registers the
 * {@link PgVectorEmbeddingStore} synthetic bean and registers {@link PGvector}
 * for reflection.
 */
class Langchain4jPgvectorProcessor {

    public static final DotName PGVECTOR_EMBEDDING_STORE = DotName.createSimple(PgVectorEmbeddingStore.class);

    private static final String FEATURE = "langchain4j-pgvector";

    /**
     * Declares the {@code langchain4j-pgvector} feature.
     *
     * @return the feature build item carrying the feature name
     */
    @BuildStep
    FeatureBuildItem feature() {
        return new FeatureBuildItem(FEATURE);
    }

    /**
     * Produces the synthetic bean for {@link PgVectorEmbeddingStore}.
     * <p>
     * The bean is a runtime-initialized, application-scoped default bean that is
     * also exposed as {@code EmbeddingStore} and {@code EmbeddingStore<TextSegment>}.
     * It declares an injection point on {@link AgroalDataSource} and is created by
     * the function obtained from the recorder.
     *
     * @param beanProducer producer receiving the synthetic bean definition
     * @param recorder recorder supplying the function that creates the store
     * @param config the pgvector store configuration handed to the recorder
     */
    @BuildStep
    @Record(ExecutionTime.RUNTIME_INIT)
    public void createBean(
            BuildProducer<SyntheticBeanBuildItem> beanProducer,
            PgVectorEmbeddingStoreRecorder recorder,
            PgVectorEmbeddingStoreConfig config) {
        // Jandex descriptors for the bean types and for the required injection point.
        ClassType rawStoreType = ClassType.create(EmbeddingStore.class);
        ParameterizedType textSegmentStoreType = ParameterizedType.create(EmbeddingStore.class,
                ClassType.create(TextSegment.class));
        ClassType dataSourceType = ClassType.create(DotName.createSimple(AgroalDataSource.class));

        SyntheticBeanBuildItem storeBean = SyntheticBeanBuildItem
                .configure(PGVECTOR_EMBEDDING_STORE)
                .types(rawStoreType, textSegmentStoreType)
                .setRuntimeInit()
                .defaultBean()
                .scope(ApplicationScoped.class)
                .addInjectionPoint(dataSourceType)
                .createWith(recorder.embeddingStoreFunction(config))
                .done();

        beanProducer.produce(storeBean);
    }

    /**
     * Registers {@link PGvector} for reflection.
     *
     * @return the reflective class build item for {@code PGvector}
     */
    @BuildStep
    public ReflectiveClassBuildItem reflectiveClass() {
        return ReflectiveClassBuildItem.builder(PGvector.class).build();
    }
}
Loading