-
Notifications
You must be signed in to change notification settings - Fork 99
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Showing
16 changed files
with
1,146 additions
and
0 deletions.
There are no files selected for viewing
41 changes: 41 additions & 0 deletions
41
...dules/ROOT/examples/io/quarkiverse/langchain4j/samples/IngestorExampleWithOpenSearch.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,41 @@ | ||
package io.quarkiverse.langchain4j.samples; | ||
|
||
import static dev.langchain4j.data.document.splitter.DocumentSplitters.recursive; | ||
|
||
import java.util.List; | ||
|
||
import jakarta.enterprise.context.ApplicationScoped; | ||
import jakarta.inject.Inject; | ||
|
||
import dev.langchain4j.data.document.Document; | ||
import dev.langchain4j.model.embedding.EmbeddingModel; | ||
import dev.langchain4j.store.embedding.EmbeddingStoreIngestor; | ||
import io.quarkiverse.langchain4j.opensearch.OpenSearchEmbeddingStore; | ||
|
||
/** | ||
* Example application-scoped bean that feeds documents into the injected | ||
* OpenSearch embedding store through an {@link EmbeddingStoreIngestor}. | ||
*/
@ApplicationScoped | ||
public class IngestorExampleWithOpenSearch { | ||
|
||
/** | ||
* The embedding store (the database). | ||
* The bean is provided by the quarkus-langchain4j-opensearch extension. | ||
*/ | ||
@Inject | ||
OpenSearchEmbeddingStore store; | ||
|
||
/** | ||
* The embedding model (how the vector of a document is computed). | ||
* The bean is provided by the LLM (like openai) extension. | ||
*/ | ||
@Inject | ||
EmbeddingModel embeddingModel; | ||
|
||
/** | ||
* Builds an {@link EmbeddingStoreIngestor} wired with the injected store, the | ||
* injected embedding model and a recursive document splitter, then passes the | ||
* given documents to it. | ||
* | ||
* @param documents the documents handed to the ingestor | ||
*/
public void ingest(List<Document> documents) { | ||
EmbeddingStoreIngestor ingestor = EmbeddingStoreIngestor.builder() | ||
.embeddingStore(store) | ||
.embeddingModel(embeddingModel) | ||
// NOTE(review): presumably 500 is the maximum segment size and 0 the overlap - confirm against DocumentSplitters.recursive | ||
.documentSplitter(recursive(500, 0)) | ||
.build(); | ||
// Warning - this can take a long time... | ||
ingestor.ingest(documents); | ||
} | ||
} |
29 changes: 29 additions & 0 deletions
29
docs/modules/ROOT/pages/includes/quarkus-langchain4j-opensearch.adoc
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,29 @@ | ||
|
||
:summaryTableId: quarkus-langchain4j-opensearch | ||
[.configuration-legend] | ||
icon:lock[title=Fixed at build time] Configuration property fixed at build time - All other configuration properties are overridable at runtime | ||
[.configuration-reference.searchable, cols="80,.^10,.^10"] | ||
|=== | ||
|
||
h|[[quarkus-langchain4j-opensearch_configuration]]link:#quarkus-langchain4j-opensearch_configuration[Configuration property] | ||
|
||
h|Type | ||
h|Default | ||
|
||
a| [[quarkus-langchain4j-opensearch_quarkus.langchain4j.opensearch.index]]`link:#quarkus-langchain4j-opensearch_quarkus.langchain4j.opensearch.index[quarkus.langchain4j.opensearch.index]` | ||
|
||
|
||
[.description] | ||
-- | ||
Name of the index that will be used in OpenSearch when searching for related embeddings. If this index doesn't exist, it will be created. | ||
|
||
ifdef::add-copy-button-to-env-var[] | ||
Environment variable: env_var_with_copy_button:+++QUARKUS_LANGCHAIN4J_OPENSEARCH_INDEX+++[] | ||
endif::add-copy-button-to-env-var[] | ||
ifndef::add-copy-button-to-env-var[] | ||
Environment variable: `+++QUARKUS_LANGCHAIN4J_OPENSEARCH_INDEX+++` | ||
endif::add-copy-button-to-env-var[] | ||
--|string | ||
|`default` | ||
|
||
|=== |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,37 @@ | ||
= OpenSearch Document Store for Retrieval Augmented Generation (RAG) | ||
|
||
include::./includes/attributes.adoc[] | ||
|
||
When implementing Retrieval Augmented Generation (RAG), a capable document store is necessary. This guide will explain how to leverage OpenSearch as the document store. | ||
|
||
== Leveraging the OpenSearch Document Store | ||
|
||
To utilize the OpenSearch document store, you'll need to include the following dependency: | ||
|
||
[source,xml,subs=attributes+] | ||
---- | ||
<dependency> | ||
<groupId>io.quarkiverse.langchain4j</groupId> | ||
<artifactId>quarkus-langchain4j-opensearch</artifactId> | ||
<version>{project-version}</version> | ||
</dependency> | ||
---- | ||
|
||
This extension relies on the OpenSearch Java Client, so make sure you have one configured correctly. | ||
|
||
Upon installing the extension, you can utilize the OpenSearch store using the following code: | ||
|
||
[source,java] | ||
---- | ||
include::{examples-dir}/io/quarkiverse/langchain4j/samples/IngestorExampleWithOpenSearch.java[] | ||
---- | ||
|
||
== Configuration Settings | ||
|
||
Customize the behavior of the extension by exploring various configuration options: | ||
|
||
include::includes/quarkus-langchain4j-opensearch.adoc[leveloffset=+1,opts=optional] | ||
|
||
== Under the Hood | ||
|
||
Each ingested document is saved as a document in the configured OpenSearch index, with its embedding stored as a vector field. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,79 @@ | ||
<?xml version="1.0" encoding="UTF-8"?> | ||
<project xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 https://maven.apache.org/xsd/maven-4.0.0.xsd" | ||
xmlns="http://maven.apache.org/POM/4.0.0" | ||
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"> | ||
<modelVersion>4.0.0</modelVersion> | ||
<parent> | ||
<groupId>io.quarkiverse.langchain4j</groupId> | ||
<artifactId>quarkus-langchain4j-opensearch-parent</artifactId> | ||
<version>999-SNAPSHOT</version> | ||
</parent> | ||
<artifactId>quarkus-langchain4j-opensearch-deployment</artifactId> | ||
<name>Quarkus Langchain4j - Opensearch embedding store - Deployment</name> | ||
<dependencies> | ||
<dependency> | ||
<groupId>io.quarkus</groupId> | ||
<artifactId>quarkus-arc-deployment</artifactId> | ||
</dependency> | ||
<dependency> | ||
<groupId>io.quarkiverse.langchain4j</groupId> | ||
<artifactId>quarkus-langchain4j-opensearch</artifactId> | ||
<version>${project.version}</version> | ||
</dependency> | ||
<dependency> | ||
<groupId>io.quarkiverse.langchain4j</groupId> | ||
<artifactId>quarkus-langchain4j-core-deployment</artifactId> | ||
<version>${project.version}</version> | ||
</dependency> | ||
<dependency> | ||
<groupId>io.quarkiverse.opensearch</groupId> | ||
<artifactId>quarkus-opensearch-java-client-deployment</artifactId> | ||
<version>1.4.0</version> | ||
</dependency> | ||
<dependency> | ||
<groupId>io.quarkus</groupId> | ||
<artifactId>quarkus-junit5-internal</artifactId> | ||
<scope>test</scope> | ||
</dependency> | ||
<dependency> | ||
<groupId>org.assertj</groupId> | ||
<artifactId>assertj-core</artifactId> | ||
<version>${assertj.version}</version> | ||
<scope>test</scope> | ||
</dependency> | ||
<dependency> | ||
<groupId>org.wiremock</groupId> | ||
<artifactId>wiremock-standalone</artifactId> | ||
<version>${wiremock.version}</version> | ||
<scope>test</scope> | ||
</dependency> | ||
<dependency> | ||
<groupId>dev.langchain4j</groupId> | ||
<artifactId>langchain4j-embeddings-all-minilm-l6-v2-q</artifactId> | ||
<version>${langchain4j-embeddings.version}</version> | ||
<scope>test</scope> | ||
<exclusions> | ||
<exclusion> | ||
<groupId>dev.langchain4j</groupId> | ||
<artifactId>langchain4j-core</artifactId> | ||
</exclusion> | ||
</exclusions> | ||
</dependency> | ||
</dependencies> | ||
<build> | ||
<plugins> | ||
<plugin> | ||
<artifactId>maven-compiler-plugin</artifactId> | ||
<configuration> | ||
<annotationProcessorPaths> | ||
<path> | ||
<groupId>io.quarkus</groupId> | ||
<artifactId>quarkus-extension-processor</artifactId> | ||
<version>${quarkus.version}</version> | ||
</path> | ||
</annotationProcessorPaths> | ||
</configuration> | ||
</plugin> | ||
</plugins> | ||
</build> | ||
</project> |
60 changes: 60 additions & 0 deletions
60
...java/io/quarkiverse/langchain4j/opensearch/deployment/Langchain4jOpensearchProcessor.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,60 @@ | ||
package io.quarkiverse.langchain4j.opensearch.deployment; | ||
|
||
import jakarta.enterprise.context.ApplicationScoped; | ||
import jakarta.enterprise.inject.Default; | ||
|
||
import org.jboss.jandex.AnnotationInstance; | ||
import org.jboss.jandex.ClassType; | ||
import org.jboss.jandex.DotName; | ||
import org.jboss.jandex.ParameterizedType; | ||
import org.opensearch.client.opensearch.OpenSearchClient; | ||
|
||
import dev.langchain4j.data.segment.TextSegment; | ||
import dev.langchain4j.store.embedding.EmbeddingStore; | ||
import io.quarkiverse.langchain4j.deployment.EmbeddingStoreBuildItem; | ||
import io.quarkiverse.langchain4j.opensearch.OpenSearchEmbeddingStore; | ||
import io.quarkiverse.langchain4j.opensearch.runtime.OpenSearchEmbeddingStoreConfig; | ||
import io.quarkiverse.langchain4j.opensearch.runtime.OpenSearchEmbeddingStoreRecorder; | ||
import io.quarkus.arc.deployment.SyntheticBeanBuildItem; | ||
import io.quarkus.deployment.annotations.BuildProducer; | ||
import io.quarkus.deployment.annotations.BuildStep; | ||
import io.quarkus.deployment.annotations.ExecutionTime; | ||
import io.quarkus.deployment.annotations.Record; | ||
import io.quarkus.deployment.builditem.FeatureBuildItem; | ||
|
||
/** | ||
* Build-time processor of the langchain4j-opensearch extension: it declares the | ||
* feature and registers a synthetic bean for the OpenSearch embedding store. | ||
*/
class Langchain4jOpensearchProcessor { | ||
|
||
// Jandex name of the implementation class used to configure the synthetic bean. | ||
public static final DotName OPENSEARCH_EMBEDDING_STORE = DotName.createSimple(OpenSearchEmbeddingStore.class); | ||
|
||
// Feature name passed to the FeatureBuildItem below. | ||
private static final String FEATURE = "langchain4j-opensearch"; | ||
|
||
/** | ||
* Declares the extension's feature. | ||
* | ||
* @return a {@link FeatureBuildItem} carrying the {@code langchain4j-opensearch} name | ||
*/
@BuildStep | ||
FeatureBuildItem feature() { | ||
return new FeatureBuildItem(FEATURE); | ||
} | ||
|
||
/** | ||
* Registers the embedding store as a synthetic bean and produces an | ||
* {@link EmbeddingStoreBuildItem}. | ||
* | ||
* @param beanProducer receives the synthetic bean definition | ||
* @param recorder supplies the creation function for the bean | ||
* @param config configuration passed to the recorder | ||
* @param embeddingStoreProducer receives the {@link EmbeddingStoreBuildItem} | ||
*/
@BuildStep | ||
@Record(ExecutionTime.RUNTIME_INIT) | ||
public void createBean( | ||
BuildProducer<SyntheticBeanBuildItem> beanProducer, | ||
OpenSearchEmbeddingStoreRecorder recorder, | ||
OpenSearchEmbeddingStoreConfig config, | ||
BuildProducer<EmbeddingStoreBuildItem> embeddingStoreProducer) { | ||
// The OpenSearchClient injection point is qualified with @Default. | ||
AnnotationInstance openSearchClientQualifier; | ||
openSearchClientQualifier = AnnotationInstance.builder(Default.class).build(); | ||
|
||
// The bean is exposed as both raw EmbeddingStore and EmbeddingStore<TextSegment>; | ||
// it is application-scoped, runtime-initialized and marked as a default bean. | ||
beanProducer.produce(SyntheticBeanBuildItem | ||
.configure(OPENSEARCH_EMBEDDING_STORE) | ||
.types(ClassType.create(EmbeddingStore.class), | ||
ParameterizedType.create(EmbeddingStore.class, ClassType.create(TextSegment.class))) | ||
.setRuntimeInit() | ||
.defaultBean() | ||
.scope(ApplicationScoped.class) | ||
.addInjectionPoint(ClassType.create(DotName.createSimple(OpenSearchClient.class)), | ||
openSearchClientQualifier) | ||
.createWith(recorder.embeddingStoreFunction(config)) | ||
.done()); | ||
embeddingStoreProducer.produce(new EmbeddingStoreBuildItem()); | ||
} | ||
|
||
} |
Oops, something went wrong.