Skip to content

Commit

Permalink
adding opensearch
Browse files Browse the repository at this point in the history
formatting and doc
  • Loading branch information
sebastienblanc committed Jan 9, 2024
1 parent 229ad86 commit 2e88bf5
Show file tree
Hide file tree
Showing 16 changed files with 1,146 additions and 0 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
package io.quarkiverse.langchain4j.samples;

import static dev.langchain4j.data.document.splitter.DocumentSplitters.recursive;

import java.util.List;

import jakarta.enterprise.context.ApplicationScoped;
import jakarta.inject.Inject;

import dev.langchain4j.data.document.Document;
import dev.langchain4j.model.embedding.EmbeddingModel;
import dev.langchain4j.store.embedding.EmbeddingStoreIngestor;
import io.quarkiverse.langchain4j.opensearch.OpenSearchEmbeddingStore;

@ApplicationScoped
public class IngestorExampleWithOpenSearch {

    /** Maximum size, in characters, of each segment produced by the recursive splitter. */
    private static final int MAX_SEGMENT_SIZE_IN_CHARS = 500;

    /** Number of characters shared between two consecutive segments (none here). */
    private static final int MAX_OVERLAP_SIZE_IN_CHARS = 0;

    /**
     * The embedding store (the database) that receives the ingested segments.
     * This bean is supplied by the quarkus-langchain4j-opensearch extension.
     */
    @Inject
    OpenSearchEmbeddingStore store;

    /**
     * The embedding model, i.e. how the vector of a document is computed.
     * This bean is supplied by the LLM extension in use (for example openai).
     */
    @Inject
    EmbeddingModel embeddingModel;

    /**
     * Splits the given documents, computes their embeddings with the injected model
     * and writes the result into the injected OpenSearch embedding store.
     *
     * @param documents the documents to ingest
     */
    public void ingest(List<Document> documents) {
        // Warning - this can take a long time...
        EmbeddingStoreIngestor.builder()
                .embeddingStore(store)
                .embeddingModel(embeddingModel)
                .documentSplitter(recursive(MAX_SEGMENT_SIZE_IN_CHARS, MAX_OVERLAP_SIZE_IN_CHARS))
                .build()
                .ingest(documents);
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@

:summaryTableId: quarkus-langchain4j-opensearch
[.configuration-legend]
icon:lock[title=Fixed at build time] Configuration property fixed at build time - All other configuration properties are overridable at runtime
[.configuration-reference.searchable, cols="80,.^10,.^10"]
|===

h|[[quarkus-langchain4j-opensearch_configuration]]link:#quarkus-langchain4j-opensearch_configuration[Configuration property]

h|Type
h|Default

a| [[quarkus-langchain4j-opensearch_quarkus.langchain4j.opensearch.index]]`link:#quarkus-langchain4j-opensearch_quarkus.langchain4j.opensearch.index[quarkus.langchain4j.opensearch.index]`


[.description]
--
Name of the index that will be used in OpenSearch when searching for related embeddings. If this index doesn't exist, it will be created.

ifdef::add-copy-button-to-env-var[]
Environment variable: env_var_with_copy_button:+++QUARKUS_LANGCHAIN4J_OPENSEARCH_INDEX+++[]
endif::add-copy-button-to-env-var[]
ifndef::add-copy-button-to-env-var[]
Environment variable: `+++QUARKUS_LANGCHAIN4J_OPENSEARCH_INDEX+++`
endif::add-copy-button-to-env-var[]
--|string
|`default`

|===
37 changes: 37 additions & 0 deletions docs/modules/ROOT/pages/opensearch-store.adoc
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
= OpenSearch Document Store for Retrieval Augmented Generation (RAG)

include::./includes/attributes.adoc[]

When implementing Retrieval Augmented Generation (RAG), a capable document store is necessary. This guide will explain how to leverage an OpenSearch cluster as the document store.

== Leveraging the OpenSearch Document Store

To utilize the OpenSearch document store, you'll need to include the following dependency:

[source,xml,subs=attributes+]
----
<dependency>
<groupId>io.quarkiverse.langchain4j</groupId>
<artifactId>quarkus-langchain4j-opensearch</artifactId>
<version>{project-version}</version>
</dependency>
----

This extension relies on the Quarkus OpenSearch Java Client extension, so make sure a client is configured correctly.

Upon installing the extension, you can utilize the OpenSearch store using the following code:

[source,java]
----
include::{examples-dir}/io/quarkiverse/langchain4j/samples/IngestorExampleWithOpenSearch.java[]
----

== Configuration Settings

Customize the behavior of the extension by exploring various configuration options:

include::includes/quarkus-langchain4j-opensearch.adoc[leveloffset=+1,opts=optional]

== Under the Hood

Each ingested document is saved as a document in the configured OpenSearch index, with its embedding stored as a vector field.
11 changes: 11 additions & 0 deletions docs/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,11 @@
<artifactId>quarkus-langchain4j-pinecone</artifactId>
<version>${project.version}</version>
</dependency>
<dependency>
<groupId>io.quarkiverse.langchain4j</groupId>
<artifactId>quarkus-langchain4j-opensearch</artifactId>
<version>${project.version}</version>
</dependency>

<!-- Make sure the doc is built after the other artifacts -->
<dependency>
Expand Down Expand Up @@ -73,6 +78,11 @@
<artifactId>quarkus-langchain4j-hugging-face-deployment</artifactId>
<version>${project.version}</version>
</dependency>
<dependency>
<groupId>io.quarkiverse.langchain4j</groupId>
<artifactId>quarkus-langchain4j-opensearch-deployment</artifactId>
<version>${project.version}</version>
</dependency>
</dependencies>

<build>
Expand Down Expand Up @@ -127,6 +137,7 @@
<include>quarkus-langchain4j-chroma.adoc</include>
<include>quarkus-langchain4j-pinecone.adoc</include>
<include>quarkus-langchain4j-pgvector.adoc</include>
<include>quarkus-langchain4j-opensearch.adoc</include>
<filtering>false</filtering>
</resource>
<resource>
Expand Down
79 changes: 79 additions & 0 deletions opensearch/deployment/pom.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
<?xml version="1.0" encoding="UTF-8"?>
<!-- Build descriptor for the deployment (build-time) module of the OpenSearch embedding store extension. -->
<project xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 https://maven.apache.org/xsd/maven-4.0.0.xsd"
xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
<modelVersion>4.0.0</modelVersion>
<parent>
<groupId>io.quarkiverse.langchain4j</groupId>
<artifactId>quarkus-langchain4j-opensearch-parent</artifactId>
<version>999-SNAPSHOT</version>
</parent>
<artifactId>quarkus-langchain4j-opensearch-deployment</artifactId>
<name>Quarkus Langchain4j - Opensearch embedding store - Deployment</name>
<dependencies>
<!-- Build-time side of ArC (CDI); the processor produces SyntheticBeanBuildItem instances. -->
<dependency>
<groupId>io.quarkus</groupId>
<artifactId>quarkus-arc-deployment</artifactId>
</dependency>
<!-- Runtime module of this extension. -->
<dependency>
<groupId>io.quarkiverse.langchain4j</groupId>
<artifactId>quarkus-langchain4j-opensearch</artifactId>
<version>${project.version}</version>
</dependency>
<!-- Deployment module of the core langchain4j extension. -->
<dependency>
<groupId>io.quarkiverse.langchain4j</groupId>
<artifactId>quarkus-langchain4j-core-deployment</artifactId>
<version>${project.version}</version>
</dependency>
<!-- Deployment module of the Quarkiverse OpenSearch Java client extension. -->
<!-- NOTE(review): the version is pinned inline here, unlike the other dependencies which use properties or
     managed versions; consider moving it to a shared property. -->
<dependency>
<groupId>io.quarkiverse.opensearch</groupId>
<artifactId>quarkus-opensearch-java-client-deployment</artifactId>
<version>1.4.0</version>
</dependency>
<!-- Test-scoped dependencies. -->
<dependency>
<groupId>io.quarkus</groupId>
<artifactId>quarkus-junit5-internal</artifactId>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.assertj</groupId>
<artifactId>assertj-core</artifactId>
<version>${assertj.version}</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.wiremock</groupId>
<artifactId>wiremock-standalone</artifactId>
<version>${wiremock.version}</version>
<scope>test</scope>
</dependency>
<!-- Embedding model artifact used by tests only. -->
<!-- langchain4j-core is excluded from its transitive dependencies; presumably so that the version brought in
     by the core extension is the one used - TODO confirm. -->
<dependency>
<groupId>dev.langchain4j</groupId>
<artifactId>langchain4j-embeddings-all-minilm-l6-v2-q</artifactId>
<version>${langchain4j-embeddings.version}</version>
<scope>test</scope>
<exclusions>
<exclusion>
<groupId>dev.langchain4j</groupId>
<artifactId>langchain4j-core</artifactId>
</exclusion>
</exclusions>
</dependency>
</dependencies>
<build>
<plugins>
<plugin>
<artifactId>maven-compiler-plugin</artifactId>
<configuration>
<!-- Registers the Quarkus extension annotation processor on the compiler's annotation processor path. -->
<annotationProcessorPaths>
<path>
<groupId>io.quarkus</groupId>
<artifactId>quarkus-extension-processor</artifactId>
<version>${quarkus.version}</version>
</path>
</annotationProcessorPaths>
</configuration>
</plugin>
</plugins>
</build>
</project>
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
package io.quarkiverse.langchain4j.opensearch.deployment;

import jakarta.enterprise.context.ApplicationScoped;
import jakarta.enterprise.inject.Default;

import org.jboss.jandex.AnnotationInstance;
import org.jboss.jandex.ClassType;
import org.jboss.jandex.DotName;
import org.jboss.jandex.ParameterizedType;
import org.opensearch.client.opensearch.OpenSearchClient;

import dev.langchain4j.data.segment.TextSegment;
import dev.langchain4j.store.embedding.EmbeddingStore;
import io.quarkiverse.langchain4j.deployment.EmbeddingStoreBuildItem;
import io.quarkiverse.langchain4j.opensearch.OpenSearchEmbeddingStore;
import io.quarkiverse.langchain4j.opensearch.runtime.OpenSearchEmbeddingStoreConfig;
import io.quarkiverse.langchain4j.opensearch.runtime.OpenSearchEmbeddingStoreRecorder;
import io.quarkus.arc.deployment.SyntheticBeanBuildItem;
import io.quarkus.deployment.annotations.BuildProducer;
import io.quarkus.deployment.annotations.BuildStep;
import io.quarkus.deployment.annotations.ExecutionTime;
import io.quarkus.deployment.annotations.Record;
import io.quarkus.deployment.builditem.FeatureBuildItem;

/**
 * Build-time processor of the langchain4j OpenSearch extension: it declares the
 * extension feature and registers the OpenSearch embedding store as a synthetic bean.
 */
class Langchain4jOpensearchProcessor {

    public static final DotName OPENSEARCH_EMBEDDING_STORE = DotName.createSimple(OpenSearchEmbeddingStore.class);

    private static final String FEATURE = "langchain4j-opensearch";

    /**
     * Declares the feature name of this extension.
     *
     * @return the feature build item carrying {@code langchain4j-opensearch}
     */
    @BuildStep
    FeatureBuildItem feature() {
        return new FeatureBuildItem(FEATURE);
    }

    /**
     * Registers the OpenSearch embedding store as a synthetic, application-scoped default bean
     * that is initialized at runtime, and signals that an embedding store is available.
     *
     * @param beanProducer receives the synthetic bean definition
     * @param recorder recorder supplying the function that creates the store
     * @param config embedding store configuration handed to the recorder
     * @param embeddingStoreProducer receives the marker build item for the embedding store
     */
    @BuildStep
    @Record(ExecutionTime.RUNTIME_INIT)
    public void createBean(
            BuildProducer<SyntheticBeanBuildItem> beanProducer,
            OpenSearchEmbeddingStoreRecorder recorder,
            OpenSearchEmbeddingStoreConfig config,
            BuildProducer<EmbeddingStoreBuildItem> embeddingStoreProducer) {
        // The OpenSearch client is looked up with the @Default qualifier.
        AnnotationInstance defaultQualifier = AnnotationInstance.builder(Default.class).build();
        ClassType clientType = ClassType.create(DotName.createSimple(OpenSearchClient.class));

        // Expose the bean both as the raw EmbeddingStore and as EmbeddingStore<TextSegment>.
        ClassType rawStoreType = ClassType.create(EmbeddingStore.class);
        ParameterizedType textSegmentStoreType = ParameterizedType.create(EmbeddingStore.class,
                ClassType.create(TextSegment.class));

        SyntheticBeanBuildItem storeBean = SyntheticBeanBuildItem
                .configure(OPENSEARCH_EMBEDDING_STORE)
                .types(rawStoreType, textSegmentStoreType)
                .setRuntimeInit()
                .defaultBean()
                .scope(ApplicationScoped.class)
                .addInjectionPoint(clientType, defaultQualifier)
                .createWith(recorder.embeddingStoreFunction(config))
                .done();

        beanProducer.produce(storeBean);
        embeddingStoreProducer.produce(new EmbeddingStoreBuildItem());
    }

}
Loading

0 comments on commit 2e88bf5

Please sign in to comment.