Skip to content

Commit

Permalink
Merge pull request #841 from jmartisk/tavily
Browse files Browse the repository at this point in the history
Tavily web search engine integration
  • Loading branch information
geoand authored Aug 30, 2024
2 parents b9c9cfc + 4eb8947 commit 1bb873c
Show file tree
Hide file tree
Showing 34 changed files with 1,688 additions and 0 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,7 @@ public void telemetry(Capabilities capabilities, BuildProducer<AdditionalBeanBui
@Record(ExecutionTime.STATIC_INIT)
public void handleTools(CombinedIndexBuildItem indexBuildItem,
ToolsRecorder recorder,
BuildProducer<AdditionalBeanBuildItem> additionalBeanProducer,
RecorderContext recorderContext,
BuildProducer<BytecodeTransformerBuildItem> transformerProducer,
BuildProducer<GeneratedClassBuildItem> generatedClassProducer,
Expand Down Expand Up @@ -176,6 +177,14 @@ public void handleTools(CombinedIndexBuildItem indexBuildItem,
boolean ignoreToolMethod = ignoreToolMethod(toolMethod, index);
if (ignoreToolMethod) {
continue;
} else {
// The WebSearchTool class isn't a CDI bean, so if
// we consider it as a tool, we have to also turn it into one
if (LangChain4jDotNames.WEB_SEARCH_TOOL.equals(className)) {
additionalBeanProducer.produce(AdditionalBeanBuildItem.builder()
.addBeanClass(className.toString())
.setUnremovable().build());
}
}

AnnotationValue nameValue = instance.value("name");
Expand Down
1 change: 1 addition & 0 deletions docs/modules/ROOT/nav.adoc
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
** xref:easy-rag.adoc[Easy RAG]
** xref:dev-ui.adoc[Dev UI]
** xref:reranking.adoc[Reranking]
** xref:web-search.adoc[Web search]
* Advanced topics
** xref:fault-tolerance.adoc[Fault Tolerance]
Expand Down
220 changes: 220 additions & 0 deletions docs/modules/ROOT/pages/includes/quarkus-langchain4j-tavily.adoc
Original file line number Diff line number Diff line change
@@ -0,0 +1,220 @@

:summaryTableId: quarkus-langchain4j-tavily
[.configuration-legend]
icon:lock[title=Fixed at build time] Configuration property fixed at build time - All other configuration properties are overridable at runtime
[.configuration-reference.searchable, cols="80,.^10,.^10"]
|===

h|[[quarkus-langchain4j-tavily_configuration]]link:#quarkus-langchain4j-tavily_configuration[Configuration property]

h|Type
h|Default

a| [[quarkus-langchain4j-tavily_quarkus-langchain4j-tavily-base-url]]`link:#quarkus-langchain4j-tavily_quarkus-langchain4j-tavily-base-url[quarkus.langchain4j.tavily.base-url]`


[.description]
--
Base URL of the Tavily API

ifdef::add-copy-button-to-env-var[]
Environment variable: env_var_with_copy_button:+++QUARKUS_LANGCHAIN4J_TAVILY_BASE_URL+++[]
endif::add-copy-button-to-env-var[]
ifndef::add-copy-button-to-env-var[]
Environment variable: `+++QUARKUS_LANGCHAIN4J_TAVILY_BASE_URL+++`
endif::add-copy-button-to-env-var[]
--|string
|`https://api.tavily.com`


a| [[quarkus-langchain4j-tavily_quarkus-langchain4j-tavily-api-key]]`link:#quarkus-langchain4j-tavily_quarkus-langchain4j-tavily-api-key[quarkus.langchain4j.tavily.api-key]`


[.description]
--
API key for the Tavily API

ifdef::add-copy-button-to-env-var[]
Environment variable: env_var_with_copy_button:+++QUARKUS_LANGCHAIN4J_TAVILY_API_KEY+++[]
endif::add-copy-button-to-env-var[]
ifndef::add-copy-button-to-env-var[]
Environment variable: `+++QUARKUS_LANGCHAIN4J_TAVILY_API_KEY+++`
endif::add-copy-button-to-env-var[]
--|string
|required icon:exclamation-circle[title=Configuration property is required]


a| [[quarkus-langchain4j-tavily_quarkus-langchain4j-tavily-max-results]]`link:#quarkus-langchain4j-tavily_quarkus-langchain4j-tavily-max-results[quarkus.langchain4j.tavily.max-results]`


[.description]
--
Maximum number of results to return

ifdef::add-copy-button-to-env-var[]
Environment variable: env_var_with_copy_button:+++QUARKUS_LANGCHAIN4J_TAVILY_MAX_RESULTS+++[]
endif::add-copy-button-to-env-var[]
ifndef::add-copy-button-to-env-var[]
Environment variable: `+++QUARKUS_LANGCHAIN4J_TAVILY_MAX_RESULTS+++`
endif::add-copy-button-to-env-var[]
--|int
|`5`


a| [[quarkus-langchain4j-tavily_quarkus-langchain4j-tavily-timeout]]`link:#quarkus-langchain4j-tavily_quarkus-langchain4j-tavily-timeout[quarkus.langchain4j.tavily.timeout]`


[.description]
--
The timeout duration for Tavily requests.

ifdef::add-copy-button-to-env-var[]
Environment variable: env_var_with_copy_button:+++QUARKUS_LANGCHAIN4J_TAVILY_TIMEOUT+++[]
endif::add-copy-button-to-env-var[]
ifndef::add-copy-button-to-env-var[]
Environment variable: `+++QUARKUS_LANGCHAIN4J_TAVILY_TIMEOUT+++`
endif::add-copy-button-to-env-var[]
--|link:https://docs.oracle.com/javase/8/docs/api/java/time/Duration.html[Duration]
link:#duration-note-anchor-{summaryTableId}[icon:question-circle[title=More information about the Duration format]]
|`60S`


a| [[quarkus-langchain4j-tavily_quarkus-langchain4j-tavily-log-requests]]`link:#quarkus-langchain4j-tavily_quarkus-langchain4j-tavily-log-requests[quarkus.langchain4j.tavily.log-requests]`


[.description]
--
Whether requests to Tavily should be logged

ifdef::add-copy-button-to-env-var[]
Environment variable: env_var_with_copy_button:+++QUARKUS_LANGCHAIN4J_TAVILY_LOG_REQUESTS+++[]
endif::add-copy-button-to-env-var[]
ifndef::add-copy-button-to-env-var[]
Environment variable: `+++QUARKUS_LANGCHAIN4J_TAVILY_LOG_REQUESTS+++`
endif::add-copy-button-to-env-var[]
--|boolean
|`false`


a| [[quarkus-langchain4j-tavily_quarkus-langchain4j-tavily-log-responses]]`link:#quarkus-langchain4j-tavily_quarkus-langchain4j-tavily-log-responses[quarkus.langchain4j.tavily.log-responses]`


[.description]
--
Whether responses from Tavily should be logged

ifdef::add-copy-button-to-env-var[]
Environment variable: env_var_with_copy_button:+++QUARKUS_LANGCHAIN4J_TAVILY_LOG_RESPONSES+++[]
endif::add-copy-button-to-env-var[]
ifndef::add-copy-button-to-env-var[]
Environment variable: `+++QUARKUS_LANGCHAIN4J_TAVILY_LOG_RESPONSES+++`
endif::add-copy-button-to-env-var[]
--|boolean
|`false`


a| [[quarkus-langchain4j-tavily_quarkus-langchain4j-tavily-search-depth]]`link:#quarkus-langchain4j-tavily_quarkus-langchain4j-tavily-search-depth[quarkus.langchain4j.tavily.search-depth]`


[.description]
--
The search depth to use. This can be "basic" or "advanced". Basic is the default.

ifdef::add-copy-button-to-env-var[]
Environment variable: env_var_with_copy_button:+++QUARKUS_LANGCHAIN4J_TAVILY_SEARCH_DEPTH+++[]
endif::add-copy-button-to-env-var[]
ifndef::add-copy-button-to-env-var[]
Environment variable: `+++QUARKUS_LANGCHAIN4J_TAVILY_SEARCH_DEPTH+++`
endif::add-copy-button-to-env-var[]
-- a|
`basic`, `advanced`
|`basic`


a| [[quarkus-langchain4j-tavily_quarkus-langchain4j-tavily-include-answer]]`link:#quarkus-langchain4j-tavily_quarkus-langchain4j-tavily-include-answer[quarkus.langchain4j.tavily.include-answer]`


[.description]
--
Include a short answer to the original query. Default is false.

ifdef::add-copy-button-to-env-var[]
Environment variable: env_var_with_copy_button:+++QUARKUS_LANGCHAIN4J_TAVILY_INCLUDE_ANSWER+++[]
endif::add-copy-button-to-env-var[]
ifndef::add-copy-button-to-env-var[]
Environment variable: `+++QUARKUS_LANGCHAIN4J_TAVILY_INCLUDE_ANSWER+++`
endif::add-copy-button-to-env-var[]
--|boolean
|`false`


a| [[quarkus-langchain4j-tavily_quarkus-langchain4j-tavily-include-raw-content]]`link:#quarkus-langchain4j-tavily_quarkus-langchain4j-tavily-include-raw-content[quarkus.langchain4j.tavily.include-raw-content]`


[.description]
--
Include the cleaned and parsed HTML content of each search result. Default is false.

ifdef::add-copy-button-to-env-var[]
Environment variable: env_var_with_copy_button:+++QUARKUS_LANGCHAIN4J_TAVILY_INCLUDE_RAW_CONTENT+++[]
endif::add-copy-button-to-env-var[]
ifndef::add-copy-button-to-env-var[]
Environment variable: `+++QUARKUS_LANGCHAIN4J_TAVILY_INCLUDE_RAW_CONTENT+++`
endif::add-copy-button-to-env-var[]
--|boolean
|`false`


a| [[quarkus-langchain4j-tavily_quarkus-langchain4j-tavily-include-domains]]`link:#quarkus-langchain4j-tavily_quarkus-langchain4j-tavily-include-domains[quarkus.langchain4j.tavily.include-domains]`


[.description]
--
A list of domains to specifically include in the search results. Default is ++[]++, which includes all domains.

ifdef::add-copy-button-to-env-var[]
Environment variable: env_var_with_copy_button:+++QUARKUS_LANGCHAIN4J_TAVILY_INCLUDE_DOMAINS+++[]
endif::add-copy-button-to-env-var[]
ifndef::add-copy-button-to-env-var[]
Environment variable: `+++QUARKUS_LANGCHAIN4J_TAVILY_INCLUDE_DOMAINS+++`
endif::add-copy-button-to-env-var[]
--|list of string
|`[]`


a| [[quarkus-langchain4j-tavily_quarkus-langchain4j-tavily-exclude-domains]]`link:#quarkus-langchain4j-tavily_quarkus-langchain4j-tavily-exclude-domains[quarkus.langchain4j.tavily.exclude-domains]`


[.description]
--
A list of domains to specifically exclude from the search results. Default is ++[]++, which doesn't exclude any domains.

ifdef::add-copy-button-to-env-var[]
Environment variable: env_var_with_copy_button:+++QUARKUS_LANGCHAIN4J_TAVILY_EXCLUDE_DOMAINS+++[]
endif::add-copy-button-to-env-var[]
ifndef::add-copy-button-to-env-var[]
Environment variable: `+++QUARKUS_LANGCHAIN4J_TAVILY_EXCLUDE_DOMAINS+++`
endif::add-copy-button-to-env-var[]
--|list of string
|`[]`

|===
ifndef::no-duration-note[]
[NOTE]
[id='duration-note-anchor-{summaryTableId}']
.About the Duration format
====
To write duration values, use the standard `java.time.Duration` format.
See the link:https://docs.oracle.com/en/java/javase/17/docs/api/java.base/java/time/Duration.html#parse(java.lang.CharSequence)[Duration#parse() Java API documentation] for more information.

You can also use a simplified format, starting with a number:

* If the value is only a number, it represents time in seconds.
* If the value is a number followed by `ms`, it represents time in milliseconds.

In other cases, the simplified format is translated to the `java.time.Duration` format for parsing:

* If the value is a number followed by `h`, `m`, or `s`, it is prefixed with `PT`.
* If the value is a number followed by `d`, it is prefixed with `P`.
====
endif::no-duration-note[]
67 changes: 67 additions & 0 deletions docs/modules/ROOT/pages/web-search.adoc
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
= Using web search

Quarkus LangChain4j currently supports the https://tavily.com/[Tavily] search engine.
To use it, add the `quarkus-langchain4j-tavily` extension to your project. You'll need to specify the API key using the `quarkus.langchain4j.tavily.api-key` property.

After this, you can inject the search engine into your application using

[source,java]
----
@Inject
WebSearchEngine engine;
----

and then use it by calling its `search` method.

If you want to let a chat model use web search by itself, there are
generally two recommended ways to accomplish this: either by implementing a
tool that uses it, or as a content retriever inside a RAG pipeline. The
https://github.com/quarkiverse/quarkus-langchain4j/tree/main/samples/chatbot-web-search[chatbot-web-search]
example in the `quarkus-langchain4j` repository demonstrates using web
search as a tool.

== Using Web search as a tool

To use web search as a tool that the LLM can decide to execute (and the
relevant search results will be the return value of the tool execution), you
can either use the provided tool from the upstream LangChain4j project,
in class `dev.langchain4j.web.search.WebSearchTool`, or implement your own tool
if that one does not fit your requirements. The `samples/chatbot-web-search`
example demonstrates how to use the provided tool.

== Using Web search in a RAG pipeline

There is also a provided content retriever, `dev.langchain4j.rag.content.retriever.WebSearchContentRetriever`, that uses
a web search engine to retrieve relevant documents.
For inspiration, the retrieval augmentor that wraps it may look like this:

[source,java]
----
/**
 * Supplies a {@link RetrievalAugmentor} whose content comes from a web search engine.
 *
 * <p>Each incoming question is first rewritten by the chat model into a search query,
 * and that query is then handed to a {@link WebSearchContentRetriever}.
 */
@ApplicationScoped
public class WebSearchRetrievalAugmentor implements Supplier<RetrievalAugmentor> {

    @Inject
    WebSearchEngine webSearchEngine;

    @Inject
    ChatLanguageModel chatModel;

    /**
     * Builds the retrieval augmentor.
     *
     * @return an augmentor that transforms the question into a search query and
     *         retrieves content through the injected web search engine
     */
    @Override
    public RetrievalAugmentor get() {
        return DefaultRetrievalAugmentor.builder()
                .queryTransformer((question) -> {
                    // before actually querying the engine, we need to transform the
                    // user's question into a suitable search query
                    // (note the trailing space after "question." so the two sentences
                    // of the prompt are not glued together)
                    String query = chatModel.generate("Transform the user's question into a suitable query for the " +
                            "Tavily search engine. The query should yield the results relevant to answering the user's question. " +
                            "User's question: " + question.text());
                    return Collections.singleton(Query.from(query));
                })
                // second argument is presumably the maximum number of search results - confirm
                .contentRetriever(new WebSearchContentRetriever(webSearchEngine, 10))
                .build();
    }
}
----

== Tavily configuration reference

include::includes/quarkus-langchain4j-tavily.adoc[leveloffset=+1,opts=optional]
19 changes: 19 additions & 0 deletions docs/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,11 @@
<artifactId>quarkus-langchain4j-watsonx</artifactId>
<version>${project.version}</version>
</dependency>
<dependency>
<groupId>io.quarkiverse.langchain4j</groupId>
<artifactId>quarkus-langchain4j-tavily</artifactId>
<version>${project.version}</version>
</dependency>
<dependency>
<groupId>io.quarkiverse.langchain4j</groupId>
<artifactId>quarkus-langchain4j-easy-rag</artifactId>
Expand Down Expand Up @@ -279,6 +284,19 @@
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>io.quarkiverse.langchain4j</groupId>
<artifactId>quarkus-langchain4j-tavily-deployment</artifactId>
<version>${project.version}</version>
<type>pom</type>
<scope>test</scope>
<exclusions>
<exclusion>
<groupId>*</groupId>
<artifactId>*</artifactId>
</exclusion>
</exclusions>
</dependency>
</dependencies>

<build>
Expand Down Expand Up @@ -343,6 +361,7 @@
<include>quarkus-langchain4j-watsonx.adoc</include>
<include>quarkus-langchain4j-mistralai.adoc</include>
<include>quarkus-langchain4j-neo4j.adoc</include>
<include>quarkus-langchain4j-tavily.adoc</include>
<filtering>false</filtering>
</resource>
<resource>
Expand Down
1 change: 1 addition & 0 deletions docs/src/main/resources/application.properties
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ quarkus.langchain4j.pinecone.environment=abc
quarkus.langchain4j.pinecone.index-name=abc
quarkus.langchain4j.pinecone.project-id=abc
quarkus.langchain4j.pinecone.api-key=abc
quarkus.langchain4j.tavily.api-key=abc
quarkus.langchain4j.redis.dimension=180
quarkus.langchain4j.easy-rag.path=abc
quarkus.langchain4j.easy-rag.ingestion-strategy=off
Expand Down
3 changes: 3 additions & 0 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,8 @@

<module>quarkus-integrations/websockets-next</module>

<module>web-search-engines/tavily</module>

<module>rag/easy-rag</module>
<module>rag/parsers-base</module>

Expand Down Expand Up @@ -205,6 +207,7 @@
<module>samples/chatbot</module>
<module>samples/chatbot-easy-rag</module>
<module>samples/sql-chatbot</module>
<module>samples/chatbot-web-search</module>
</modules>
</profile>
</profiles>
Expand Down
Loading

0 comments on commit 1bb873c

Please sign in to comment.