From d7a367c831f1af9d53b6d8c4ef09f0c823bdf4ed Mon Sep 17 00:00:00 2001
From: mariofusco
Date: Mon, 28 Oct 2024 18:12:48 +0100
Subject: [PATCH] Add Jlama documentation

---
 docs/modules/ROOT/nav.adoc                    |   1 +
 .../includes/quarkus-langchain4j-jlama.adoc   | 323 ++++++++++++++++++
 docs/modules/ROOT/pages/index.adoc            |   2 +-
 docs/modules/ROOT/pages/jlama.adoc            |  73 ++++
 docs/modules/ROOT/pages/llms.adoc             |   3 +-
 5 files changed, 400 insertions(+), 2 deletions(-)
 create mode 100644 docs/modules/ROOT/pages/includes/quarkus-langchain4j-jlama.adoc
 create mode 100644 docs/modules/ROOT/pages/jlama.adoc

diff --git a/docs/modules/ROOT/nav.adoc b/docs/modules/ROOT/nav.adoc
index 407d7648c..b80f29ad4 100644
--- a/docs/modules/ROOT/nav.adoc
+++ b/docs/modules/ROOT/nav.adoc
@@ -13,6 +13,7 @@
 ** xref:openai.adoc[OpenAI]
 ** xref:huggingface.adoc[HuggingFace]
 ** xref:ollama.adoc[Ollama]
+** xref:jlama.adoc[Jlama]
 ** xref:podman.adoc[Podman AI Lab]
 ** xref:anthropic.adoc[Anthropic (Claude)]
 ** xref:mistral.adoc[Mistral AI]
diff --git a/docs/modules/ROOT/pages/includes/quarkus-langchain4j-jlama.adoc b/docs/modules/ROOT/pages/includes/quarkus-langchain4j-jlama.adoc
new file mode 100644
index 000000000..0dc890176
--- /dev/null
+++ b/docs/modules/ROOT/pages/includes/quarkus-langchain4j-jlama.adoc
@@ -0,0 +1,323 @@
+
+:summaryTableId: quarkus-langchain4j-jlama
+[.configuration-legend]
+icon:lock[title=Fixed at build time] Configuration property fixed at build time - All other configuration properties are overridable at runtime
+[.configuration-reference.searchable, cols="80,.^10,.^10"]
+|===
+
+h|[[quarkus-langchain4j-jlama_configuration]]link:#quarkus-langchain4j-jlama_configuration[Configuration property]
+
+h|Type
+h|Default
+
+a|icon:lock[title=Fixed at build time] [[quarkus-langchain4j-jlama_quarkus-langchain4j-jlama-chat-model-enabled]]`link:#quarkus-langchain4j-jlama_quarkus-langchain4j-jlama-chat-model-enabled[quarkus.langchain4j.jlama.chat-model.enabled]`
+
+
+[.description]
+--
+Whether the chat model should be enabled
+
+ifdef::add-copy-button-to-env-var[]
+Environment variable: env_var_with_copy_button:+++QUARKUS_LANGCHAIN4J_JLAMA_CHAT_MODEL_ENABLED+++[]
+endif::add-copy-button-to-env-var[]
+ifndef::add-copy-button-to-env-var[]
+Environment variable: `+++QUARKUS_LANGCHAIN4J_JLAMA_CHAT_MODEL_ENABLED+++`
+endif::add-copy-button-to-env-var[]
+--|boolean
+|`true`
+
+
+a|icon:lock[title=Fixed at build time] [[quarkus-langchain4j-jlama_quarkus-langchain4j-jlama-embedding-model-enabled]]`link:#quarkus-langchain4j-jlama_quarkus-langchain4j-jlama-embedding-model-enabled[quarkus.langchain4j.jlama.embedding-model.enabled]`
+
+
+[.description]
+--
+Whether the embedding model should be enabled
+
+ifdef::add-copy-button-to-env-var[]
+Environment variable: env_var_with_copy_button:+++QUARKUS_LANGCHAIN4J_JLAMA_EMBEDDING_MODEL_ENABLED+++[]
+endif::add-copy-button-to-env-var[]
+ifndef::add-copy-button-to-env-var[]
+Environment variable: `+++QUARKUS_LANGCHAIN4J_JLAMA_EMBEDDING_MODEL_ENABLED+++`
+endif::add-copy-button-to-env-var[]
+--|boolean
+|`true`
+
+
+a|icon:lock[title=Fixed at build time] [[quarkus-langchain4j-jlama_quarkus-langchain4j-jlama-chat-model-model-name]]`link:#quarkus-langchain4j-jlama_quarkus-langchain4j-jlama-chat-model-model-name[quarkus.langchain4j.jlama.chat-model.model-name]`
+
+
+[.description]
+--
+Chat model to use.
+The default value is `tjake/granite-3.0-2b-instruct-JQ4`
+
+ifdef::add-copy-button-to-env-var[]
+Environment variable: env_var_with_copy_button:+++QUARKUS_LANGCHAIN4J_JLAMA_CHAT_MODEL_MODEL_NAME+++[]
+endif::add-copy-button-to-env-var[]
+ifndef::add-copy-button-to-env-var[]
+Environment variable: `+++QUARKUS_LANGCHAIN4J_JLAMA_CHAT_MODEL_MODEL_NAME+++`
+endif::add-copy-button-to-env-var[]
+--|string
+|`tjake/granite-3.0-2b-instruct-JQ4`
+
+
+a|icon:lock[title=Fixed at build time] [[quarkus-langchain4j-jlama_quarkus-langchain4j-jlama-embedding-model-model-name]]`link:#quarkus-langchain4j-jlama_quarkus-langchain4j-jlama-embedding-model-model-name[quarkus.langchain4j.jlama.embedding-model.model-name]`
+
+
+[.description]
+--
+Embedding model to use. The default value is `intfloat/e5-small-v2`
+
+ifdef::add-copy-button-to-env-var[]
+Environment variable: env_var_with_copy_button:+++QUARKUS_LANGCHAIN4J_JLAMA_EMBEDDING_MODEL_MODEL_NAME+++[]
+endif::add-copy-button-to-env-var[]
+ifndef::add-copy-button-to-env-var[]
+Environment variable: `+++QUARKUS_LANGCHAIN4J_JLAMA_EMBEDDING_MODEL_MODEL_NAME+++`
+endif::add-copy-button-to-env-var[]
+--|string
+|`intfloat/e5-small-v2`
+
+
+a| [[quarkus-langchain4j-jlama_quarkus-langchain4j-jlama-log-requests]]`link:#quarkus-langchain4j-jlama_quarkus-langchain4j-jlama-log-requests[quarkus.langchain4j.jlama.log-requests]`
+
+
+[.description]
+--
+Whether the Jlama requests should be logged
+
+ifdef::add-copy-button-to-env-var[]
+Environment variable: env_var_with_copy_button:+++QUARKUS_LANGCHAIN4J_JLAMA_LOG_REQUESTS+++[]
+endif::add-copy-button-to-env-var[]
+ifndef::add-copy-button-to-env-var[]
+Environment variable: `+++QUARKUS_LANGCHAIN4J_JLAMA_LOG_REQUESTS+++`
+endif::add-copy-button-to-env-var[]
+--|boolean
+|`false`
+
+
+a| [[quarkus-langchain4j-jlama_quarkus-langchain4j-jlama-log-responses]]`link:#quarkus-langchain4j-jlama_quarkus-langchain4j-jlama-log-responses[quarkus.langchain4j.jlama.log-responses]`
+
+
+[.description]
+--
+Whether the Jlama responses should be logged
+
+ifdef::add-copy-button-to-env-var[]
+Environment variable: env_var_with_copy_button:+++QUARKUS_LANGCHAIN4J_JLAMA_LOG_RESPONSES+++[]
+endif::add-copy-button-to-env-var[]
+ifndef::add-copy-button-to-env-var[]
+Environment variable: `+++QUARKUS_LANGCHAIN4J_JLAMA_LOG_RESPONSES+++`
+endif::add-copy-button-to-env-var[]
+--|boolean
+|`false`
+
+
+a| [[quarkus-langchain4j-jlama_quarkus-langchain4j-jlama-chat-model-temperature]]`link:#quarkus-langchain4j-jlama_quarkus-langchain4j-jlama-chat-model-temperature[quarkus.langchain4j.jlama.chat-model.temperature]`
+
+
+[.description]
+--
+The temperature of the model. Increasing the temperature will make the model answer with more creativity. A lower temperature will make the model answer more conservatively.
+
+ifdef::add-copy-button-to-env-var[]
+Environment variable: env_var_with_copy_button:+++QUARKUS_LANGCHAIN4J_JLAMA_CHAT_MODEL_TEMPERATURE+++[]
+endif::add-copy-button-to-env-var[]
+ifndef::add-copy-button-to-env-var[]
+Environment variable: `+++QUARKUS_LANGCHAIN4J_JLAMA_CHAT_MODEL_TEMPERATURE+++`
+endif::add-copy-button-to-env-var[]
+--|double
+|`0.3`
+
+
+a| [[quarkus-langchain4j-jlama_quarkus-langchain4j-jlama-embedding-model-temperature]]`link:#quarkus-langchain4j-jlama_quarkus-langchain4j-jlama-embedding-model-temperature[quarkus.langchain4j.jlama.embedding-model.temperature]`
+
+
+[.description]
+--
+The temperature of the model. Increasing the temperature will make the model answer with more variability.
+A lower temperature will make the model answer more conservatively.
+
+ifdef::add-copy-button-to-env-var[]
+Environment variable: env_var_with_copy_button:+++QUARKUS_LANGCHAIN4J_JLAMA_EMBEDDING_MODEL_TEMPERATURE+++[]
+endif::add-copy-button-to-env-var[]
+ifndef::add-copy-button-to-env-var[]
+Environment variable: `+++QUARKUS_LANGCHAIN4J_JLAMA_EMBEDDING_MODEL_TEMPERATURE+++`
+endif::add-copy-button-to-env-var[]
+--|double
+|`0.3`
+
+
+a| [[quarkus-langchain4j-jlama_quarkus-langchain4j-jlama-embedding-model-log-requests]]`link:#quarkus-langchain4j-jlama_quarkus-langchain4j-jlama-embedding-model-log-requests[quarkus.langchain4j.jlama.embedding-model.log-requests]`
+
+
+[.description]
+--
+Whether embedding model requests should be logged
+
+ifdef::add-copy-button-to-env-var[]
+Environment variable: env_var_with_copy_button:+++QUARKUS_LANGCHAIN4J_JLAMA_EMBEDDING_MODEL_LOG_REQUESTS+++[]
+endif::add-copy-button-to-env-var[]
+ifndef::add-copy-button-to-env-var[]
+Environment variable: `+++QUARKUS_LANGCHAIN4J_JLAMA_EMBEDDING_MODEL_LOG_REQUESTS+++`
+endif::add-copy-button-to-env-var[]
+--|boolean
+|`false`
+
+
+a| [[quarkus-langchain4j-jlama_quarkus-langchain4j-jlama-embedding-model-log-responses]]`link:#quarkus-langchain4j-jlama_quarkus-langchain4j-jlama-embedding-model-log-responses[quarkus.langchain4j.jlama.embedding-model.log-responses]`
+
+
+[.description]
+--
+Whether embedding model responses should be logged
+
+ifdef::add-copy-button-to-env-var[]
+Environment variable: env_var_with_copy_button:+++QUARKUS_LANGCHAIN4J_JLAMA_EMBEDDING_MODEL_LOG_RESPONSES+++[]
+endif::add-copy-button-to-env-var[]
+ifndef::add-copy-button-to-env-var[]
+Environment variable: `+++QUARKUS_LANGCHAIN4J_JLAMA_EMBEDDING_MODEL_LOG_RESPONSES+++`
+endif::add-copy-button-to-env-var[]
+--|boolean
+|`false`
+
+
+h|[[quarkus-langchain4j-jlama_quarkus-langchain4j-jlama-named-config-named-model-config]]link:#quarkus-langchain4j-jlama_quarkus-langchain4j-jlama-named-config-named-model-config[Named model config]
+
+h|Type
+h|Default
+
+a|icon:lock[title=Fixed at build time] [[quarkus-langchain4j-jlama_quarkus-langchain4j-jlama-model-name-chat-model-model-name]]`link:#quarkus-langchain4j-jlama_quarkus-langchain4j-jlama-model-name-chat-model-model-name[quarkus.langchain4j.jlama."model-name".chat-model.model-name]`
+
+
+[.description]
+--
+Model to use. The default value is `tjake/granite-3.0-2b-instruct-JQ4`
+
+ifdef::add-copy-button-to-env-var[]
+Environment variable: env_var_with_copy_button:+++QUARKUS_LANGCHAIN4J_JLAMA__MODEL_NAME__CHAT_MODEL_MODEL_NAME+++[]
+endif::add-copy-button-to-env-var[]
+ifndef::add-copy-button-to-env-var[]
+Environment variable: `+++QUARKUS_LANGCHAIN4J_JLAMA__MODEL_NAME__CHAT_MODEL_MODEL_NAME+++`
+endif::add-copy-button-to-env-var[]
+--|string
+|`tjake/granite-3.0-2b-instruct-JQ4`
+
+
+a|icon:lock[title=Fixed at build time] [[quarkus-langchain4j-jlama_quarkus-langchain4j-jlama-model-name-embedding-model-model-name]]`link:#quarkus-langchain4j-jlama_quarkus-langchain4j-jlama-model-name-embedding-model-model-name[quarkus.langchain4j.jlama."model-name".embedding-model.model-name]`
+
+
+[.description]
+--
+Model to use.
+The default value is `intfloat/e5-small-v2`
+
+ifdef::add-copy-button-to-env-var[]
+Environment variable: env_var_with_copy_button:+++QUARKUS_LANGCHAIN4J_JLAMA__MODEL_NAME__EMBEDDING_MODEL_MODEL_NAME+++[]
+endif::add-copy-button-to-env-var[]
+ifndef::add-copy-button-to-env-var[]
+Environment variable: `+++QUARKUS_LANGCHAIN4J_JLAMA__MODEL_NAME__EMBEDDING_MODEL_MODEL_NAME+++`
+endif::add-copy-button-to-env-var[]
+--|string
+|`intfloat/e5-small-v2`
+
+
+a| [[quarkus-langchain4j-jlama_quarkus-langchain4j-jlama-model-name-log-requests]]`link:#quarkus-langchain4j-jlama_quarkus-langchain4j-jlama-model-name-log-requests[quarkus.langchain4j.jlama."model-name".log-requests]`
+
+
+[.description]
+--
+Whether the Jlama requests should be logged
+
+ifdef::add-copy-button-to-env-var[]
+Environment variable: env_var_with_copy_button:+++QUARKUS_LANGCHAIN4J_JLAMA__MODEL_NAME__LOG_REQUESTS+++[]
+endif::add-copy-button-to-env-var[]
+ifndef::add-copy-button-to-env-var[]
+Environment variable: `+++QUARKUS_LANGCHAIN4J_JLAMA__MODEL_NAME__LOG_REQUESTS+++`
+endif::add-copy-button-to-env-var[]
+--|boolean
+|`false`
+
+
+a| [[quarkus-langchain4j-jlama_quarkus-langchain4j-jlama-model-name-log-responses]]`link:#quarkus-langchain4j-jlama_quarkus-langchain4j-jlama-model-name-log-responses[quarkus.langchain4j.jlama."model-name".log-responses]`
+
+
+[.description]
+--
+Whether the Jlama responses should be logged
+
+ifdef::add-copy-button-to-env-var[]
+Environment variable: env_var_with_copy_button:+++QUARKUS_LANGCHAIN4J_JLAMA__MODEL_NAME__LOG_RESPONSES+++[]
+endif::add-copy-button-to-env-var[]
+ifndef::add-copy-button-to-env-var[]
+Environment variable: `+++QUARKUS_LANGCHAIN4J_JLAMA__MODEL_NAME__LOG_RESPONSES+++`
+endif::add-copy-button-to-env-var[]
+--|boolean
+|`false`
+
+
+a| [[quarkus-langchain4j-jlama_quarkus-langchain4j-jlama-model-name-chat-model-temperature]]`link:#quarkus-langchain4j-jlama_quarkus-langchain4j-jlama-model-name-chat-model-temperature[quarkus.langchain4j.jlama."model-name".chat-model.temperature]`
+
+
+[.description]
+--
+The temperature of the model. Increasing the temperature will make the model answer with more creativity. A lower temperature will make the model answer more conservatively.
+
+ifdef::add-copy-button-to-env-var[]
+Environment variable: env_var_with_copy_button:+++QUARKUS_LANGCHAIN4J_JLAMA__MODEL_NAME__CHAT_MODEL_TEMPERATURE+++[]
+endif::add-copy-button-to-env-var[]
+ifndef::add-copy-button-to-env-var[]
+Environment variable: `+++QUARKUS_LANGCHAIN4J_JLAMA__MODEL_NAME__CHAT_MODEL_TEMPERATURE+++`
+endif::add-copy-button-to-env-var[]
+--|double
+|`0.3`
+
+
+a| [[quarkus-langchain4j-jlama_quarkus-langchain4j-jlama-model-name-embedding-model-temperature]]`link:#quarkus-langchain4j-jlama_quarkus-langchain4j-jlama-model-name-embedding-model-temperature[quarkus.langchain4j.jlama."model-name".embedding-model.temperature]`
+
+
+[.description]
+--
+The temperature of the model. Increasing the temperature will make the model answer with more creativity. A lower temperature will make the model answer more conservatively.
+
+ifdef::add-copy-button-to-env-var[]
+Environment variable: env_var_with_copy_button:+++QUARKUS_LANGCHAIN4J_JLAMA__MODEL_NAME__EMBEDDING_MODEL_TEMPERATURE+++[]
+endif::add-copy-button-to-env-var[]
+ifndef::add-copy-button-to-env-var[]
+Environment variable: `+++QUARKUS_LANGCHAIN4J_JLAMA__MODEL_NAME__EMBEDDING_MODEL_TEMPERATURE+++`
+endif::add-copy-button-to-env-var[]
+--|double
+|`0.3`
+
+
+a| [[quarkus-langchain4j-jlama_quarkus-langchain4j-jlama-model-name-embedding-model-log-requests]]`link:#quarkus-langchain4j-jlama_quarkus-langchain4j-jlama-model-name-embedding-model-log-requests[quarkus.langchain4j.jlama."model-name".embedding-model.log-requests]`
+
+
+[.description]
+--
+Whether embedding model requests should be logged
+
+ifdef::add-copy-button-to-env-var[]
+Environment variable: env_var_with_copy_button:+++QUARKUS_LANGCHAIN4J_JLAMA__MODEL_NAME__EMBEDDING_MODEL_LOG_REQUESTS+++[]
+endif::add-copy-button-to-env-var[]
+ifndef::add-copy-button-to-env-var[]
+Environment variable: `+++QUARKUS_LANGCHAIN4J_JLAMA__MODEL_NAME__EMBEDDING_MODEL_LOG_REQUESTS+++`
+endif::add-copy-button-to-env-var[]
+--|boolean
+|`false`
+
+
+a| [[quarkus-langchain4j-jlama_quarkus-langchain4j-jlama-model-name-embedding-model-log-responses]]`link:#quarkus-langchain4j-jlama_quarkus-langchain4j-jlama-model-name-embedding-model-log-responses[quarkus.langchain4j.jlama."model-name".embedding-model.log-responses]`
+
+
+[.description]
+--
+Whether embedding model responses should be logged
+
+ifdef::add-copy-button-to-env-var[]
+Environment variable: env_var_with_copy_button:+++QUARKUS_LANGCHAIN4J_JLAMA__MODEL_NAME__EMBEDDING_MODEL_LOG_RESPONSES+++[]
+endif::add-copy-button-to-env-var[]
+ifndef::add-copy-button-to-env-var[]
+Environment variable: `+++QUARKUS_LANGCHAIN4J_JLAMA__MODEL_NAME__EMBEDDING_MODEL_LOG_RESPONSES+++`
+endif::add-copy-button-to-env-var[]
+--|boolean
+|`false`
+
+|===
diff --git a/docs/modules/ROOT/pages/index.adoc b/docs/modules/ROOT/pages/index.adoc
index c01eee948..e14c26333 100644
--- a/docs/modules/ROOT/pages/index.adoc
+++ b/docs/modules/ROOT/pages/index.adoc
@@ -14,7 +14,7 @@ For instance, an application utilizing this extension can:
 - Generate personalized text such as emails or reports
 
 This extension is built upon the https://github.com/langchain4j/langchain4j[LangChain4j library].
-It offers a declarative approach to interact with diverse LLMs like OpenAI, Hugging Face, or Ollama. It facilitates LLM-invoked functions within Quarkus applications and allows document loading within the LLM "context".
+It offers a declarative approach to interact with diverse LLMs like OpenAI, Hugging Face, Ollama, or Jlama. It facilitates LLM-invoked functions within Quarkus applications and allows document loading within the LLM "context".
 
 image::llms-big-picture.png[width=600,align="center"]
 
diff --git a/docs/modules/ROOT/pages/jlama.adoc b/docs/modules/ROOT/pages/jlama.adoc
new file mode 100644
index 000000000..d46824207
--- /dev/null
+++ b/docs/modules/ROOT/pages/jlama.adoc
@@ -0,0 +1,73 @@
+= Jlama
+
+include::./includes/attributes.adoc[]
+
+https://github.com/tjake/Jlama[Jlama] provides a way to run large language models (LLMs) locally and in pure Java, even embedded directly in your Quarkus application.
+You can run many https://huggingface.co/tjake[models], such as Llama3, Mistral, and Granite, on your machine.
+
+[#_prerequisites]
+== Prerequisites
+
+Jlama requires Java 20 or later, because it relies on the new https://openjdk.org/jeps/448[Vector API] for faster inference.
+Note that the Vector API is still a Java preview feature, so it must be enabled explicitly.
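+
+As a minimal sketch, when launching the application as a plain jar, the flags might look like this (the exact flags can vary with your JDK version and launch mode):
+
+[source,shell]
+----
+# Enable the Vector API module and preview features at startup
+java --enable-preview --add-modules jdk.incubator.vector -jar target/quarkus-app/quarkus-run.jar
+----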
+
+=== Dev Service
+
+Quarkus LangChain4j automatically handles pulling the models configured by the application, so there is no need for users to do so manually.
+
+WARNING: Models are huge, so make sure you have enough disk space.
+
+NOTE: Due to the models' large size, pulling them can take time.
+
+== Using Jlama
+
+To let Jlama run inference on your models, add the following dependency to your project:
+
+[source,xml,subs=attributes+]
+----
+<dependency>
+    <groupId>io.quarkiverse.langchain4j</groupId>
+    <artifactId>quarkus-langchain4j-jlama</artifactId>
+    <version>{project-version}</version>
+</dependency>
+----
+
+If no other LLM extension is installed, xref:ai-services.adoc[AI Services] will automatically utilize the configured Jlama model.
+
+By default, the extension uses the https://huggingface.co/tjake/TinyLlama-1.1B-Chat-v1.0-Jlama-Q4[`TinyLlama-1.1B-Chat-v1.0-Jlama-Q4`] model.
+You can change it by setting the `quarkus.langchain4j.jlama.chat-model.model-name` property in the `application.properties` file:
+
+[source,properties,subs=attributes+]
+----
+quarkus.langchain4j.jlama.chat-model.model-name=tjake/granite-3.0-2b-instruct-JQ4
+----
+
+=== Configuration
+
+Several configuration properties are available:
+
+include::includes/quarkus-langchain4j-jlama.adoc[leveloffset=+1,opts=optional]
+
+== Document Retriever and Embedding
+
+Jlama also provides embedding models.
+By default, it uses `intfloat/e5-small-v2`.
+
+You can change the default embedding model by setting the `quarkus.langchain4j.jlama.embedding-model.model-name` property in the `application.properties` file:
+
+[source,properties,subs=attributes+]
+----
+quarkus.langchain4j.log-requests=true
+quarkus.langchain4j.log-responses=true
+
+quarkus.langchain4j.jlama.chat-model.model-name=tjake/granite-3.0-2b-instruct-JQ4
+quarkus.langchain4j.jlama.embedding-model.model-name=intfloat/e5-small-v2
+----
+
+If no other LLM extension is installed, retrieve the embedding model as follows:
+
+[source, java]
+----
+@Inject EmbeddingModel model; // Injects the embedding model
+----
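+
+Once injected, the model can turn text into a vector. A short usage sketch based on the LangChain4j `EmbeddingModel` API:
+
+[source, java]
+----
+// Embed a sentence and read back the resulting vector
+float[] vector = model.embed("Why is the sky blue?").content().vector();
+----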
+
diff --git a/docs/modules/ROOT/pages/llms.adoc b/docs/modules/ROOT/pages/llms.adoc
index d83955f15..f35d33970 100644
--- a/docs/modules/ROOT/pages/llms.adoc
+++ b/docs/modules/ROOT/pages/llms.adoc
@@ -15,7 +15,7 @@ Ethical concerns and biases within LLMs are topics of ongoing discussion and res
 Continued research and development in LLMs are constantly pushing the boundaries of what AI can achieve in language understanding and generation.
 
 LLMs are a core component of the Quarkus LangChain4j extension.
-The extension does not serve its own LLMs, but rather provides a standard interface for interacting with many different LLMs such as OpenAI GPT-3/4, Hugging Face, and Ollama.
+The extension does not serve its own LLMs, but rather provides a standard interface for interacting with many different LLMs such as OpenAI GPT-3/4, Hugging Face, Ollama, and Jlama.
 This xref:ai-services.adoc[interface] is designed to be simple and intuitive, allowing developers to quickly integrate LLMs into their applications.
 Note that each LLM has a different feature set.
 
@@ -24,6 +24,7 @@ Please check the specific documentation for the LLM you are using to see what fe
 
 - xref:openai.adoc[OpenAI (GPT-3/4)]
 - xref:huggingface.adoc[Hugging Face]
 - xref:ollama.adoc[Ollama]
+- xref:jlama.adoc[Jlama]
 - xref:watsonx.adoc[IBM watsonx.ai]
 - xref:mistral.adoc[Mistral AI]