From 7e7df50bbf6dbf27bcf9bfb926052a7aa6ecfe54 Mon Sep 17 00:00:00 2001 From: Light Date: Wed, 1 Jan 2025 04:02:28 +1100 Subject: [PATCH 01/10] fix(community): Migrate xenova transformers lib to huggingface (#7431) Co-authored-by: jacoblee93 --- docs/api_refs/blacklisted-entrypoints.json | 1 + .../text_embedding/transformers.mdx | 4 +- .../test-exports-cjs/src/import.js | 2 +- .../test-exports-cjs/src/index.mjs | 2 +- .../test-exports-cjs/src/index.ts | 6 +- .../test-exports-cjs/src/require.js | 2 +- .../test-exports-esm/src/import.cjs | 2 +- .../test-exports-esm/src/index.js | 2 +- .../test-exports-esm/src/index.ts | 6 +- .../test-exports-esm/src/require.cjs | 2 +- .../src/models/embeddings/hf_transformers.ts | 2 +- .../src/use_cases/local_retrieval_qa/chain.ts | 2 +- .../local_retrieval_qa/load_documents.ts | 2 +- .../use_cases/local_retrieval_qa/qa_chain.ts | 2 +- libs/langchain-community/.gitignore | 4 + libs/langchain-community/langchain.config.js | 2 + libs/langchain-community/package.json | 23 +- .../src/embeddings/hf_transformers.ts | 34 +-- .../embeddings/huggingface_transformers.ts | 128 +++++++++ ...s => huggingface_transformers.int.test.ts} | 2 +- .../src/load/import_constants.ts | 1 + yarn.lock | 245 +++++++++++------- 22 files changed, 344 insertions(+), 132 deletions(-) create mode 100644 libs/langchain-community/src/embeddings/huggingface_transformers.ts rename libs/langchain-community/src/embeddings/tests/{hf_transformers.int.test.ts => huggingface_transformers.int.test.ts} (91%) diff --git a/docs/api_refs/blacklisted-entrypoints.json b/docs/api_refs/blacklisted-entrypoints.json index 419d8800827d..e1b4fa28e3a6 100644 --- a/docs/api_refs/blacklisted-entrypoints.json +++ b/docs/api_refs/blacklisted-entrypoints.json @@ -15,6 +15,7 @@ "../../langchain/src/embeddings/tensorflow.ts", "../../langchain/src/embeddings/hf.ts", "../../langchain/src/embeddings/hf_transformers.ts", + "../../langchain/src/embeddings/huggingface_transformers.ts", 
"../../langchain/src/embeddings/googlevertexai.ts", "../../langchain/src/embeddings/googlepalm.ts", "../../langchain/src/embeddings/minimax.ts", diff --git a/docs/core_docs/docs/integrations/text_embedding/transformers.mdx b/docs/core_docs/docs/integrations/text_embedding/transformers.mdx index dc75291a39de..c789a125aab8 100644 --- a/docs/core_docs/docs/integrations/text_embedding/transformers.mdx +++ b/docs/core_docs/docs/integrations/text_embedding/transformers.mdx @@ -8,10 +8,10 @@ It runs locally and even works directly in the browser, allowing you to create w ## Setup -You'll need to install the [@xenova/transformers](https://www.npmjs.com/package/@xenova/transformers) package as a peer dependency: +You'll need to install the [@huggingface/transformers](https://www.npmjs.com/package/@huggingface/transformers) package as a peer dependency: ```bash npm2yarn -npm install @xenova/transformers +npm install @huggingface/transformers ``` import IntegrationInstallTooltip from "@mdx_components/integration_install_tooltip.mdx"; diff --git a/environment_tests/test-exports-cjs/src/import.js b/environment_tests/test-exports-cjs/src/import.js index 752cfdea37ba..6723331f7dc9 100644 --- a/environment_tests/test-exports-cjs/src/import.js +++ b/environment_tests/test-exports-cjs/src/import.js @@ -3,7 +3,7 @@ async function test() { const { OpenAI } = await import("@langchain/openai"); const { LLMChain } = await import("langchain/chains"); const { ChatPromptTemplate } = await import("@langchain/core/prompts"); - const { HuggingFaceTransformersEmbeddings } = await import("@langchain/community/embeddings/hf_transformers"); + const { HuggingFaceTransformersEmbeddings } = await import("@langchain/community/embeddings/huggingface_transformers"); const { Document } = await import("@langchain/core/documents"); const { MemoryVectorStore } = await import("langchain/vectorstores/memory"); diff --git a/environment_tests/test-exports-cjs/src/index.mjs 
b/environment_tests/test-exports-cjs/src/index.mjs index 632b8081fbaa..7f30afdc81d6 100644 --- a/environment_tests/test-exports-cjs/src/index.mjs +++ b/environment_tests/test-exports-cjs/src/index.mjs @@ -3,7 +3,7 @@ import { OpenAI } from "@langchain/openai"; import { LLMChain } from "langchain/chains"; import { MemoryVectorStore } from "langchain/vectorstores/memory"; import { ChatPromptTemplate } from "@langchain/core/prompts"; -import { HuggingFaceTransformersEmbeddings } from "@langchain/community/embeddings/hf_transformers"; +import { HuggingFaceTransformersEmbeddings } from "@langchain/community/embeddings/huggingface_transformers"; import { Document } from "@langchain/core/documents"; // Test exports diff --git a/environment_tests/test-exports-cjs/src/index.ts b/environment_tests/test-exports-cjs/src/index.ts index d2dcb9ebab0d..c4077382afd4 100644 --- a/environment_tests/test-exports-cjs/src/index.ts +++ b/environment_tests/test-exports-cjs/src/index.ts @@ -3,7 +3,7 @@ import { OpenAI } from "@langchain/openai"; import { LLMChain } from "langchain/chains"; import { ChatPromptTemplate } from "@langchain/core/prompts"; import { MemoryVectorStore } from "langchain/vectorstores/memory"; -import { HuggingFaceTransformersEmbeddings } from "@langchain/community/embeddings/hf_transformers"; +import { HuggingFaceTransformersEmbeddings } from "@langchain/community/embeddings/huggingface_transformers"; import { Document } from "@langchain/core/documents"; async function test(useAzure: boolean = false) { @@ -25,7 +25,9 @@ async function test(useAzure: boolean = false) { openAIApiKey: "sk-XXXX", }; - const vs = new MemoryVectorStore(new HuggingFaceTransformersEmbeddings({ model: "Xenova/all-MiniLM-L6-v2" })); + const vs = new MemoryVectorStore( + new HuggingFaceTransformersEmbeddings({ model: "Xenova/all-MiniLM-L6-v2" }) + ); await vs.addVectors( [ diff --git a/environment_tests/test-exports-cjs/src/require.js b/environment_tests/test-exports-cjs/src/require.js index 
1343f8587f35..f9110d7cd71f 100644 --- a/environment_tests/test-exports-cjs/src/require.js +++ b/environment_tests/test-exports-cjs/src/require.js @@ -3,7 +3,7 @@ const { OpenAI } = require("@langchain/openai"); const { LLMChain } = require("langchain/chains"); const { ChatPromptTemplate } = require("@langchain/core/prompts"); const { MemoryVectorStore } = require("langchain/vectorstores/memory"); -const { HuggingFaceTransformersEmbeddings } = require("@langchain/community/embeddings/hf_transformers"); +const { HuggingFaceTransformersEmbeddings } = require("@langchain/community/embeddings/huggingface_transformers"); const { Document } = require("@langchain/core/documents"); async function test() { diff --git a/environment_tests/test-exports-esm/src/import.cjs b/environment_tests/test-exports-esm/src/import.cjs index 6837754c442c..6ab7bc73355e 100644 --- a/environment_tests/test-exports-esm/src/import.cjs +++ b/environment_tests/test-exports-esm/src/import.cjs @@ -4,7 +4,7 @@ async function test() { const { LLMChain } = await import("langchain/chains"); const { ChatPromptTemplate } = await import("@langchain/core/prompts"); const { MemoryVectorStore } = await import("langchain/vectorstores/memory"); - const { HuggingFaceTransformersEmbeddings } = await import("@langchain/community/embeddings/hf_transformers"); + const { HuggingFaceTransformersEmbeddings } = await import("@langchain/community/embeddings/huggingface_transformers"); const { Document } = await import("@langchain/core/documents"); // Test exports diff --git a/environment_tests/test-exports-esm/src/index.js b/environment_tests/test-exports-esm/src/index.js index 2347699ee1dc..0046911c1adb 100644 --- a/environment_tests/test-exports-esm/src/index.js +++ b/environment_tests/test-exports-esm/src/index.js @@ -3,7 +3,7 @@ import { OpenAI } from "@langchain/openai"; import { LLMChain } from "langchain/chains"; import { ChatPromptTemplate } from "@langchain/core/prompts"; import { MemoryVectorStore } from 
"langchain/vectorstores/memory"; -import { HuggingFaceTransformersEmbeddings } from "@langchain/community/embeddings/hf_transformers"; +import { HuggingFaceTransformersEmbeddings } from "@langchain/community/embeddings/huggingface_transformers"; import { Document } from "@langchain/core/documents"; import { CallbackManager } from "@langchain/core/callbacks/manager"; diff --git a/environment_tests/test-exports-esm/src/index.ts b/environment_tests/test-exports-esm/src/index.ts index c29f419c07a8..2d55732c8758 100644 --- a/environment_tests/test-exports-esm/src/index.ts +++ b/environment_tests/test-exports-esm/src/index.ts @@ -3,7 +3,7 @@ import { OpenAI } from "@langchain/openai"; import { LLMChain } from "langchain/chains"; import { ChatPromptTemplate } from "@langchain/core/prompts"; import { MemoryVectorStore } from "langchain/vectorstores/memory"; -import { HuggingFaceTransformersEmbeddings } from "@langchain/community/embeddings/hf_transformers"; +import { HuggingFaceTransformersEmbeddings } from "@langchain/community/embeddings/huggingface_transformers"; import { Document } from "@langchain/core/documents"; async function test(useAzure: boolean = false) { @@ -24,7 +24,9 @@ async function test(useAzure: boolean = false) { openAIApiKey: "sk-XXXX", }; - const vs = new MemoryVectorStore(new HuggingFaceTransformersEmbeddings({ model: "Xenova/all-MiniLM-L6-v2", })); + const vs = new MemoryVectorStore( + new HuggingFaceTransformersEmbeddings({ model: "Xenova/all-MiniLM-L6-v2" }) + ); await vs.addVectors( [ diff --git a/environment_tests/test-exports-esm/src/require.cjs b/environment_tests/test-exports-esm/src/require.cjs index 31461c4b6c23..ab0db459e6c1 100644 --- a/environment_tests/test-exports-esm/src/require.cjs +++ b/environment_tests/test-exports-esm/src/require.cjs @@ -3,7 +3,7 @@ const { OpenAI } = require("@langchain/openai"); const { LLMChain } = require("langchain/chains"); const { ChatPromptTemplate } = require("@langchain/core/prompts"); const { 
MemoryVectorStore } = require("langchain/vectorstores/memory"); -const { HuggingFaceTransformersEmbeddings } = require("@langchain/community/embeddings/hf_transformers"); +const { HuggingFaceTransformersEmbeddings } = require("@langchain/community/embeddings/huggingface_transformers"); const { Document } = require("@langchain/core/documents"); async function test() { diff --git a/examples/src/models/embeddings/hf_transformers.ts b/examples/src/models/embeddings/hf_transformers.ts index 2643eabe7c38..160530d9a30f 100644 --- a/examples/src/models/embeddings/hf_transformers.ts +++ b/examples/src/models/embeddings/hf_transformers.ts @@ -1,4 +1,4 @@ -import { HuggingFaceTransformersEmbeddings } from "@langchain/community/embeddings/hf_transformers"; +import { HuggingFaceTransformersEmbeddings } from "@langchain/community/embeddings/huggingface_transformers"; const model = new HuggingFaceTransformersEmbeddings({ model: "Xenova/all-MiniLM-L6-v2", diff --git a/examples/src/use_cases/local_retrieval_qa/chain.ts b/examples/src/use_cases/local_retrieval_qa/chain.ts index c5da25e3a6f9..38bbceb4199b 100644 --- a/examples/src/use_cases/local_retrieval_qa/chain.ts +++ b/examples/src/use_cases/local_retrieval_qa/chain.ts @@ -2,7 +2,7 @@ import { CheerioWebBaseLoader } from "@langchain/community/document_loaders/web/ import { RecursiveCharacterTextSplitter } from "@langchain/textsplitters"; import { HNSWLib } from "@langchain/community/vectorstores/hnswlib"; import { Ollama } from "@langchain/community/llms/ollama"; -import { HuggingFaceTransformersEmbeddings } from "@langchain/community/embeddings/hf_transformers"; +import { HuggingFaceTransformersEmbeddings } from "@langchain/community/embeddings/huggingface_transformers"; import { formatDocumentsAsString } from "langchain/util/document"; import { PromptTemplate } from "@langchain/core/prompts"; import { diff --git a/examples/src/use_cases/local_retrieval_qa/load_documents.ts 
b/examples/src/use_cases/local_retrieval_qa/load_documents.ts index 9e449b2cc532..7efea9a161ea 100644 --- a/examples/src/use_cases/local_retrieval_qa/load_documents.ts +++ b/examples/src/use_cases/local_retrieval_qa/load_documents.ts @@ -1,7 +1,7 @@ import { CheerioWebBaseLoader } from "@langchain/community/document_loaders/web/cheerio"; import { RecursiveCharacterTextSplitter } from "@langchain/textsplitters"; import { HNSWLib } from "@langchain/community/vectorstores/hnswlib"; -import { HuggingFaceTransformersEmbeddings } from "@langchain/community/embeddings/hf_transformers"; +import { HuggingFaceTransformersEmbeddings } from "@langchain/community/embeddings/huggingface_transformers"; const loader = new CheerioWebBaseLoader( "https://lilianweng.github.io/posts/2023-06-23-agent/" diff --git a/examples/src/use_cases/local_retrieval_qa/qa_chain.ts b/examples/src/use_cases/local_retrieval_qa/qa_chain.ts index 949918067369..6bb4711e6276 100644 --- a/examples/src/use_cases/local_retrieval_qa/qa_chain.ts +++ b/examples/src/use_cases/local_retrieval_qa/qa_chain.ts @@ -3,7 +3,7 @@ import { CheerioWebBaseLoader } from "@langchain/community/document_loaders/web/ import { RecursiveCharacterTextSplitter } from "@langchain/textsplitters"; import { HNSWLib } from "@langchain/community/vectorstores/hnswlib"; import { Ollama } from "@langchain/community/llms/ollama"; -import { HuggingFaceTransformersEmbeddings } from "@langchain/community/embeddings/hf_transformers"; +import { HuggingFaceTransformersEmbeddings } from "@langchain/community/embeddings/huggingface_transformers"; import { PromptTemplate } from "@langchain/core/prompts"; const loader = new CheerioWebBaseLoader( diff --git a/libs/langchain-community/.gitignore b/libs/langchain-community/.gitignore index dcef7c9a15d9..442ff89b42e9 100644 --- a/libs/langchain-community/.gitignore +++ b/libs/langchain-community/.gitignore @@ -174,6 +174,10 @@ embeddings/hf_transformers.cjs embeddings/hf_transformers.js 
embeddings/hf_transformers.d.ts embeddings/hf_transformers.d.cts +embeddings/huggingface_transformers.cjs +embeddings/huggingface_transformers.js +embeddings/huggingface_transformers.d.ts +embeddings/huggingface_transformers.d.cts embeddings/ibm.cjs embeddings/ibm.js embeddings/ibm.d.ts diff --git a/libs/langchain-community/langchain.config.js b/libs/langchain-community/langchain.config.js index f0c1914d5e78..0ea7bff3a182 100644 --- a/libs/langchain-community/langchain.config.js +++ b/libs/langchain-community/langchain.config.js @@ -79,6 +79,7 @@ export const config = { "embeddings/gradient_ai": "embeddings/gradient_ai", "embeddings/hf": "embeddings/hf", "embeddings/hf_transformers": "embeddings/hf_transformers", + "embeddings/huggingface_transformers": "embeddings/huggingface_transformers", "embeddings/ibm": "embeddings/ibm", "embeddings/jina": "embeddings/jina", "embeddings/llama_cpp": "embeddings/llama_cpp", @@ -355,6 +356,7 @@ export const config = { "embeddings/tensorflow", "embeddings/hf", "embeddings/hf_transformers", + "embeddings/huggingface_transformers", "embeddings/ibm", "embeddings/jina", "embeddings/llama_cpp", diff --git a/libs/langchain-community/package.json b/libs/langchain-community/package.json index bceb60def832..81077ddd3343 100644 --- a/libs/langchain-community/package.json +++ b/libs/langchain-community/package.json @@ -78,6 +78,7 @@ "@google-cloud/storage": "^7.7.0", "@gradientai/nodejs-sdk": "^1.2.0", "@huggingface/inference": "^2.6.4", + "@huggingface/transformers": "^3.2.3", "@ibm-cloud/watsonx-ai": "^1.3.0", "@jest/globals": "^29.5.0", "@lancedb/lancedb": "^0.13.0", @@ -134,7 +135,6 @@ "@vercel/postgres": "^0.5.0", "@writerai/writer-sdk": "^0.40.2", "@xata.io/client": "^0.28.0", - "@xenova/transformers": "^2.17.2", "@zilliz/milvus2-sdk-node": ">=2.3.5", "apify-client": "^2.7.1", "assemblyai": "^4.6.0", @@ -249,6 +249,7 @@ "@google-cloud/storage": "^6.10.1 || ^7.7.0", "@gradientai/nodejs-sdk": "^1.2.0", "@huggingface/inference": 
"^2.6.4", + "@huggingface/transformers": "^3.2.3", "@ibm-cloud/watsonx-ai": "*", "@lancedb/lancedb": "^0.12.0", "@langchain/core": ">=0.2.21 <0.4.0", @@ -282,7 +283,6 @@ "@vercel/postgres": "^0.5.0", "@writerai/writer-sdk": "^0.40.2", "@xata.io/client": "^0.28.0", - "@xenova/transformers": "^2.17.2", "@zilliz/milvus2-sdk-node": ">=2.3.5", "apify-client": "^2.7.1", "assemblyai": "^4.6.0", @@ -430,6 +430,9 @@ "@huggingface/inference": { "optional": true }, + "@huggingface/transformers": { + "optional": true + }, "@lancedb/lancedb": { "optional": true }, @@ -523,9 +526,6 @@ "@xata.io/client": { "optional": true }, - "@xenova/transformers": { - "optional": true - }, "@zilliz/milvus2-sdk-node": { "optional": true }, @@ -1113,6 +1113,15 @@ "import": "./embeddings/hf_transformers.js", "require": "./embeddings/hf_transformers.cjs" }, + "./embeddings/huggingface_transformers": { + "types": { + "import": "./embeddings/huggingface_transformers.d.ts", + "require": "./embeddings/huggingface_transformers.d.cts", + "default": "./embeddings/huggingface_transformers.d.ts" + }, + "import": "./embeddings/huggingface_transformers.js", + "require": "./embeddings/huggingface_transformers.cjs" + }, "./embeddings/ibm": { "types": { "import": "./embeddings/ibm.d.ts", @@ -3336,6 +3345,10 @@ "embeddings/hf_transformers.js", "embeddings/hf_transformers.d.ts", "embeddings/hf_transformers.d.cts", + "embeddings/huggingface_transformers.cjs", + "embeddings/huggingface_transformers.js", + "embeddings/huggingface_transformers.d.ts", + "embeddings/huggingface_transformers.d.cts", "embeddings/ibm.cjs", "embeddings/ibm.js", "embeddings/ibm.d.ts", diff --git a/libs/langchain-community/src/embeddings/hf_transformers.ts b/libs/langchain-community/src/embeddings/hf_transformers.ts index 08175dccbb7a..dba24638cf28 100644 --- a/libs/langchain-community/src/embeddings/hf_transformers.ts +++ b/libs/langchain-community/src/embeddings/hf_transformers.ts @@ -1,3 +1,5 @@ +/* eslint-disable */ +// @ts-nocheck 
import type { PretrainedOptions, FeatureExtractionPipelineOptions, @@ -6,6 +8,11 @@ import type { import { Embeddings, type EmbeddingsParams } from "@langchain/core/embeddings"; import { chunkArray } from "@langchain/core/utils/chunk_array"; +/** + * @deprecated Import from + * "@langchain/community/embeddings/huggingface_transformers" + * instead and use the new "@huggingface/transformers" peer dependency. + */ export interface HuggingFaceTransformersEmbeddingsParams extends EmbeddingsParams { /** @@ -13,6 +20,7 @@ export interface HuggingFaceTransformersEmbeddingsParams * Alias for `model` */ modelName: string; + /** Model name to use */ model: string; @@ -42,24 +50,10 @@ export interface HuggingFaceTransformersEmbeddingsParams */ pipelineOptions?: FeatureExtractionPipelineOptions; } - /** - * @example - * ```typescript - * const model = new HuggingFaceTransformersEmbeddings({ - * model: "Xenova/all-MiniLM-L6-v2", - * }); - * - * // Embed a single query - * const res = await model.embedQuery( - * "What would be a good company name for a company that makes colorful socks?" - * ); - * console.log({ res }); - * - * // Embed multiple documents - * const documentRes = await model.embedDocuments(["Hello world", "Bye bye"]); - * console.log({ documentRes }); - * ``` + * @deprecated Import from + * "@langchain/community/embeddings/huggingface_transformers" + * instead and use the new "@huggingface/transformers" peer dependency. */ export class HuggingFaceTransformersEmbeddings extends Embeddings @@ -83,7 +77,6 @@ export class HuggingFaceTransformersEmbeddings constructor(fields?: Partial) { super(fields ?? {}); - this.modelName = fields?.model ?? fields?.modelName ?? this.model; this.model = this.modelName; this.stripNewLines = fields?.stripNewLines ?? this.stripNewLines; @@ -95,27 +88,22 @@ export class HuggingFaceTransformersEmbeddings ...fields?.pipelineOptions, }; } - async embedDocuments(texts: string[]): Promise { const batches = chunkArray( this.stripNewLines ? 
texts.map((t) => t.replace(/\n/g, " ")) : texts, this.batchSize ); - const batchRequests = batches.map((batch) => this.runEmbedding(batch)); const batchResponses = await Promise.all(batchRequests); const embeddings: number[][] = []; - for (let i = 0; i < batchResponses.length; i += 1) { const batchResponse = batchResponses[i]; for (let j = 0; j < batchResponse.length; j += 1) { embeddings.push(batchResponse[j]); } } - return embeddings; } - async embedQuery(text: string): Promise { const data = await this.runEmbedding([ this.stripNewLines ? text.replace(/\n/g, " ") : text, diff --git a/libs/langchain-community/src/embeddings/huggingface_transformers.ts b/libs/langchain-community/src/embeddings/huggingface_transformers.ts new file mode 100644 index 000000000000..03f458c51f38 --- /dev/null +++ b/libs/langchain-community/src/embeddings/huggingface_transformers.ts @@ -0,0 +1,128 @@ +import type { + PretrainedOptions, + FeatureExtractionPipelineOptions, + FeatureExtractionPipeline, +} from "@huggingface/transformers"; +import { Embeddings, type EmbeddingsParams } from "@langchain/core/embeddings"; +import { chunkArray } from "@langchain/core/utils/chunk_array"; + +export interface HuggingFaceTransformersEmbeddingsParams + extends EmbeddingsParams { + /** Model name to use */ + model: string; + + /** + * Timeout to use when making requests to OpenAI. + */ + timeout?: number; + + /** + * The maximum number of documents to embed in a single request. + */ + batchSize?: number; + + /** + * Whether to strip new lines from the input text. This is recommended by + * OpenAI, but may not be suitable for all use cases. + */ + stripNewLines?: boolean; + + /** + * Optional parameters for the pretrained model. + */ + pretrainedOptions?: PretrainedOptions; + + /** + * Optional parameters for the pipeline. 
+ */ + pipelineOptions?: FeatureExtractionPipelineOptions; +} + +/** + * @example + * ```typescript + * const model = new HuggingFaceTransformersEmbeddings({ + * model: "Xenova/all-MiniLM-L6-v2", + * }); + * + * // Embed a single query + * const res = await model.embedQuery( + * "What would be a good company name for a company that makes colorful socks?" + * ); + * console.log({ res }); + * + * // Embed multiple documents + * const documentRes = await model.embedDocuments(["Hello world", "Bye bye"]); + * console.log({ documentRes }); + * ``` + */ +export class HuggingFaceTransformersEmbeddings + extends Embeddings + implements HuggingFaceTransformersEmbeddingsParams +{ + model = "Xenova/all-MiniLM-L6-v2"; + + batchSize = 512; + + stripNewLines = true; + + timeout?: number; + + pretrainedOptions?: PretrainedOptions; + + pipelineOptions?: FeatureExtractionPipelineOptions; + + private pipelinePromise: Promise; + + constructor(fields?: Partial) { + super(fields ?? {}); + + this.model = fields?.model ?? this.model; + this.stripNewLines = fields?.stripNewLines ?? this.stripNewLines; + this.timeout = fields?.timeout; + this.pretrainedOptions = fields?.pretrainedOptions ?? {}; + this.pipelineOptions = { + pooling: "mean", + normalize: true, + ...fields?.pipelineOptions, + }; + } + + async embedDocuments(texts: string[]): Promise { + const batches = chunkArray( + this.stripNewLines ? texts.map((t) => t.replace(/\n/g, " ")) : texts, + this.batchSize + ); + + const batchRequests = batches.map((batch) => this.runEmbedding(batch)); + const batchResponses = await Promise.all(batchRequests); + const embeddings: number[][] = []; + + for (let i = 0; i < batchResponses.length; i += 1) { + const batchResponse = batchResponses[i]; + for (let j = 0; j < batchResponse.length; j += 1) { + embeddings.push(batchResponse[j]); + } + } + + return embeddings; + } + + async embedQuery(text: string): Promise { + const data = await this.runEmbedding([ + this.stripNewLines ? 
text.replace(/\n/g, " ") : text, + ]); + return data[0]; + } + + private async runEmbedding(texts: string[]) { + const pipe = await (this.pipelinePromise ??= ( + await import("@huggingface/transformers") + ).pipeline("feature-extraction", this.model, this.pretrainedOptions)); + + return this.caller.call(async () => { + const output = await pipe(texts, this.pipelineOptions); + return output.tolist(); + }); + } +} diff --git a/libs/langchain-community/src/embeddings/tests/hf_transformers.int.test.ts b/libs/langchain-community/src/embeddings/tests/huggingface_transformers.int.test.ts similarity index 91% rename from libs/langchain-community/src/embeddings/tests/hf_transformers.int.test.ts rename to libs/langchain-community/src/embeddings/tests/huggingface_transformers.int.test.ts index de67cc9a63b0..e10caf1e7032 100644 --- a/libs/langchain-community/src/embeddings/tests/hf_transformers.int.test.ts +++ b/libs/langchain-community/src/embeddings/tests/huggingface_transformers.int.test.ts @@ -1,5 +1,5 @@ import { test, expect } from "@jest/globals"; -import { HuggingFaceTransformersEmbeddings } from "../hf_transformers.js"; +import { HuggingFaceTransformersEmbeddings } from "../huggingface_transformers.js"; import { HNSWLib } from "../../vectorstores/hnswlib.js"; test("HuggingFaceTransformersEmbeddings", async () => { diff --git a/libs/langchain-community/src/load/import_constants.ts b/libs/langchain-community/src/load/import_constants.ts index 014d418e872d..6ac412ca9543 100644 --- a/libs/langchain-community/src/load/import_constants.ts +++ b/libs/langchain-community/src/load/import_constants.ts @@ -15,6 +15,7 @@ export const optionalImportEntrypoints: string[] = [ "langchain_community/embeddings/gradient_ai", "langchain_community/embeddings/hf", "langchain_community/embeddings/hf_transformers", + "langchain_community/embeddings/huggingface_transformers", "langchain_community/embeddings/ibm", "langchain_community/embeddings/jina", 
"langchain_community/embeddings/llama_cpp", diff --git a/yarn.lock b/yarn.lock index abae3190907d..590f795ce465 100644 --- a/yarn.lock +++ b/yarn.lock @@ -10560,13 +10560,6 @@ __metadata: languageName: node linkType: hard -"@huggingface/jinja@npm:^0.2.2": - version: 0.2.2 - resolution: "@huggingface/jinja@npm:0.2.2" - checksum: 8a6e3e287863d437920990afa2ca25d83c51997bd5ba0325ea90633e52469c2d901178cbd758cc362b45ad1c9521fccf372884fd59e58d2916d6b2e5bb15f776 - languageName: node - linkType: hard - "@huggingface/jinja@npm:^0.3.1": version: 0.3.1 resolution: "@huggingface/jinja@npm:0.3.1" @@ -10574,6 +10567,25 @@ __metadata: languageName: node linkType: hard +"@huggingface/jinja@npm:^0.3.2": + version: 0.3.2 + resolution: "@huggingface/jinja@npm:0.3.2" + checksum: 4bc7d00b6f8655a0032c2d89e38a095d0a87ef81a1c12fb6fd0404e1319e1ef6eef87734502689c1df39db4e77a7bb5996e7b6c1b4d6a768ecfa5a48f2a939a7 + languageName: node + linkType: hard + +"@huggingface/transformers@npm:^3.2.3": + version: 3.2.4 + resolution: "@huggingface/transformers@npm:3.2.4" + dependencies: + "@huggingface/jinja": ^0.3.2 + onnxruntime-node: 1.20.1 + onnxruntime-web: 1.21.0-dev.20241205-d27fecd3d3 + sharp: ^0.33.5 + checksum: fdff5cec1336fdb4ad923592d77348730f58263928a8c90d0f79aed7863e74a5521b9e99903c906a6e1c056fe0f81f811e4d403b62d3edb66da9389cff025acf + languageName: node + linkType: hard + "@humanwhocodes/config-array@npm:^0.11.11": version: 0.11.11 resolution: "@humanwhocodes/config-array@npm:0.11.11" @@ -10849,6 +10861,15 @@ __metadata: languageName: node linkType: hard +"@isaacs/fs-minipass@npm:^4.0.0": + version: 4.0.1 + resolution: "@isaacs/fs-minipass@npm:4.0.1" + dependencies: + minipass: ^7.0.4 + checksum: 5d36d289960e886484362d9eb6a51d1ea28baed5f5d0140bbe62b99bac52eaf06cc01c2bc0d3575977962f84f6b2c4387b043ee632216643d4787b0999465bf2 + languageName: node + linkType: hard + "@istanbuljs/load-nyc-config@npm:^1.0.0": version: 1.1.0 resolution: "@istanbuljs/load-nyc-config@npm:1.1.0" @@ -11817,6 +11838,7 
@@ __metadata: "@google-cloud/storage": ^7.7.0 "@gradientai/nodejs-sdk": ^1.2.0 "@huggingface/inference": ^2.6.4 + "@huggingface/transformers": ^3.2.3 "@ibm-cloud/watsonx-ai": ^1.3.0 "@jest/globals": ^29.5.0 "@lancedb/lancedb": ^0.13.0 @@ -11874,7 +11896,6 @@ __metadata: "@vercel/postgres": ^0.5.0 "@writerai/writer-sdk": ^0.40.2 "@xata.io/client": ^0.28.0 - "@xenova/transformers": ^2.17.2 "@zilliz/milvus2-sdk-node": ">=2.3.5" apify-client: ^2.7.1 assemblyai: ^4.6.0 @@ -11997,6 +12018,7 @@ __metadata: "@google-cloud/storage": ^6.10.1 || ^7.7.0 "@gradientai/nodejs-sdk": ^1.2.0 "@huggingface/inference": ^2.6.4 + "@huggingface/transformers": ^3.2.3 "@ibm-cloud/watsonx-ai": "*" "@lancedb/lancedb": ^0.12.0 "@langchain/core": ">=0.2.21 <0.4.0" @@ -12030,7 +12052,6 @@ __metadata: "@vercel/postgres": ^0.5.0 "@writerai/writer-sdk": ^0.40.2 "@xata.io/client": ^0.28.0 - "@xenova/transformers": ^2.17.2 "@zilliz/milvus2-sdk-node": ">=2.3.5" apify-client: ^2.7.1 assemblyai: ^4.6.0 @@ -12150,6 +12171,8 @@ __metadata: optional: true "@huggingface/inference": optional: true + "@huggingface/transformers": + optional: true "@lancedb/lancedb": optional: true "@layerup/layerup-security": @@ -12212,8 +12235,6 @@ __metadata: optional: true "@xata.io/client": optional: true - "@xenova/transformers": - optional: true "@zilliz/milvus2-sdk-node": optional: true apify-client: @@ -20994,21 +21015,6 @@ __metadata: languageName: node linkType: hard -"@xenova/transformers@npm:^2.17.2": - version: 2.17.2 - resolution: "@xenova/transformers@npm:2.17.2" - dependencies: - "@huggingface/jinja": ^0.2.2 - onnxruntime-node: 1.14.0 - onnxruntime-web: 1.14.0 - sharp: ^0.32.0 - dependenciesMeta: - onnxruntime-node: - optional: true - checksum: 5d49219995f401eedab6e0dcde6ad15ce5df0466388448703ca191e083bb0dc95692c1b539827d47399410d089cb078c47c862b0c550e34b54670fc435e83941 - languageName: node - linkType: hard - "@xmldom/xmldom@npm:^0.8.10, @xmldom/xmldom@npm:^0.8.6": version: 0.8.10 resolution: 
"@xmldom/xmldom@npm:0.8.10" @@ -23189,6 +23195,13 @@ __metadata: languageName: node linkType: hard +"chownr@npm:^3.0.0": + version: 3.0.0 + resolution: "chownr@npm:3.0.0" + checksum: fd73a4bab48b79e66903fe1cafbdc208956f41ea4f856df883d0c7277b7ab29fd33ee65f93b2ec9192fc0169238f2f8307b7735d27c155821d886b84aa97aa8d + languageName: node + linkType: hard + "chromadb@npm:^1.5.3": version: 1.5.3 resolution: "chromadb@npm:1.5.3" @@ -25600,7 +25613,7 @@ __metadata: languageName: node linkType: hard -"detect-libc@npm:2.0.2, detect-libc@npm:^2.0.2": +"detect-libc@npm:2.0.2": version: 2.0.2 resolution: "detect-libc@npm:2.0.2" checksum: 2b2cd3649b83d576f4be7cc37eb3b1815c79969c8b1a03a40a4d55d83bc74d010753485753448eacb98784abf22f7dbd3911fd3b60e29fda28fed2d1a997944d @@ -29440,6 +29453,22 @@ __metadata: languageName: node linkType: hard +"glob@npm:^10.3.7": + version: 10.4.5 + resolution: "glob@npm:10.4.5" + dependencies: + foreground-child: ^3.1.0 + jackspeak: ^3.1.2 + minimatch: ^9.0.4 + minipass: ^7.1.2 + package-json-from-dist: ^1.0.0 + path-scurry: ^1.11.1 + bin: + glob: dist/esm/bin.mjs + checksum: 0bc725de5e4862f9f387fd0f2b274baf16850dcd2714502ccf471ee401803997983e2c05590cb65f9675a3c6f2a58e7a53f9e365704108c6ad3cbf1d60934c4a + languageName: node + linkType: hard + "glob@npm:^7.0.0, glob@npm:^7.1.3, glob@npm:^7.1.4, glob@npm:^7.1.6": version: 7.2.3 resolution: "glob@npm:7.2.3" @@ -32047,6 +32076,19 @@ __metadata: languageName: node linkType: hard +"jackspeak@npm:^3.1.2": + version: 3.4.3 + resolution: "jackspeak@npm:3.4.3" + dependencies: + "@isaacs/cliui": ^8.0.2 + "@pkgjs/parseargs": ^0.11.0 + dependenciesMeta: + "@pkgjs/parseargs": + optional: true + checksum: be31027fc72e7cc726206b9f560395604b82e0fddb46c4cbf9f97d049bcef607491a5afc0699612eaa4213ca5be8fd3e1e7cd187b3040988b65c9489838a7c00 + languageName: node + linkType: hard + "javascript-stringify@npm:^2.0.1": version: 2.1.0 resolution: "javascript-stringify@npm:2.1.0" @@ -34025,7 +34067,7 @@ __metadata: languageName: node 
linkType: hard -"long@npm:*, long@npm:^5.2.1, long@npm:~5.2.3": +"long@npm:*, long@npm:^5.2.1, long@npm:^5.2.3, long@npm:~5.2.3": version: 5.2.3 resolution: "long@npm:5.2.3" checksum: 885ede7c3de4facccbd2cacc6168bae3a02c3e836159ea4252c87b6e34d40af819824b2d4edce330bfb5c4d6e8ce3ec5864bdcf9473fa1f53a4f8225860e5897 @@ -34845,6 +34887,13 @@ __metadata: languageName: node linkType: hard +"minipass@npm:^7.1.2": + version: 7.1.2 + resolution: "minipass@npm:7.1.2" + checksum: 2bfd325b95c555f2b4d2814d49325691c7bee937d753814861b0b49d5edcda55cbbf22b6b6a60bb91eddac8668771f03c5ff647dcd9d0f798e9548b9cdc46ee3 + languageName: node + linkType: hard + "minizlib@npm:^2.0.0, minizlib@npm:^2.1.1, minizlib@npm:^2.1.2": version: 2.1.2 resolution: "minizlib@npm:2.1.2" @@ -34855,6 +34904,16 @@ __metadata: languageName: node linkType: hard +"minizlib@npm:^3.0.1": + version: 3.0.1 + resolution: "minizlib@npm:3.0.1" + dependencies: + minipass: ^7.0.4 + rimraf: ^5.0.5 + checksum: da0a53899252380475240c587e52c824f8998d9720982ba5c4693c68e89230718884a209858c156c6e08d51aad35700a3589987e540593c36f6713fe30cd7338 + languageName: node + linkType: hard + "mitt@npm:3.0.1": version: 3.0.1 resolution: "mitt@npm:3.0.1" @@ -35376,15 +35435,6 @@ __metadata: languageName: node linkType: hard -"node-addon-api@npm:^6.1.0": - version: 6.1.0 - resolution: "node-addon-api@npm:6.1.0" - dependencies: - node-gyp: latest - checksum: 3a539510e677cfa3a833aca5397300e36141aca064cdc487554f2017110709a03a95da937e98c2a14ec3c626af7b2d1b6dabe629a481f9883143d0d5bff07bf2 - languageName: node - linkType: hard - "node-addon-api@npm:^7.0.0": version: 7.0.0 resolution: "node-addon-api@npm:7.0.0" @@ -36099,42 +36149,42 @@ __metadata: languageName: node linkType: hard -"onnx-proto@npm:^4.0.4": - version: 4.0.4 - resolution: "onnx-proto@npm:4.0.4" - dependencies: - protobufjs: ^6.8.8 - checksum: 4122ea200bb4a7c93a464c5a49351025537f5b2c9a5848a9b090700437e6c458a44491096502324a3d7e6fb388be4967a824d12ac18d7be6721d0d5779400fd5 
+"onnxruntime-common@npm:1.20.1": + version: 1.20.1 + resolution: "onnxruntime-common@npm:1.20.1" + checksum: 5cde8fae546c9a4a2d8f13e18cc4c346d77e733d08d1f6b95f4958fb09618592113d232db64049fafadbd18913ec8564e6c06c47dadc4c2aac8df4ed18b2956c languageName: node linkType: hard -"onnxruntime-common@npm:~1.14.0": - version: 1.14.0 - resolution: "onnxruntime-common@npm:1.14.0" - checksum: 6f0dda57440e94ad8c3df80c9812b38651daa4482af4159bada6cf19f8e09a5258994e57038acdfd54ecab7b9779e0e8ce37b3315ee6c48dd6c1c943fd15fa13 +"onnxruntime-common@npm:1.21.0-dev.20241205-6ed77cc374": + version: 1.21.0-dev.20241205-6ed77cc374 + resolution: "onnxruntime-common@npm:1.21.0-dev.20241205-6ed77cc374" + checksum: f490d6b1a8c059ce5665a468ac1c38de4c3729ead0bae173a0c9334c32a67fb2899972b6e185cc6c42f05e61f2c3da2738a814dbc89b5577206a7b17e29f4190 languageName: node linkType: hard -"onnxruntime-node@npm:1.14.0": - version: 1.14.0 - resolution: "onnxruntime-node@npm:1.14.0" +"onnxruntime-node@npm:1.20.1": + version: 1.20.1 + resolution: "onnxruntime-node@npm:1.20.1" dependencies: - onnxruntime-common: ~1.14.0 + onnxruntime-common: 1.20.1 + tar: ^7.0.1 + checksum: 6b5467eb1d08e1f5931ed1bff77e180f8600be917b690bad5edcfad61fcb797d29f74c5cff5eeb1f8bc95a36d261647d68ca88e149b0aa88412d8dea90901042 conditions: (os=win32 | os=darwin | os=linux) languageName: node linkType: hard -"onnxruntime-web@npm:1.14.0": - version: 1.14.0 - resolution: "onnxruntime-web@npm:1.14.0" +"onnxruntime-web@npm:1.21.0-dev.20241205-d27fecd3d3": + version: 1.21.0-dev.20241205-d27fecd3d3 + resolution: "onnxruntime-web@npm:1.21.0-dev.20241205-d27fecd3d3" dependencies: flatbuffers: ^1.12.0 guid-typescript: ^1.0.9 - long: ^4.0.0 - onnx-proto: ^4.0.4 - onnxruntime-common: ~1.14.0 + long: ^5.2.3 + onnxruntime-common: 1.21.0-dev.20241205-6ed77cc374 platform: ^1.3.6 - checksum: 6faa8886683c301e267dad336a8f819a33253f3b3e93c0fe7af7df2aa45e61f6737b43119d68a448d17d08cbcd83e17607f9242e2222d5b4f9552351ddaa3289 + protobufjs: ^7.2.4 + checksum: 
f668b638440dc8122209ce04c9e06b449bd2d7d0ce05be0d0618468d98746310e4a4d1a15afea30c86e98cea0053496d1c0fef5e6785153f16be8530f24018b8 languageName: node linkType: hard @@ -36603,6 +36653,13 @@ __metadata: languageName: node linkType: hard +"package-json-from-dist@npm:^1.0.0": + version: 1.0.1 + resolution: "package-json-from-dist@npm:1.0.1" + checksum: 58ee9538f2f762988433da00e26acc788036914d57c71c246bf0be1b60cdbd77dd60b6a3e1a30465f0b248aeb80079e0b34cb6050b1dfa18c06953bb1cbc7602 + languageName: node + linkType: hard + "package-json@npm:^10.0.0": version: 10.0.1 resolution: "package-json@npm:10.0.1" @@ -36898,6 +36955,16 @@ __metadata: languageName: node linkType: hard +"path-scurry@npm:^1.11.1": + version: 1.11.1 + resolution: "path-scurry@npm:1.11.1" + dependencies: + lru-cache: ^10.2.0 + minipass: ^5.0.0 || ^6.0.2 || ^7.0.0 + checksum: 890d5abcd593a7912dcce7cf7c6bf7a0b5648e3dee6caf0712c126ca0a65c7f3d7b9d769072a4d1baf370f61ce493ab5b038d59988688e0c5f3f646ee3c69023 + languageName: node + linkType: hard + "path-scurry@npm:^1.7.0": version: 1.9.2 resolution: "path-scurry@npm:1.9.2" @@ -39617,6 +39684,17 @@ __metadata: languageName: node linkType: hard +"rimraf@npm:^5.0.5": + version: 5.0.10 + resolution: "rimraf@npm:5.0.10" + dependencies: + glob: ^10.3.7 + bin: + rimraf: dist/esm/bin.mjs + checksum: 50e27388dd2b3fa6677385fc1e2966e9157c89c86853b96d02e6915663a96b7ff4d590e14f6f70e90f9b554093aa5dbc05ac3012876be558c06a65437337bc05 + languageName: node + linkType: hard + "robust-predicates@npm:^3.0.2": version: 3.0.2 resolution: "robust-predicates@npm:3.0.2" @@ -40285,23 +40363,6 @@ __metadata: languageName: node linkType: hard -"sharp@npm:^0.32.0": - version: 0.32.4 - resolution: "sharp@npm:0.32.4" - dependencies: - color: ^4.2.3 - detect-libc: ^2.0.2 - node-addon-api: ^6.1.0 - node-gyp: latest - prebuild-install: ^7.1.1 - semver: ^7.5.4 - simple-get: ^4.0.1 - tar-fs: ^3.0.4 - tunnel-agent: ^0.6.0 - checksum: 
52e3cfe8fbba2623a9b935be8a3d00d6993a2c56c775ac5cc89b273826db95f029f68a0029a37f96dcb6790aa2e3c05a02599035535b319f50ab31f5d86a13f0 - languageName: node - linkType: hard - "sharp@npm:^0.33.5": version: 0.33.5 resolution: "sharp@npm:0.33.5" @@ -40479,7 +40540,7 @@ __metadata: languageName: node linkType: hard -"simple-get@npm:^4.0.0, simple-get@npm:^4.0.1": +"simple-get@npm:^4.0.0": version: 4.0.1 resolution: "simple-get@npm:4.0.1" dependencies: @@ -41555,17 +41616,6 @@ __metadata: languageName: node linkType: hard -"tar-fs@npm:^3.0.4": - version: 3.0.4 - resolution: "tar-fs@npm:3.0.4" - dependencies: - mkdirp-classic: ^0.5.2 - pump: ^3.0.0 - tar-stream: ^3.1.5 - checksum: dcf4054f9e92ca0efe61c2b3f612914fb259a47900aa908a63106513a6d006c899b426ada53eb88d9dbbf089b5724c8e90b96a2c4ca6171845fa14203d734e30 - languageName: node - linkType: hard - "tar-fs@npm:^3.0.6": version: 3.0.6 resolution: "tar-fs@npm:3.0.6" @@ -41664,6 +41714,20 @@ __metadata: languageName: node linkType: hard +"tar@npm:^7.0.1": + version: 7.4.3 + resolution: "tar@npm:7.4.3" + dependencies: + "@isaacs/fs-minipass": ^4.0.0 + chownr: ^3.0.0 + minipass: ^7.1.2 + minizlib: ^3.0.1 + mkdirp: ^3.0.1 + yallist: ^5.0.0 + checksum: 8485350c0688331c94493031f417df069b778aadb25598abdad51862e007c39d1dd5310702c7be4a6784731a174799d8885d2fde0484269aea205b724d7b2ffa + languageName: node + linkType: hard + "teeny-request@npm:^9.0.0": version: 9.0.0 resolution: "teeny-request@npm:9.0.0" @@ -44537,6 +44601,13 @@ __metadata: languageName: node linkType: hard +"yallist@npm:^5.0.0": + version: 5.0.0 + resolution: "yallist@npm:5.0.0" + checksum: eba51182400b9f35b017daa7f419f434424410691bbc5de4f4240cc830fdef906b504424992700dc047f16b4d99100a6f8b8b11175c193f38008e9c96322b6a5 + languageName: node + linkType: hard + "yaml-loader@npm:^0.8.0": version: 0.8.0 resolution: "yaml-loader@npm:0.8.0" From 33c3d73cbd9507895e2f8f6b34ce97126a53bda9 Mon Sep 17 00:00:00 2001 From: Hyun <48207131+KoreanThinker@users.noreply.github.com> Date: Wed, 1 
Jan 2025 02:23:29 +0900 Subject: [PATCH 02/10] feat(community): Return metadata.title from CheerioDocumentLoader (#7437) --- libs/langchain-community/src/document_loaders/web/cheerio.ts | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/libs/langchain-community/src/document_loaders/web/cheerio.ts b/libs/langchain-community/src/document_loaders/web/cheerio.ts index 106b1ffe9d33..72d06aed2d10 100644 --- a/libs/langchain-community/src/document_loaders/web/cheerio.ts +++ b/libs/langchain-community/src/document_loaders/web/cheerio.ts @@ -130,8 +130,9 @@ export class CheerioWebBaseLoader */ async load(): Promise { const $ = await this.scrape(); + const title = $("title").text(); const text = $(this.selector).text(); - const metadata = { source: this.webPath }; + const metadata = { source: this.webPath, title }; return [new Document({ pageContent: text, metadata })]; } From 2abf88eaa67c63e2c89bb3d9b756f0832212c0a7 Mon Sep 17 00:00:00 2001 From: Charef Bahria <38106876+Fibii@users.noreply.github.com> Date: Tue, 31 Dec 2024 18:26:48 +0100 Subject: [PATCH 03/10] feat(community): Extend DocxLoader to load .doc files (#7421) --- .../document_loaders/file_loaders/docx.mdx | 46 +++++++++- libs/langchain-community/package.json | 6 ++ .../src/document_loaders/fs/docx.ts | 82 +++++++++++++++++- .../src/document_loaders/tests/docx.test.ts | 16 +++- .../tests/example_data/attention.doc | Bin 0 -> 56832 bytes yarn.lock | 33 +++++++ 6 files changed, 176 insertions(+), 7 deletions(-) create mode 100644 libs/langchain-community/src/document_loaders/tests/example_data/attention.doc diff --git a/docs/core_docs/docs/integrations/document_loaders/file_loaders/docx.mdx b/docs/core_docs/docs/integrations/document_loaders/file_loaders/docx.mdx index baaf464a5e5b..8e46cde7a1b8 100644 --- a/docs/core_docs/docs/integrations/document_loaders/file_loaders/docx.mdx +++ b/docs/core_docs/docs/integrations/document_loaders/file_loaders/docx.mdx @@ -4,17 +4,38 @@ hide_table_of_contents: 
true # Docx files -This example goes over how to load data from docx files. +The `DocxLoader` allows you to extract text data from Microsoft Word documents. It supports both the modern `.docx` format and the legacy `.doc` format. Depending on the file type, additional dependencies are required. -# Setup +--- + +## Setup + +To use `DocxLoader`, you'll need the `@langchain/community` integration along with either `mammoth` or `word-extractor` package: + +- **`mammoth`**: For processing `.docx` files. +- **`word-extractor`**: For handling `.doc` files. + +### Installation + +#### For `.docx` Files ```bash npm2yarn npm install @langchain/community @langchain/core mammoth ``` -# Usage +#### For `.doc` Files + +```bash npm2yarn +npm install @langchain/community @langchain/core word-extractor +``` + +## Usage + +### Loading `.docx` Files -```typescript +For `.docx` files, there is no need to explicitly specify any parameters when initializing the loader: + +```javascript import { DocxLoader } from "@langchain/community/document_loaders/fs/docx"; const loader = new DocxLoader( @@ -23,3 +44,20 @@ const loader = new DocxLoader( const docs = await loader.load(); ``` + +### Loading `.doc` Files + +For `.doc` files, you must explicitly specify the `type` as `doc` when initializing the loader: + +```javascript +import { DocxLoader } from "@langchain/community/document_loaders/fs/docx"; + +const loader = new DocxLoader( + "src/document_loaders/tests/example_data/attention.doc", + { + type: "doc", + } +); + +const docs = await loader.load(); +``` diff --git a/libs/langchain-community/package.json b/libs/langchain-community/package.json index 81077ddd3343..4695474df42c 100644 --- a/libs/langchain-community/package.json +++ b/libs/langchain-community/package.json @@ -125,6 +125,7 @@ "@types/pg": "^8.11.0", "@types/pg-copy-streams": "^1.2.2", "@types/uuid": "^9", + "@types/word-extractor": "^1", "@types/ws": "^8", "@typescript-eslint/eslint-plugin": "^5.58.0", 
"@typescript-eslint/parser": "^5.58.0", @@ -217,6 +218,7 @@ "voy-search": "0.6.2", "weaviate-ts-client": "^1.4.0", "web-auth-library": "^1.0.3", + "word-extractor": "^1.0.4", "youtube-transcript": "^1.0.6", "youtubei.js": "^9.1.0" }, @@ -344,6 +346,7 @@ "voy-search": "0.6.2", "weaviate-ts-client": "*", "web-auth-library": "^1.0.3", + "word-extractor": "*", "ws": "^8.14.2", "youtube-transcript": "^1.0.6", "youtubei.js": "^9.1.0" @@ -703,6 +706,9 @@ "web-auth-library": { "optional": true }, + "word-extractor": { + "optional": true + }, "ws": { "optional": true }, diff --git a/libs/langchain-community/src/document_loaders/fs/docx.ts b/libs/langchain-community/src/document_loaders/fs/docx.ts index 72518aec3b2e..e1edef2fc8e7 100644 --- a/libs/langchain-community/src/document_loaders/fs/docx.ts +++ b/libs/langchain-community/src/document_loaders/fs/docx.ts @@ -1,19 +1,33 @@ import { Document } from "@langchain/core/documents"; import { BufferLoader } from "langchain/document_loaders/fs/buffer"; +type DocxLoaderOptions = { + type: "docx" | "doc"; +}; /** * A class that extends the `BufferLoader` class. It represents a document * loader that loads documents from DOCX files. + * It has a constructor that takes a `filePathOrBlob` parameter representing the path to the word + * file or a Blob object, and an optional `options` parameter of type + * `DocxLoaderOptions` */ export class DocxLoader extends BufferLoader { - constructor(filePathOrBlob: string | Blob) { + protected options: DocxLoaderOptions = { type: "docx" }; + + constructor(filePathOrBlob: string | Blob, options?: DocxLoaderOptions) { super(filePathOrBlob); + if (options) { + this.options = { + ...options, + }; + } } /** * A method that takes a `raw` buffer and `metadata` as parameters and * returns a promise that resolves to an array of `Document` instances. 
It - * uses the `extractRawText` function from the `mammoth` module to extract + * uses the `extractRawText` function from the `mammoth` module or + * `extract` method from the `word-extractor` module to extract * the raw text content from the buffer. If the extracted text content is * empty, it returns an empty array. Otherwise, it creates a new * `Document` instance with the extracted text content and the provided @@ -26,6 +40,31 @@ export class DocxLoader extends BufferLoader { raw: Buffer, metadata: Document["metadata"] ): Promise { + if (this.options.type === "doc") { + return this.parseDoc(raw, metadata); + } + return this.parseDocx(raw, metadata); + } + + /** + * A private method that takes a `raw` buffer and `metadata` as parameters and + * returns a promise that resolves to an array of `Document` instances. It + * uses the `extractRawText` function from the `mammoth` module to extract + * the raw text content from the buffer. If the extracted text content is + * empty, it returns an empty array. Otherwise, it creates a new + * `Document` instance with the extracted text content and the provided + * metadata, and returns it as an array. + * @param raw The raw buffer from which to extract text content. + * @param metadata The metadata to be associated with the created `Document` instance. + * @returns A promise that resolves to an array of `Document` instances. + */ + private async parseDocx( + raw: Buffer, + metadata: Document["metadata"] + ): Promise { + if (this.options.type === "doc") { + return this.parseDoc(raw, metadata); + } const { extractRawText } = await DocxLoaderImports(); const docx = await extractRawText({ buffer: raw, @@ -40,6 +79,33 @@ export class DocxLoader extends BufferLoader { }), ]; } + + /** + * A private method that takes a `raw` buffer and `metadata` as parameters and + * returns a promise that resolves to an array of `Document` instances. 
It + * uses the `extract` method from the `word-extractor` module to extract + * the raw text content from the buffer. If the extracted text content is + * empty, it returns an empty array. Otherwise, it creates a new + * `Document` instance with the extracted text content and the provided + * metadata, and returns it as an array. + * @param raw The raw buffer from which to extract text content. + * @param metadata The metadata to be associated with the created `Document` instance. + * @returns A promise that resolves to an array of `Document` instances. + */ + private async parseDoc( + raw: Buffer, + metadata: Document["metadata"] + ): Promise { + const WordExtractor = await DocLoaderImports(); + const extractor = new WordExtractor(); + const doc = await extractor.extract(raw); + return [ + new Document({ + pageContent: doc.getBody(), + metadata, + }), + ]; + } } async function DocxLoaderImports() { @@ -53,3 +119,15 @@ async function DocxLoaderImports() { ); } } + +async function DocLoaderImports() { + try { + const WordExtractor = await import("word-extractor"); + return WordExtractor.default; + } catch (e) { + console.error(e); + throw new Error( + "Failed to load word-extractor. Please install it with eg. `npm install word-extractor`." 
+ ); + } +} diff --git a/libs/langchain-community/src/document_loaders/tests/docx.test.ts b/libs/langchain-community/src/document_loaders/tests/docx.test.ts index 63395bb51bc0..82e66aa91907 100644 --- a/libs/langchain-community/src/document_loaders/tests/docx.test.ts +++ b/libs/langchain-community/src/document_loaders/tests/docx.test.ts @@ -3,7 +3,7 @@ import * as url from "node:url"; import * as path from "node:path"; import { DocxLoader } from "../fs/docx.js"; -test("Test Word doc loader from file", async () => { +test("Test Word doc loader from .docx file", async () => { const filePath = path.resolve( path.dirname(url.fileURLToPath(import.meta.url)), "./example_data/attention.docx" @@ -14,3 +14,17 @@ test("Test Word doc loader from file", async () => { expect(docs.length).toBe(1); // not much text in the example expect(docs[0].pageContent).toContain("an interesting activity"); }); + +test("Test Word doc loader from .doc file", async () => { + const filePath = path.resolve( + path.dirname(url.fileURLToPath(import.meta.url)), + "./example_data/attention.doc" + ); + const loader = new DocxLoader(filePath, { + type: "doc", + }); + const docs = await loader.load(); + + expect(docs.length).toBe(1); // not much text in the example + expect(docs[0].pageContent).toContain("an interesting activity"); +}); diff --git a/libs/langchain-community/src/document_loaders/tests/example_data/attention.doc b/libs/langchain-community/src/document_loaders/tests/example_data/attention.doc new file mode 100644 index 0000000000000000000000000000000000000000..e68399c5a7c86d6c165d989fc7e02566984b2920 GIT binary patch literal 56832 zcmeFX1yq&I*Ef6!>2B$6=?>|XZj|osZUqGC4oL}V6p#`S6r?feMoJ_^Lb|>=c=vbT zKKJu{-+I@2-u0iee&@_J*WP>f)ZTk$&WHEdpPtkZFGBv3gduQ{%PV9E!XL_EK;Gfk zXDkQ=7UZF2mseL;zZQe+f71V=9=P~LSLhh3fI++g03CBg03-lp02BaJ05kw}01N<3 z04NVO_{9Oh1;7Kq2Ot0-1Rw&q2|x@$0ze8t20#u#0YC{r1waiz13(Kv2S5+N0Kf>q z1i%cy0>BDz3xExP9e@LX6MzeV8-NFZ7l03dA3y*=5I_h(7(fI-6yP?17=So{1b`%f 
z6o52<41g?v9DqE40)QgG9RMW&WdLZqp-=;PbpQw1*eSo_F_W%q4 z3;~P)i~&plOaaUQ%mFL_ECH+ltO0BQYys>5>;a&4I{xwtvj4v%4Tv-Nc!05xfjEI* zH;6B&;om0$Fk}9x$iLx)w&UN>{ac#<)c<=A{F^@eWB6klm2J)4oZX$RJScUY-7IcNJKK9YS~_`L z`vhf8XqmLLIRpd)5NMtaJY2tL6M%ddtabe%=x5(x{{5)*XGA}FcL^*B%-@KB&g)O% zd3z_SHdqyjNDvl=x+$o?AObI-^BMXcYM-H%n*c!Pz6Su*@8ttPecuZJsQ;Sd8ogm0#Ncab9SecaqzZvGo@tI zP|;?hRQ0g}-9{;IY3kqsHTT!FP}wTZZjPo7Y!=QQj@M;S8IsQ4mhO~VwvPY645fkU z0ri3601y)%5$N-E9{LQDIP^=8kG08n3{(@LQ z1^5Ryh$HY+4&a#r0LKCj1_lC03o)gIaMHry76I2^ln;@hg)q~?08gx{hyqaukWwUq zNHRkl<&lxx0q)2{AZm(;5GQ$Lcn<(0MO28XA_2r$9tPU`DiByK=tx4N5MxrsS3u7@^01&AP=C=?9z-WliP$jEC=a1w2d+$*K zJ&{~&fIa$?NYHb?N@a%Xb)^o&45XrhsLU4P4thrET0cvmA?0;c2oEL*9*7;Gw5C6y zg+`Bmq;>rXEi}sfBkld4(DM9+Ht{F4&=vbfxlevV3yp67NL%w0T4)sfN7~mvp@l}v zf295R6Iy6g{ezbEx_=j8SV0NeKa2&`?!trljs{i+bToEOQ~i|s#|ZvT&G}R6A7l7CwZu=Ue~jYq)apN_{xOcfQ=9&j`o~B@sbxWb z!$2OuLU%NXf3Xix-^BpIE!qLb+XsRGEH@EYaOl8tql1-03TzYx7&A;@Pf5YfN)p)S z2e8l{LH^Aaft3pEfW`HW0SoNba4hhEU-;n;VgMy2gdOWXY!WOb$ix3OH=sU>>3W2* zfH$HBatmO^0RL}tVS%jg=!;)H0CazQ3-m$LIoRO=F2k@lH-6`)1n%-TuOOxPkKO?4 zonRnKup&S@w5R`SbD=(h4b-v&eD<$>gY=6pA-eV@i?DmJP+x-nC+_+XVqQz>@%&1;$|S?F?oX)C)p)z2FQ0 z)CkoKny%FZzrU8jU6(*huQ31m`4J64Yy9h*D{y=P2g;%NyLV7|gaCg{NFdOK&J2^V zj7$9P8_>EDR28won!yJ7#+`rSL%Zh72lL^Nd^ms)=h`m9fEND~?~giNkp9ez3wUt> zPsu;l`GZd!_0N3&R#z;HKlA-}bp3?&XWoCS>vP;c^Zi?0LkRxN_utV~?dG3(|E;e7 znFBwcumAVVfj=BoK~2ezbiJN`c!yu@zz^H>A8M%2`#;ksQ0onyg?~)H57CcOD9sU&_ps9z^&f!dEVf@_y2N09JWJ$|A5^A9vw za0DA4cL*h@(DHf<1wDX;ZmyjGDKC)o*9QDq!jC6S5OO(!%@}lp2#&t#|6T18U?XV@ zYJ&3Ef|_8k|4#Ow%b6{q;otG;0&3_H!LLUJ?oi%;*^ht6%l|Ly{j0u$|B?^dch+F!phpJU*VhYj|B~~+ z>S@7$+0*}T4VeC=2H@J{Us6he7J6RKksrqwKW00)CZPwT`s*_K+j;P#@So-#be%v) z_>Xg0;a}BY+`;Ca_oePx_zt01n6hocEtN|Kh(W|B3D| z<^OF>LjEG)Kac;T9%%D}-g{d>B1kcQ*cY;+#=#B|5%X~|HMh6)pft0zwsjJv{?^h?O=)W(O0CPQ%%SWe zWoct8@8@Qz>8GM)?q_c-wJlu@^!If*RXVV_H;A11mrYOW)~qTH%n6wXE!ZpXNO;VQO(BL z!`a=&*@aR{lb4N?QrXnp)(Kj2&7rI;q~PT4Vd`XVsUR&%4P>y{+FA&4@k&Z^N=oue z%gPFHa?0=t3d-;Za`8z>335wv@^cFQ%KPgb=&!sMzw-+GBQJC^IJ$tEr7hiTy(}$c 
z-JBgMuNy67`!~AqN(x8`a7b}U%gPA;y)Hj^Eq=L={Tp5Q{!tfppbR^7X#X(OzZQY{ z1I_+6alylHQ{U1F%y&01v9Gov7}wJkV$lH>5X{vKBn;e{{p$q#dxHBd{lX0k0|VXy z{PyDCX0AFQ*hr8_7(6(z^MS>Nfy0Km>VkX%s~q{S)0^+q;2sS(pkW9siizK0LN=@K;pKBN|1z-D#q!|r=qFVuXod;3PYmFrL&cgoYBeoB>(;1Mnwob%pUQD5rujw|@8 zUUHcQyZ2xAj@{qqJZBiV+>gJ=FGqKvh!pd`r(I<^JNkMDt}e}RZJYAB)!nD+Mdp6r zdADUFhpMUxf)qwanld776_5$+q_?6R5=1_dgpjzSl;wSy_b7JD-Uidoip_WGlAKfRvA^SJpBqp&o|S7DSNb|(-t98c zl4DNUZAo+oKAY}g1=%g)zPgRhFNbwl)gRK>vo;c(xrW@b3y0m<8#-vozNO2*?5REQ z$KKV3os8%= zVNxD`59xRQdZ^`g-BG#^t9XZ$jA8tFEie5SGaV*$`aJ@ijm&-0ia5;e!=5XMWZuOQ zqS`#sDens5S59)GYvX`KW@}5IZIL*~U1;y)_nhkXE-&j(3JPXAMkRAg@0N;hZInxS z&G%J|nK55_ywCSv&}cf_BQ==he3P^WGo7%snqE}GV2dm>R4^)7e5wK->z$yv zm7~vpjaItCDYc|M`e+}Kn`~sIFF@3Hv{A>SAc2=)Hf~#{JW6d& zCb0v-B;Kqr44(A6)4J@C^$sd$t)5~wz4I=0 zCU3h#Xhd%oKFl-6hjMXi; z7kL@lwz>JC-f=SxZgN8j9*YFuH08fM-GxsRvT31KjCh<#`czE-Z$U5LF``F53zxgA zdb7Cyfn4AtvV!GTa}$-anegsBHq(iEA^z1c-x?e07)YLLrzZ<85PVYf?^aJ^Eq1Wf zHD#eq4qtvUqR%_mt0X3qiLtJujvY(jR;CFfkVpj?6CP~7O^CWQHCy4L6QD$d7E>;6 zP5oM3Ve7_ZIN?@vd1$0-@zCa-llO(-^bDUq@%R@zaXwlDP#^ zuwU==WWLu(VT(MA{}rU@)_Miq*zF8Uj~3VN8E$vefxh0b0h_UO$qnYlvCBksd%xHq zfruH*43Tje!9l4wSbp-%H&rS9jYkj5KFC<$GqY(#C+OFe-4Pl}!DcRwvigoAC|!<> zzF49rG%+%t#rbyS=HnxOxI;a}Xwi6^(fsfj3c09t*J*ZR5t72$Z>1<)UIxaP_HC~| za_%fP@lh$vQN6@6Nn(&XM zZS>IF`b4oNQsx>wMR=&&nsC@LHYjz@lA+mK_Cb}^f@9D=2{y!ZAO6z}H4jHZ*LebZ z7-E-nP^_=o$>b2c&$%D9nDwoQ6W1X>jT5mqG)E*``7;MKU?h`V?^^ddJC#uFd?lVr zw9V|nS`j*6hIJabJs*f%Y0o&1&`!FfBeR4edJ+fek%-pDFfrUWLsVVGsAk9B_wDk< zSVQ>O?CAfH-bkqo&x$J(#Yh$DO`m*zFAhx z^pg)WZd=C{WUMaW(w&oV?{#(Uo&Mn6WZbh{-9y<*%*^=3+r~%P!C!BvSwmgTgv`5l zM1@}975y9^f;H)>d7?cTd~@TFUbDKC@7#7zwG=BCCs;+BW~JPFF!$(K?haSJ>V zQ-W}hLL34K?tp1%l(-v3|M_!?@XT1$n~PbT+e@n%O{NcbEw{I{!-~0-KU-;-N<7-c z4Me%wRx{Pt7`GmD>mcT~W8+A@M^3|v{hg*Y3G_BPT&=ICW@;(QCp``p0t% z9gmCM>Ff#NGFsM;S7UM2Q-8Ia9aQNX*E_@g+A+eWC%zk!7igyKjI!Kfu$StzoG++B z1gFy%5)xYeMtZ!0V>k8mJDR_IhP;5)twJ+1bD{#-t-|HPsi)C84%7LW@{es(im1dP zaC&5^smVFX^i~e>YzEc(bCY$#oXu9|ZVIAB$?p1vv9a+HaTytriDunT)bVa;tFhv1 
z=|K=p;GS!_@Ab4;qcpjCxyRI|?%CPX_B7aRSS7}@IDJhzSXm){dT^hWEvkY@z&mcL z@^P14-E;31W|0NydAkN&UH#aBNqwX~UJXIti>@kSL!k3*9%= zg-5+NBsA6`K<+#3U0D)13c1Uu*4vBJH13bD08d6&uNP{MWv|L|xvT;^Ai{0yV=_^-|@}}L0o@Be6s_#19ntMTe zbQDiZMO1l3xqIE}o-V_N&VMLFoY_|F`EFDr=5e`6YT6&zvWLb`%&i=>?mRfWLsT>7 zSt>6%XMd1ssh!!}o>}&;K042oV7mpS)ux#Xc6Otk3B?`b%)br&i!R^fY0IO;!NV!n zhKmONJ{&T3v?NAkl%mqRe5N#ycpi?z;)p*Zsm!rnB5A+lR#LHG4+TH`7rZfP&MJ#@HZ^9kW(@k#55UwFZ*AaA*V!9FBd(-lVaake+~ zoGO8bu*ahe5o-bPf}h$a#qb5>>Ynq5&}#_e|OkFC0elU&M2p_g_K2?(G(Zw>^+% z2~IIuo@PmJoXUBgDYs=iL&MjaPWh0{lhiD`+u0G7z5h-vr+Bg2o0xYiSd;Kxr@SrB z-On-J?vgrI*q<>>OZhGBoT~+Se66~Tp3dui1&NsR*oI^8VA)7Ej&ROwVJWkOEu;`H z&?rr0N@aX$sM4YtC!gihSw6tfqL7`820*HeHGv*Bj`<^mf6vbRY)+(EqTeSSiUx(L8-XdqC zRmZ&G(Y}K4%1-#R-s@G`?Bq5L5;7Ou%CdU3GHpDh{=w-<=3}-@9pO#0_p{IVneb*6 ztCGk$aS|dphsJ3ddhCsKFjKzj*?Qhu8O1EApjeP)t&tpAT$92SV^!Gf1snJL+mNo7K)Z0d^-NQo zg>Ms4g-Gp7L^axk?(&kmWldVHWU%h`x8&SG0)saU3vpqgArZwVru#VfyAT^8mAVYbDR4tC*(aS4YWEuD@dg~NT1RqpC3T0-T0 zv#gQSm&H_Ce6NMjKg&fpw%$Vx+vgJwGQu+>s^v-RN#an6wUn^QMr9$;h;_6~Nn$}b zomxFh**1u0&v2#WHW_fIO|XNtaCf{5rzama8-h`MN+nNN-aE%QVc=|@P_n`tn-*&y zB~U_lui=e?ZplrO(k>X+o2*k6UA^vDeoD0K0tla|iH8guJ!ZVKd1m>tBi)|Ul5Zsn z^+v2%(gqY@O*A{4IEKD_&Btd(;Y%wkOTqjcaphnlb;9o@@97mJ{0bs>$yf5x z*tl$eLch(%sL@oJ>1Q9ctZl^0s;ssB%*ggMbk8Is%_~SuM&9gRdo8a?z+f=)-W8;E z`3hpgT-uiJD)1R&>D=mpeqLpCg@M7!Nm53>N1|=4Y0<4pZ3FlSh7^BzciIN5Nf~kU z4r$(;OOvB-5icd$ELm)G16*TrYu(7qOhjLi2%c1+yu_>x5*`yRM=(@-&Dra$hQ*Yq zaA>bhA?o2Uo>yV#soc6exC|5^Hu<#fJNGQ&7)mM8G+!)X0IUj0<3F7JZ;N4VCA zevK@YE<5b^D8(X1sv=!;vtwuam9or=k60raW|Qk0-^S@A3Evyuk)|QE#S6`xM2zWizuJMJc#I~H28@7lI+qi5=W`#sVbM$dMdxI-iil04vFtJBAmfbI z7e@$YB3kOgy=aD>ons6Ng!{e3!bDwVkGJJB!o?&swU;AewXv|U)+Ym`?%}K-922I@ zbq4YoRBkymOKc0(>X~*0C(vM~J-?HUJrwZnlS9E81&V{k?F5}0XIh0Pcgn6HtGbNi zE%w6pB-3^DR0aKE_zTR-ejbzcGCNY!&v>K0m)5#i%Flh08|dfux}__G2+Q>GgDF)A zycd~2k%)JKQ$k>cwLa?EyUQ9(gdjCFJ!9bxoIG5A^}flK^cbwVJE8Na1uOJ#P^9Wpx2M@tSlRDUBeKXXl zx0Io(TfEGWzj}}Ukj)DJOkci?-X2kdZBo8Q+sm1R*%+rZb5IKLgV2tfYc0oGfI-td 
zYsU0oT(Q3VH?&F^c)!Rte-{ghRFoC7mkM!98~su&aI;dd5{TpIw%=S^D+P2PoEP=l zykrq1niX}72z4>XE4YLII$@;ZGbNE!|GOApsgs1hs1Fl%-=1F&r~InAJG76eOR%q^Dk|eZsds)MVB7{&-%FXa&g_ zj?xQza<{jwjMci~UBnCWCALbGI=BiIvO)e_%wCfff135dM=e?x$XFKwvwqqUrr$%Y zG>Bl?Z+2E6iSVqVS79$OLb}HGI`fD_8vYh{u4Zn_*Tta(x-bX4Yy z8qH;j8>X*n-IqppCbHD`EIF;>4Lj~h-b%$3HpfNH=|0i>gh~EEp`W zM4NwFVL2r#jYg-u&l@Gd6^O_;^U`pX6yxd6tdn+FE?)Ir!GgAPrs3(~i*y3!FaG^P z$>uJ+o};f2vqq}*#Yp>|aqwZ~E$Un4jZTRt_^96!`H|h7AYf1l{$anX8460pMo%^? z=U=ejd;VYC1gIRrT|sn)_D=@8j0X^2_OUZvLAsSxw|$MLh@aoK5(_&Co_D>1yuimK zpPE&}Z4FyNuyUJRlOi{pyer~f%bZ2Q7i-3coi(}_AI7D8KP&_d zbRFgj#eQ2|6B!g0a$O$2Z8j30W7l~iI~DY?_elBV^+;@FLfLn_%ma#SR@#7A-CIE? zFU$#RRkf+nmZt>+zIvi*U=s{s3vHIWXDdo3@+4zpgTJ$3lG#@?=Gy8Iy>mx1D;qa>cNHupv{hTRbtu z?<0Xt3za4WYp){&rr~wfl56Z^(TEtwGYDK~*XxI{qKKW3)D|Y(kZ-pfj%S^)?VL@} zzguwfwD>&0aS-dq9iKGu{6JTM9BnPEnY+tK?DK*~bf$0v|9y=QjHDRSP9I7iw|Ltx zE~(o)Jn~VFFr9?E>$c%d;6(WT$ceY8QeY~6abW~C4+nR@>x;dEsK|=_fRD;lFB`Ez zj5OE1mbdB!>ihDQEaUpeN4m9px_EB-uJUQEhB13_yfzt)OmT-|G>RIuNp5WXaP&T^ zY2@@77@z2pbsZbYPKgZagi#-^?Tj{eyc_E76>S@etGiWo7DSEvHP*f@y?d9dS%reh zKJXbXfe)g!Pi6t-!Xb|U#aH6Gkw$q=yVcENlxQ8tagSB=V{BTIr^wqQXHk|&u9y!Afxe(uc&UGDX`gu)V!bkF+bdq!VnO2|Q&w3mJ>hex|& zk275wd)T82`lK9)_#V?DWug)p%?ITWKJ4+%fIcplSK$r;v{AHeZjW9 zi8+{Og+wP6I)On8Bz_y!U%ss7_awSbOnoXSj6b^n{=IrK8QzAKM3{)Vu_hhsL6L>K z#mI(w=LsV63#$i~Ow!##?+BR0A^g~6Mr9@O#Du*W>koNZdq&IIR0nU|CAIn7*BejH z@}hcCoE+RYoNSi8L+iml4n{RI(8Rk>BDWmitXRY=!<7)u#DYKh#g(t`zUHRdSNLu$ z^sqNHRiti?OGXmC`$tnltAm4&&4Q=p-(UL8rjt)EO&*fodW>yT<7yTeyk79^q#Q4> z`$ThP9%H;_z|+sjN~BNcky|9wFthmZRG(oUqnTrE83Qjqu{vglC5liVJHr{_lBjZ) zkLy`mi&N_Bi{_{|?NlFz13JdVPRizkG1mwE?_P*(eHo3uERrZ@n1-ti7Q~mWa`;?7 zpva;Q9x}1TnRL~<<~S{mO!zmwQgTVI(_! 
zgQGKsCELQWiVj1s!mXdYx>%$a@2v)D?t8 zKj%xO{Do@n%b`ulgBBE9#h-n`7F@&*92Uf~2W8Hm+gB#-<(*q}U9P2GBv!zEHa}KP zzgVL+-s5U*se4HyX-vec7+5Ntl`f zX`Xq%LpVE**QA6Rp8e z7`=$M^&CUp#uko0Z;Fq)f_#+HwJ-PYn-ubHb+UVHsdN(`^J~8d?PYS*=Kz@J5X4Uq z%z5wj7GmR>4jR|<-;oDvw{zJ0X5F5_73 z2U2I0UW!W*Jq_>azI(weBNR+Wr>_&25qTUKOr~aNvk*kOZglpNX@s#OiB_ypG~al| z7Urb9>C5H7@sY-1VCw_(GduZ(p@;jt7hzQR!vlA%t6q#LoE_s>Cy_2K5~7t?ljh3m zh?Z9;*=e%sV7k+N%`u?Um409r+0mgv=;MmDLF399u@Px6+FH>dhOwSbzrp8XjKEKc zJ5_2XZZVAcwY9%+eEfUVn`c}sVk{dJWS{gbTXkU~$r=Q?kq)rWP_fN7G*n&776$^K zH!);CSuEHSBeNN1H?ebn^Bh%IBZdD=@nwE&5bQhcULZjk75*ghj zZ*&Xd-P0%CT(o)C^IS=SR=#)nscq_e-)2XyVnmlcVvr{keOk*MuuiDrONxx~wccDL zu`BTr7E$qV2y-Z{%*{#&oH`%bSn)g2){B<}9))3x+_=*3nwWKz_p(NKWjJsm>y_RW zWVdi3tYcvdmFR9_^kAY_3;8x|oaRfh%?{by3A|aGnd~rLf%r3rII~=^Sg}$MY11Fx zMChDkE1FrQJ+l+{sqbK;clq#aqJt=oy38cqdgsKyD0=FS^0Y!o>Ej-gs)FuA74;&s zfg%}E0LdIdeeKpBoAxf3nW%pDNDU-2tmy zksaY0WYV^V$x@5p$|9%k35TD`PxMA~{UA|bquQED;&ds6Pf^8jq(mcbWcE{N`kHhJ zCoWTjiy_7OmLb=tjBJbASBo=zcj7+xCcXWBItI~8IkeM46Q_+D)724NKEQo!@ofXe zap|LZ**87&S6P=MNy8jp>jWVQH*DH>+P{F zdEei3rsoJwpJ7$(xG(zRJ8{{R{>7#=1_O`wLdHyqvsLJg|F9>wNCQq@oZUVJp8$gKHcy3KrW( z?mGd0XGo8+T3j+;Qu^}l)LXQ`#(P2)Ygm891vWDesAbFXcWO?)W*C9hH(#S(?8 z<_Oh!3?pN!O?!EF_hMTiKaMpulQvqV-cmIn@JmI+h*QBgGVTT1`NNFD z%;FxKTtQwNOV&!vm;Fwa`Jusj-$$FTAhubl!P|uK?Yo(%mj@f*F8skZ`bqWX6$I{5 z+n9aVbx@3DrhPehs7mv5gujY}yD^-K>*Z#ws|@`!^Xw|6f&LL?2cChHO`J!{+_9K3 z@5QWSqx+YSQTN zI<<>lyEGLP6I<5U&7Ngd3Q#DLwTe+F4Re$TTr)N6z@7T?Y-6#QPo=p_GP5i zLHaG*!ZG^pMfQt5i#9T`_z*+&IKYn%J!`z# z?${<^46GB7b?E2}kuO%OS;fdCbE2LrgqMFj5sJ5b(mQG2Z+Iz)O=1`2j!L`DXsl1l zjri7NS6oSps$UZcDdcNID;xhRP1x=`iSSZj@Rx}$M`Vc=T}o$ZBm2h zpOk#Cp{ly`GX3pB)V97LQIm!;Y0QT&qw_aeu{L#x@94){ZWlIGloie)%>Uo%QLdlkit7UM{g|t5xH(@a1pkR+3kcHoqR{9mCh{ymP^yg8M5@ zKwK2D>hFE-d{U3!oY&M&$+f`w*gc4M1%YQ!t?JX%*lq22Ssj$>VVoEB#v?VRPON)Q zBZ+1QQ|Skgr)ysM$whwB?0KZ?h`5tNK6BhTmP50HN^+AHjoNvj1P9a-_%vb-4E zz+d0oFe=<~zU8v32=+vKdesBBEXLNNuVv8-u zhg|(kFeruJGgZt_)*u=%N|vN=dKJV|wb96Aab&Twj$y=kt@z*5U)+9YPT!Z+^ojFA 
zL&JlD(o?r_fa>;ii$8xBhWo=O{4uPgV-%HeZ`Rf{`py%_Kky!j3m*Hv{)YKD=9%i} z!3myhnwO_{PAu<{W=^OYT|vaZHB1*!^l=?`cCIjVd|$wvzqmG2ib^sZVnoN-%7I!TzF+P#Ta)j*8WOHq0tP@w)M0 zxnW*kFjjYpLfdxpiH-0$PaGsLEbnI39+qA1T)@m;c4U;pnGrd6)>M%#J8sPpV)HLG3J@O+{1a^)-MRl@VUWz3!S#nH*>S1%gk zmKO}IdBOE>;Lk39d5V4g!>Kb9o@iGY+i6!+d3vMgRKYof#ysz2UQfzz;b-o?!{^l} zGhL}BWQ~IyX)+6sXgJsIHtUt0m|6dxk53OC@zFFtBpU0cJRi;T=)(z3emF$89)Xq^ zPSq%RI$9Y|TCdRJ`-F%sb+t8eb7IkXbq|p*G(NP8Z_XWm%L;OR8MqPr%!vPWtK42& z%l3y0#lVr{<}(44ys=xT-!byY_MPAHtG1~gtnWBugVXi17h)QD9+zxE?}!)A!0%I(yFqd~8E+g+KXhr0u#% zFcM=8SA_Z!N?_2#ywRB@Yqk*@h@aV3&TT$# zIJJC;Gox86Tc7z%ze1zZnv%hcf>wJkH8i>Ep45-WC^kR1vO0HkT z>dk~LQMhYLv`;CwDMM$O79;XNDQ=xoulN3IPPKf;m!Y2W@JG5W_wydqOTN`-!ByuI z+S){@eV)9-#rm1b%4~sJ7pgY&VZJAHjOF!@=(3hl_40#j=r>Og|N537_ z@_;U}tJ$A$6+a)TPk*##?6S;D1D}b?O>dR%(cQFU9#>klJ`-^rSlEI`r{3#){YgmQ zDxVa~@Dv9$_ccY0vbM#&rtK$2fW37-I@^^THZ3P3EyJVzJtg_*4pC`?#oNb-+S(YD z8dwF?!U!ql$nM;;bnjI>-8cl=rwgcu6rL(iQE1n!v=KMFvljYnF-g{Y%SHGM?#8q^ zN@$uq>dJ^Ref9DrmVsD60!kzm{-j>2Y{|1Hyfck+EO8ZaT;IJJYA?#WUPrk*bAI*C z;3jDCxi9aH+C!t28=y$K5-OR|;pLbk$lOgH24jkaE^Oy1>3{2##qvz#EM}jqL`X;A z-fS#F9?#t>y6%+LFMF-?5jOYxa$7#RP9E`TlW<2rlIR-Ty-ggGJq_d&X{Kv%Y(-nk2UJKbM?#L_ORjal?ZG0W`3P4fwS1GQqz%-&JTY~TtdGD zGn0acMLnc{2bP6d0^Dy=Ex(%99gcoZKE3v2@~qyIp%p`Wm8>y2)0>qmzI*q;cfGGX ztdgs7=Cj;@20tP~?(?Z_j`0G?4dt{P5?_<<4kK}ZM^iPxZ_sKmwLDtc=$LjHsu`^IL` zi1DG-734DxxZ^NX_Ow4*SIno^+ZKG+8|)&KZ6CFI+t~DeXWsD|*~43o=3h#~aa0yH zt*JDTK044M>Bi(TM?G?GN|f?Bz&MTC3NuDI{914PP1DdW|ES5msZp@${cO!=<_?2` zfs8z|z&O*sS}mpqs@O0UBgeNfNyAJpzHg+Ei6QizJS97Q2^S23jOv=vX*AL_90Uke zgwKt$#LY2#&c3`K;j-w7=t*E`bGX4=*3;=;`Xo&5NKVtx?`-gcPEPWRn(lfgZ1V5+hh^F^G_uUbwlILabbtf52!eq=35ig zw#GP66qufe=`KWAjH?zDN4z{ybecNfW(q;mbhYNbA7ugzPSF4)B;*(g$4AWIijdVdKOc6BNX#6WhE*#9hED& zum2;*^fxtriG*hsECB|AZ{Z@5h&0nn_=7rTFh6YWIv2Z1vn@UGpE~J@Dj3_ox7VOT zSFxEyV3!M5GSK7{702=+*5pI`d_s)DD*iq4{?A@$q>rS99+77R!kUbtoMj)OwA_B$ z#FBONvJ*yo=bNjLmwrM1bn6u)A;Spe%t3`n6gB?EUCSu5?4?=i*)BZV;zhHiE*(m| zenAV*Fd0qKPJ|Y!wZ^CC-wGjUFn`_$8}(@B|k 
zRuI^W$_b`~iYm?GHu#S)NNe1*-&#yoFA|1RrP!(QOkkSHOPunTpPvPsb*^Nq=IkAz zo<&%()F$o^%rI>5zmha4p<}3#)$HcngGsDOE${S9@VO*sw!>(BSvAAfjAPA>SAd+N zlL+P#Z6wlYYAlB;Rr9k2^On_;tz@x1mK!cFbC{>yIx%Cpf+V_kwW|e&>6FO~Ee>*Y zteBDnjRo6rk>5SL*BueG(!~{9AYQDMP*Y+&hz z=cGE88dXcp(KO?a)ID={x~+wu5jhYr*!-b<@?oK{sXG%t#_5nm;p^Id!R{`7&*7XJ zSsD5A`yXZc`ddx}b@D$2w5w6^PDqb0)M-yFwL3a}51ij!^u3=V*zD9jM$(ebrT2DR zp;w>oK{~vCvtgM7&z5tXW_+c3nT*f5FKvFO9yyk})srzoMfN9KT5 zsh!kt686aashQ!H^DQ;%FHFz3)n_#9qbN})$25G#dfB$g-ut|bIlkGMwQZ9X95d2; z6D}=td?Ed5KKW$MJbSWSw?}4c8!ty&X1y$ZA$g|SL%T(pRU@*F99&+@X0zraHo}+ttpjLtOST-tn79&Fjq-gRFy(p6V$L2}RXQ zc6X-s-lV7VDbIlvQqyu9!$`-eZj8FP2-j2il6#x!hBxO2a;HYplj8tyOB0 zyU6jH_suE@M?KF~_L>V7I3i%k*i#4x~8Gf#i>TKgL?$W`0>#;+CcOC${Rb4;t1ab-98++G>SW}YTwpf$a@~tV-QfzA^T1$9HKNdfSjMu0Ui$C)Ris>(37cN%??_WX8 zIQg$2%`e(qeK0ri)#vv}x4SO71wS4ozBw4UxSR5g%MYYe0CirG_NgOLlYRh!<*IR)lcg+*&k)gZJ#VuF2$v)26KX{B8VEOA5Z-bkWghUnM0($=dm?N? zjON9i^DwEZtJx|nO}dP)p_TS)QB8dim>OmsR-pOctV$Lm)`mxI5b-ETkOPYB*0ryC{C5RO$0 zYeaAM9#R=Hd~TC1v@%o8X2k5{4}Y)n@GxB@Q9g4PCp?bI6e3wlkWn-nw& z5B42VJvh=SneF2TG~fy8r)J^}&&~HnM|Dk}BbUbYwI0Uc+ap$ zddc=)mx!gMB1o#a3{%}ZvXJ_&&I482?0v@6)=H_y+lu%2*vn>bH)VT#*EQCtdr$_ReuQM#DaaBeeAkn$i;EYa9oQ&JEl-g>aebtZ zV~8gj7OFN({|F%C7tJC#64*w|>G(3?CmF z_!l0UuKBlj?cA4cZYVM|9R4zl5jkb`V|ZvE_LT^gqE=&AqTI*GH@1m|tbA{5 z3TJi{luaYb$8^a@kzZ=wMVGavrO#5rCNQFHqeI?#(Oi*bicbXxZ^}99zzIGikUNTQt=Z^>Q!ajMJNl6l%A-sxOsu z4Hw!9El$aNaH*^|Lm_U?d+SS#BvEAa_>yauf~6}-Zr_MFRfsO*6^>9l$T zrkAf5bRE9Qx^Q{*;uh8kR7A1YVy_ii=?~eJ8_d=BR!jSS^|%>bq0ZzkyQ1D;5cFu_ z0!{nOP0m=xZ3daoFTzc~E?_&v*q`#K#AH|4e7o%VnO4T6R(75Ii%qjZe}tU#>PV%a zvtqdqX?mwf`Tb&<5j8Kg&c!+hi+rP-BvDmlTvW5txjyFU{bY9~Xa>AqpcNXtC}E0l zav)I@yV!%#1Cv)qP4z~*^=(CrZRXVuDLX)Mv20Q<%??+>NeV?yGK1k z71fAB|17JD>^srXati6jilm(Q3u5EkcTTOg?uHSr92zC*pDgWOua2UbE?8`%unjTX z_2R>gQE0A~dma7`sZk2uwQ2WW?jD2w=<9;PX}jlyj~k-LJa49`H+gVhGz>m61K8WN{cbcrhc6lD_(9IIiU496w#f$vN8} z<`{^xs?ElB&aL%!`q+i@rS4skhNe$UE+wV9esNKKxZS(tm1s4E#5&pR!XC}dl`P%A zEUlQJrPU&SXP5mwO5d0QHKqj*+DX~!eVq2aP;(vc*JNT^c>^9@PyNbn=h1LeWgp_n 
zzobzmyKwyjVaQtKa_t>;zxUr<*Q}3Gos0GyF@M5}a|+)Fot3H)?O*>Y$kf)iNY}=T zi9f)OCuIL@vu8}5kebQrrf^tyUCFhSoEPQD8P=#@N6FRoYa*`T`={TR^q{0J_*j^~ zw;mNzDzwHKmezdO{^&{Hf$UyO@bxwZD*XXgr7c$C7a8oTb}ygHCi1X{C~K=Vw|r3g zGJK7Dct?%hBYnov$L=TWL(Qd?C#S8sFAR~X%sZ5@cwfr5W-*FInuJ%$6rx#FCvnP*9A6RsI>M=}JW?Vnxs$CjAHF7AIGqs!4{i zjY~pks)XM*3afJ0y|iMZbU(RI8KUDYmtQ~pYP?MEwKkJ^vAD>IGSj#Iqua^ASY_~p zS+iIEp8KJG0t5Q1dB+H?)OMZr&8ndy?V!5xuWuHEl#)`C%H%1haHyk|tCg7oCa7P3 zrj$;alfaM_mMU`%C&}~u;%bTU=@7evbe$mWGKPMB%QVx-QL*s-*mz6z5>7Fjl4a+Y zwBN&Xara%6BL-Z*5xlA5R#d?fj~C&W(Cqj6GQzS<;cL@Bzhzmq_$16uJH#-G`a&g0h9yCrP4wni)(Vm**}|JOvloghCSW3d9!KtUH4R2b((?s6KVc_cKVdr{ z)Op)5Z9uG+>?iIScPa5(m zl<(uBqv8v5c31Rtbz^q5QCcQW$m0r8PsUx}uWC}dN2)gdmh9%Y58vK;%ohyVb8T-g z;~6Z+JM6?wJ+NS<>2+SJJiQXvJHQ>f{9L6&g%9nu=pjB1oH zpLzFo(3lfi2JT=xS+sp_z?R{$84_x&hu*4l{MudLEi9_pu&3R+9mZNPM{T;=ze{Xr z=ru!_#UGYr;)xac%-Em9LSLhKfwlHoZv^8-A8FkCGDne?PT9tNB+rh6x2URP>I&Da zl`}eKN3QM&)`q^2H%kg&Abh3O^Ubk}-vQ0%F3b24Wv0s!9~=24PR$G~37kFplo{L~ z)GA4a4F#UPJ+=welHmhh3=6oNMfRkJQHtX83ghfb z79H3IoLF!f1V6k$gEp-Qv`I&+{Qi1^SB-S zy{+?!5z`xuw1xvKuB${skF?|$51)NMdGxuO#@$vY)3YYfp4o5`HQd3ed`MeIak+@m z_Blb9!bptlB*tbe?nu!42wYu4hu0=1@5-JVU8QECS;&tbaG9)ms`-IW+$YC^Xhym^ zuYyUQ7Hihm#DJYRB|js@>GRr6P4c-GGqcv)lUli()!`E~Ns&~P43G2PXxJ<9pUUMX zJtMo_uh;tK!O%wjS`(Db_t_6Ax}@sb*7sc;sYS~qrt-Bus0@=taggdI26DQ;Ua8C> zJPI9PsG14l6vcfr!t28LPW}`1liR5Ixaf@810-6~4VX7LEx80d%^U=s`%1AbI-g%n zrHv1G9Q*FWm-KLa)%p*;-7n1F5|@P9f2SfKZc%8bPZ2>=!W!Y?Y9+d4o4VXIHPwf< zt@@Byva>2S4)?N-{ZxDElA}`prpM7~Z)sBZ@*OWtUKQObNLjSR34 zL^p5e#6Ka5RJVxOIW3?(&e(b`6o_gRnqjG+%D=w#v`|$Bdego&Rr-X#S*AQONxp+8 zS6_=Z7|&@Ugt>D?D&i4MNAP%lw)7{*Z@HygmO&4`-M!Ix&)GjhJtyQj>+{4|JV*UW z-Yuv0c+DM~!_PxL#0yJIATD=`E;roLroLY=5Uv{4))f`s1O=qIkKuX9?Xl7kiN%ea5FWWPl< z+3BF<&3jVmL{wwoh+l%{Amaky*2!-Y7GWuA>|O zJc*sWdD^B#%sf@i78(Df6bq%@k%-5wIq|U+U#81)ZxHS!0|{O?Iz}@ElI&=iM3d_7 zwz=t5uF3;^4Br>k`kfd;qu$;QwvVdTLU-;mb$C<0ni>e(I+pJ>32+N2?;aj3oZ+%u zt=L-{ZA+};jCBi>?Xyonj^ifCrX|Nn9Z+GLZZ5;EtX^bj^GKoMGk7{LgyL#}R;OP+ 
zRve!as8C5mn19N*+eOcRKs|0P!yWNvGCy%F?!(Nbx{|7$e$5ugS3mX%10M*OkhwcM zFW3gG8Mc1yQPeNuYFjOJ|8WncL-8~FiL+9mO&t1SCWUQ>i%oM=#${>d!NmCikpSc> znFYq2yww+ud3D3PTNmC5b9mWLZlEurwOrvq#VIxrw`W|sHL`6_oG>mRo1(ByJ82W0 zeIil1pgbj(;^QwzpF7vTs;sQKUOY%EX;yrsUVAoIIIP#TPUkQiR)lNCHJ(nasncCN zENSVu$FH0|Ic~Of{NnaFjd`?@H*%F!!#4#ppGWNENEO&9TZZiX1skfPjdu4un}}&< zpW$~p7Z`8jF*V*S32+ZFr|vV(a8D{y3JFTI7}K~T`J&_1H6AwC9xB8nul@&ijO)DLTN_TTxmRwjp|5TLWKkzM2sS=7cUOvnkDYY<%D=@c41Z7g;awh0gIOlj8X__B(s z6Lmi*k9p|PJCmU06FafD?T&gHU$qLHWV=4pJzmsP__lZ}o-*n+*QOgPp>N(*U#D6_ zSE@?=*aQy2yjR24_8dL5IF>Rh&?h2QYmy{e*?%bMG)l`3nHeVW5@uRjP~+B!8r~AA z-6gWhi61Pm8S!`KaXh=Xk2d%Ga=V`HTQR)?HM_Cd6*EgdmqOL+)`oZRT2Z9w>r3vK z431?*N6VzDSYSWAoNWJ4TGf#Kma6M={?*f)6K!XGmb+djkMxee9BpvU7nPyCZLEu| zj~q)&+^H-)v@M$VpSvUEf9TlbxaeiWw4!rR^IoHXB&U$@|L=3j|LL_L2Vu|Gv%?g8 zya~7zdDTLR_wVyLL_>fJj)*`|O!fvfbC z3#!6W^{U&kXQW@6$p^%iGb_t{zba!cvoK)E*iTO(Pw&f4xzG?qtqx}LzLSm4`K$zclNfg^`8z#oL=MIax3>?L3mq&lT7{Tz%rur%zkDgG zVNcHb)YbQ>k#sDVKln?1Y*tlOweN&BM|%Q$@I?yQYxtV=IRhnAdBdAKfmIKvzUm9y zoOF4qNH~>biEq5f30?eID@dE;E#CZ=pB15d z2IxjOj2C|lzBa;sg}AQ|=~9Sq4V9~Jqzp;?))&&ZcK3N3TPAp3xR?iWhy_tzmZ1>$ z9eXM5yo;^dz*+FPirbLEU+Q`q$u__2@_nAyduN*W5*NJBG@niC-+{iiO`?0RvqvYtNloEpqH zWP5LT)7G22JFFOk{$pLd=RE`2jMG?5wRE!;b7!3I8f$2rXdedY;{p-+X0s1kd-lyq7kJv7QzCw}68t=M^@NrqfWpz_5T zMR09zncWND=WEoLo-{Cvrq?8sP`|U#J*(2EtCziqO)L9kc&N1lQ#Pxch57+~yeE2r$i!iO-XsGOU3en>#ZniUZXJn4 zKwqYH_g&q1!8GCD?9IkKep9|nqu8F`VTF$b7Av`!+SYVipj~ke>%rO+E>ij!lZ%(| zWF8C4J~7b5pF8v`iZ*u6M-At6ExCbXNlk9aeJSJ__xt;ggt~SQG4?~-ehAh>UsBiK zy7kpskNi@|t7^0>$k4h8_o}kNOvCNF#mp&W#NM7f#ICuC)D&C27K*`xB0d^2kvAtb z?cA|=9(AEir0=ffhj(np_iWK@a#V&tW_XAyKQB{YI$>vwTG>CUK}6Owjolq|FSLBa zMNAgy`Q?tPjkO!&C0XN`^Hn|FAzM@0&w^5mTIPM+A1o1}TA9hLP|Gu2gLJ;NlaH<4 zWw)S1{q=H<$m_-s*t5Bc;?gL)0q4mbHsg*M2FvD-7(B0t?1{Ekc`7Y^#lox)vqfj! 
zHInsfH^*D9P&+M%#N^7{t_f6X(s!`?@PqR7S&2}c&+hkseO80%x(g59`FMMmP_{bswUzdI{dZTN>ImvTQdR0ab&2{wF_-%!cy)7|RJGEM z0HePt`Q^oH%Sh(h=33qKcb+~RhW#+8yDMKKU%WMOx_D8Wwxv1AT!R{gy{!+4stj2& zyw+Z)b6hiFD8hNm;)3w_@^zfxoKX9|nv6^K^P%6$bcbkq2`8KIT*THcYN=Vx}me?aO=zS7tXod13Erpk$}(t4LCB+W6ZP zCav0e6c#Faub7t;!vphswu~?k7ABnMC z>WG>V3z(b;P*caZu(nv~70B(V{M^P(KU9T}6c_r))b_Eoy^SW4w|^3! z**GW53$I?QxyLWYCUFh&X)NSMqM(noz2)%4W)`x}QJ3f_y=^1nco*Z!i4o5&FL%vJ z_3uAn#9tjN*+{2eO`%u_$Wny0_@U_INo{5(lr0O(%kKNut7$4$H~fV4V&AEpd}E(I zdpCzHh?}q+?g-wFmzI=kui%q*LqjR;?C$SHjN7yq$h{JlG z1{kPj-DYhP7l6qq`^K6Ye=B11oirJ#@O!!F7Ng~C>`g#L`0_EbjHt5*_bUDx*X2^u z*$7rm(VTq2`wLHDLVN=zoNr4qbe~gO%@3$9~*7%WYpxyG<-`Rkml`^F=ukx>H*vRl{(cVV%b07Iw`L z!y>H#Ey-JBQb{EXOs(Xyr)PK5rG{5vpRcW6u4Rw1HHy(3t|mY}l-NE}{45aU) zwiu<&%3GU@s|QC%RTA9h7<$c?ku%Yz_;_x!>h61aJPk~wxmZ4r|vCgoV7OeH4Hs) zJFt5n`{VxRy%%Su2b!-AmZCTB&5_>A+Pfn}YN$ClY&xUAKkNU*@4^<{xKQ`kW%+}n z6Jbn^0{r3iX^rF~`*D5$U=PE<2h3Z6U&+jm?}cU-e6<>$wXUeO+AnykUXP#4JQKmG z?RwGQIe$MGFE(PH4V_b8m4EJvOnge>SpRj;x*Rs|P{~J^G&aa8jScJ>djw;1(}NoA zE`0j>RX||gZvZp0yZNB-4C^40a@_nwrzUdK6tQ_gsK@op46C#moROFZ|8w;#Lm`LX1_EDLG$NL%Gf9VWf4UfWkP$NW?M-L-_O z`RW$?cR}>#LfQ>tL&>fBOj+1*AtA~Gz45#4d~&P=FL598tFabXCiGz^4)o=2PcbCA zR2|5exI!CV7dmtZ?r54(DUnGwvw3kj`>b>q^L=6j z_qtlmjuYwp=rR2XyR2bPdZ_d)3*D;@ZL$C9pyQMH5z~V$5ebt!H~B81t$K<`uec-m z)3v|dv`PF4yOt>Ke5}17XnS<(u{HJ+cK65n#;)%0_uWD1Gm2si)X_ka>7THi0e_h8 zx3SaH!H4&&Z?UpW=r6uBPTy$}D4o11${nkK!6;dk)UKlZ?L9fITRe7&dIvoTEkh+6 z^pTh4{GCT}$^JZZ-i424zP7Pyern>t4f!$QJ5>2mtukKEqjzTfjFCNjvGX#~=gike zm|bi>U)HZ|*KCYZPR1Rq^%!vI5e^sev<;wduiXh@y5lEPaan}2-bYL+dNUC%sFe0W zb&`Erg-5IU1X4e)I=`&ueWkC-mWg`TNDT*iS4c{>oaKmRwI0^IsJ_9t>_vC+n@dO6 zj7o2&I2Xx4gLRengWjImi4Cil&Kw-&VcE~;M=sTHYFmUf>x?#2usFMjRP2);UI^sl z3w_OTcD15y!pZ}e1eb1ANjtRS#y5JD=Ygp~9--EkZ7<*sVX91anWUw;Em=0}j-hE}2!TcaKqQw^HScw_qg%u9Zyn zAYJ5HVah>@ZxY&mhgxy^>amlwE7|hNBH0d7^!r6r{#<^x6RkN&C%R;3Be|ZrKVi-K z(5~v@$Dc5xLAPV=eTC3jOUyjp;0vO^o(YM1(x;vQ_lo@5-B;}<^uYTI!~Q|#S}c+w>BUVK3x->to`2~=T6Ngl3)Nt8GtIx{ 
z)OQ7?ox8Vo5jWLh%P8o!guqnS@TtMvG8f(b*5d?=Yqm%&tL!aDPEszmNm%N9NzJyq zw(66!9Zb=$=S+y?#QD_vTdVlf67eSQ2F--x%)QLcwYf}7SW-{%?IYQg%l^9bM-Hyd z2Hx$AXDH-mb^N0tRr_`1!MR=P6P-6z1Z+0;q;roK2f|BM1zFLFEB&>`*f~PyGHS%0 zso2lr+Y4=)>0@Y-b`3U2D(*jCdo5n%l62bDWyoBduOCuLkjVbDm6jhXj02CSmr6H@ z);xpSub7Isi!u4)JmJfvaD6^e@nU@{MwOuxaiZ=e?S0uQ?!jWSRe|nSDigDep4CMz zX7|?~ilU;fM5xzt8|CtGl6d0g_u4aR&HTFErkwW+UekoT6pzt}JQ|^AY!bI-F~g{l zu~0EJk|A)df$p(lSD-dyh{?y&QWwe@Hoi1o?!FUvrA;G~I$S5MJyvz8pqq+|;C3f| zKSwq$9jOdXtJ6g5g)0xzF1#k88!0`C?(nj_(;Uh5Bi|YaU4`)=x-WQ^LQ0CU%b{{Z z17&dCJv}G64<9yiXo=Ppi{;52)X8sZu{niz{~Ce5s0N;`kLX&#=k>}|mua$N&nZ`0 z^6I+G`KowlEG?Zx_MivftL-#x^7T4!Q(D}@jO$$qF2(YwT=4fzg)W_Og(k*QQpOKEfV@-kk?snmIzLC zq1NCrfwr$W*5voq0{lfXPH(OV0gf%(n6mt07NiumfDhsh9hW6Eh-DN``ZbD$Q*qO$ zMQfgj&ti;bysmqmvm4CKCUg1W#98X@O27uG$$lR}wUIO%K`UP>&ZT*@RGi{m?fB-G zdg%T1H~ z$H@cQ=5bpE^;6!&ShFcc*<0f4t$a!J#7LwmcTFT}(hi89P_v@5FZ zDcLo%8_OHEv9BdmWO8Ls)#=KTJ~M?I)(K)Fvz@%{oOqtSaO7~Wu_CxGp{-kzl_CAx zHN_Z43?-e`3+6@5AFf^v(5Sv8D@pZfTQ@^3PNv<(B*e1X_km+VeqpYO)BOaCj`4kB zmZ9726Q+qFo$-qen%DOT-{wRf9Q@9?U)^VfQ=Zcrb-9}7wEveSO?+Nu7|H1T$LLQGXB;|ET~l80@# zE(@URAS)hwe$4B+k(l32J1L@;dx(j>!W(?iU4Hl2W~QEVr9m2Xn)^EsOOt1q)sNL! 
z3i0rq@^+@&s1ojAN@f;&89M=+H%qz+jLT$_uNMVq{k(3Xmz&AHM14kVX04A&$ce0E z6Cr}WVS#@qezEqVNrMV^)6H=fp?N{yb>=$}Jy^H&JXq9|Ty9m&8j_XX2^uOA!z8B8 zDDxdQkSR2Q2_o+vczOB_Zq!j6-nD4P`}Dm}eT!h{L^X4E6X)OoULK}*sdYRc=cR8NOExKTC*LWD))WxnRvT`4)zi!E7ADmdTF-#%#U8#|)%E$z_so6tZ!8NP*4mxKdM|oMMhBMn=E$n)V^PYj zqXg*wATM8c5LmgoHFP|gRujgx6s=VntIkSnEQME{RLGp(GVo$#@^1CBiwTBvH}0BD z$?@B@kh~fF5)=9&w2txXlZyiiFdHGmayO#a)i>+J{YG8*@TskC=;b6?gpvC`dM$qG z;z&sw4z5^DCUw$gRolW)YVq8>OL&6k6O*|W&i3#dE!1pp%Mwg7^I0$O(XQnw7+uwv z{!zEoaKFp@iQuMhVgdPuSV14udp9bF33-gVj&*`#%&*>FSm8z=YdJdDztP_}E@%^~ zHg7r~WNa4$RkZf|x<1;i9zw$9dz^0-*_wRIABBi%HBm9R!HU^3A9v)3X|+?E*%~#n zxPB(ha8X?Ho$&Gfh)2;~QJV(cpx)$lxUXAhLt(0^p#wG@53Eo_c;e8oL)g8-OS~}?`AtAaR8!bpUnhlhym~0J*A``NL9#d}`~E{R8H3)M``L$qT{#izE|HJ)-q%Z; z`%YnqiP;oH>$FR~^V-XFX(uvBHK~Pe-0spX*;!E{C2Zsg-L&$Z_xF5v9txF85|8g2 zm)gJ9lBpft4>=$*SX8HJXYN6Y;f}tEV)?ntG@6(R$I&etk0x&_Y^Tsvm}83HzRyLK z$94RPplg6Sjq)Jzjo0od7!e6d-5V&hUs)arr6Fx$w&pA4C*(Hy#=y|ppS%>-{0YN# zyf8B}H~;CxPZ*=rwDY4~T@rY+{2AHyN3Gz+(xXB_;geDSPF%-(zIUY;8(24|%A+?~ z_Re@~>biH62J$YNnc*bIlh1!;xtqq0x3d)!R#%=P{l@va0%na7%m|5O&Hs5HS=-@z zW!llhyNnk0d6m87;fY^tHG`N8xHVRqov~3U@UV z@Z`=-Zk@9o;2qjG zmnj!}wd`SX=}CKEtG1*suoH%HSzvbQOggnp^!!F4T(i9MG?ln5m{ z9t%%RPzq4eq~l}77DP5o-o=g|w4b#;{7BL9ge=5tO+|>6Fv=_}Uh1*7u&2rDqr@VL zVyD~L`PvQ)8JRV zgCW~<5MeRbxt+|$eWxj1!(5W$qVg?W%Ipmj2gzgK6&#OmidMoSy)QXFpwsZcBfuO|`<#3Siwx@OR?35Du*e76)@AgZKYytTo1!@}5}qq;p% z4*4~8=RQs@vi#*isW;=_hpeIw*BT235Vi9P^Rgq zu+iyLiLS7Pw4tNPQ0mFcJ#`$t@%;73G#4f!(w$l3-7J_DOX&hE9DQv?2<@BRCW_pb z)YgvqR^nmHbR35i$8u~hvdOCaB{g3$zG`lJLNWag9TOhx87+<7WeTl4oV9_x{Q`ID z@sh*j_o<8v@&-KbI#g2vL!z5*t=_7?`~8{j7i}JAt8T0Jv2xOs;v2-Js>i{&4xCuEio# z%d_u7-@@CKolO&PBv6B+2#Rj`>WgyoJ390&x1@O(Eco|F|A^i{cz9fQzxRRdG2Ss- z(B6AISfnOh#4*Ee~LZ|bY<|)cs+4~8*U*tb`H{5@ysObzhVRkyGj;-Q; z=Gu>Nd^+Os4pyx4GmRDRps>! 
zYspA+>f=d1RDYe0YengfYe_=<^^{i0XB+=b-EibiA3G}d#Nsm{M--%Sm+r1?Gpt+D z)aE8vwvcY1t|HARUi$DutD$i76uMEa3!hiS=*1GJ(b8u#VR+oe1W`99apSbCDxYbV z#ml@H*R*Rn4kNc*xm-xnjX+1SGEY(sHkY8HwqaEdQ{F!&7P zGTHG?GdDd`?L8JILytf^Ok;h(w*By}=Gn`iFi)+3(SxP*dt+ygD@^pY{;0dl$Iz$M zB{dDl?~@4JZ9FMyy~`-$ zK`13FeZ(WRkqjfM7*-jo0hBR?`AD2dW1{4q(`6y8(~MPzx}HQ+$s7!(julj}t}YoY z(yx*qJ>R^aa@)vj5v+Z(QB|_nax!e)@!X2Q=d+gFq0A#^wAsefyl2X1g>Ij1Kw^TA~M3q&3ahAF>XgBaU&zm?y)5XmC9A^+W{t$O&YPy z!uc(v(}j|YbWa`i{OK$wJoG=@|Ex78T%w4E%i0)FyZq(N5AvWB(WyLV?c?#Swv^uX zDTZkaPFx3zh(e=InThYa-9(*`5_i<8eZDtyD^!B0AG^bFkessAI8eiS(B&Hg}n4Z znfZjbL=(?-$kZwI2VBpUDbWtHz#j_rpq9$L!uGAmdeZml zs4CHqLM6^3(zRw{zlbUm05XLYTV_cLqHn|h4WP4pGp zZx!8TYaK3Gqh`S+w~^bXi1K6TH|R?$b7wHpjWUMF)-O!Fd@FeTpuTm&v5}xei-0EF z(i_S4MK%dz1hc(P+GD)Rn zBGxL}v*&5A$+9eTaZpW3iJ=A%isu)bo?66ToWR>WN*t72zNesTIjd_iLxGIa6P;e> zHABMus7Z)AoHd+6`cv(70g)edn}u3w9w;+mMEtuuqgm6Qj|HCDUZq}C&#uTC>zF5z zdh=z2=rQWi+Z76tH{=&)DT;V9N2{E@^X9!(6mQJ2e=JQ<%G!~Te349a#BBR%SqRUr zL6@6Y=1p`-yi?&LStAJ!gRfUZ31jY>Nz1=q4Hh5_bITjetcrdTb=yY4pgKYMyOycr z9qe0_bbglBDOo#c)GPE4_h5GXrS$^Iq--4h3LgpQpl?v#US>wVG#G7~dt|@7R+(Fv zaOVl5KbuQq_S$m~1<|aQVp~yXhvWU}rFzM|(hvgisIzHSA<<9E9t3*9fy@JmT4fR@ z({vUu1B2@pc!{X}>p3U`XBTCTE^8&}?b zSkjRlzP1_^$BSLo@X#t0I}L6Uk!8G6zc&>43bT#B5}$Xb8b2a~zFBU&o zG$~ZN`Hn_4hHP}{z2>GK%l8O5;sAFQiyQT1EMz{O5$!a$bhf9^A7P1^yH@9We<-mJ zT`FwBr!+FXiRSQ;;p5Eo>^MGKY-h+g&nU&GZH}@FRI95Lti$vyH2J!dnx7RP*1I1} zp)%iN_U27_Aoz6ZK|gDTh(sG1fx_!cLm)Fr`~R#$K{v#(w*y19eRGJ$ku$*#!9tJCC#QrAE`)pyc>r?NSpSg`cn_!iBAx zoQgdYD7GhW+t~@F3eBT1vzx)H<*BUO+VtFD$&-TKG&Jhs)fS|C%AGul`xuGxB~I&i zq7(X59qDMb$!Us5@iipY1Y$1bpP@;cXysl)oy>VYYr#=9-_VpEoXa(;I)7D|m~+np zuZXh0#VjMdU?GhvF+*_wL7^AMTcm2h(jx|xoAd3ZrrA^EXvD0lk9$K(A>f8uq9<_ zwUC3iM<2PLMZcW6BR+6{i7Lu`&lJs%uPSBBgx)NUfXiA2`bq0R6%nVd7Hgq*HmTbS zA3gH#2jXXWJ1sPKevPG%_v%pOPc@Opb|+`oXMVLz+OOqW%8i=TLu|jE@bqnxqN-vd z&E#jQ1zL&>(|X}vF1u)qF?F12E{VQzPM6(4C2Z)8^Ks}F;7W%f3%nEliB1pXL;B~@ zYuGs%`O8ljoF{TVD+a?J`E2oHwz+twvh?ttnRG_}IJ_p;lW9LI&HoIaBT&PY@p(uqcBl_5a-i@%E$QqNiSH{FKEmEd?@2Yz8 
zcimj~#Zu+GKy=9vTP?pAFEZ(6*gFM{iLe|UKD*woey*cEmQX`I1}p;nLUn#hmkd*z zn3W}pxfIK+V=_q(g^o(}c1N=WMy(DrDTyfSDryvOT91AgQ%R*E=1b}%*dvu|KVhjG zcl4&%&6=7%YDSVWpd`3kh!?gXi&4wf6hI$XGiZC?Uilc!~Mm@JU8{m?dzqa7#bC(&u&Ckd40K3w)I zo;5dlOL45AB3vEJ_iM&rG*jhT9jSW>w811t3nma0kyx#Wm z+Q}trb0uSiEb&qs(_@KZBVD7Yf~}1$TNqHMs#jIqc4--9h>gs-kmG!u->`?Ov_UfW znd$Aos$j=Smy2LEWwf&)p;4h)09{MlD!MJ{-1lAeB=>;9=jGaw&iU7NSA=dn$3Ss$ z(w<*w4t#`;Ma9U1wVhqdI8QasyM5UqdgNZEMsvk6MyYrgiUBQiO2V2-D_%am$k1{n zPue>hO6bEiCYDKhf^}5~2dyEpw-@E^#JD)iTEgH!obm@+SY z;-@t#d&U_S2GcI?P``oqR(IvP9J0a7r#yqt=J`THKbDq$Y>3{#mK|}-W|ikL-kqA< z^Zp^IkrpsBgXx@R8!RAM7K>bDwW~X*?z|@7cXMi7J>|6dOyq0b3dPF{ES*|8ak-aB zdLvAh?rh%ldA!HXBB^0Ro3xQT@7ARd;MBSzZSoqk_%hn!zJ84*kJgSy>Z`3gA8qfl zANaK29_XQxQPu6DF*Thi!S(7(kk-W-bDP8(&Z0fKaIrWtQyP}A8McL(q zYOcaQd&j2VJs_IJB{ss&{RXGotoYEVyjh+^gYU(~`{rt7a%I^5YM3=@$|_l?7xW|I z>`BaLydxvO6inN9e9ynx*FZgr)$N4Wf8bJ+_w>T|E6I05A7+dVb?4M3$a#Bk+#6Uc zbqdUOnP<3ijqRq!ptqpRmpB&J5fgo#eA)7i+b>%;Eos>5pEJhgJyq!4kK#Q`d|coy z$9gL;t)!zN`>BqmyW+QakBr?OB5B6z2c^zx`Xsr_S;#1`A>Q35v73{5J$}c}r>VW_ z?y=EvGg~-Lw&k&=V|o!{b)Y>e$l%B$?Ru}TI+s%F_KjHA!A9sFU0ZugEQRK@ht=*u zeao`>#nR~eI@$qNMZ2xehDWRo-`}%ZI9B-A2#=tC5Rw!BDlBjk`onu!$n@ZC{AQZ) zneueUPCeb?(H-f*@m4yLdYU5`z;3}a$GER=xSF8$V~2esXVo`3=H zgnm``oZ9zO7)%RlxB{JnhZ6Jz*XN-ppuGw`FF?-|=y?%(-hwV(f{wh<5d}JmKu1*Q zC<7hA?}sb>aSp~_g`T2LP8#MeuF#KIpOXQ>S)Y91>hm*KFiG-v`eg%T?;KqA+ z)2;IZ#0)7l42A&{H+Hl$cHo7#{&^_u%0)aF7w@ko%)kAD{I6@^)z6=pFl^`usp9-K z4s--EgZAGaLFWx<;4<_;9N}mG{2%lI6#vr)Dbyb*m=QWk{ArLV#0(`crT_5Z0FCE> z`o8*O{<1&llK)+(|A?X~B!tPI9EusDgZm$i{BNg!LZA`ShX!<^4zf;>f9mkR+;#%- z0Xf#c9>7k39sX}WkYWGLpa0vK|98IsBJlqf0!T1)*x4pI>@Lh4<_>d#Il{=HY-kSi zfSz{HnG?(&dgTCfgBe5TiZxvxZu%&tJPjtxC{UH<%U76FPcB6h)ykw?D?g zd!fTLq3fU*IW!K8G5uq-HAKxEn%NvWl0!YfwY$(Y7iebqoPVcA4pW6VGln=3KA+7J z8jbp=8Qh@pt;Lg%8;IT&RQV}Z_1&*yc7Fa)xaLr4J$Z6Pci zV3y}J0oo4d^nu*oP@C&H_W*5jh&l_57dqOXk8p=LFo&*_pN|0?IY6%gEzl0;bcJ61 zq74wRhI9(F1}KC6fGT1pi}SufFIMMt;99cy<23;}mJ zxIBPoIH~}9Td2P|#Q(p?0QCKxV>s%d6|NmP+JENW`dl`|RX9q(HeDdeSfNqqFiB`M 
zuq|NC#t^n}8gO|5w|3`t?fI`6tk3!XofFq{YN8Nkju4jsD`4{gRX`Qa5nOY?ip(J- zfi3_NfSX@>bv*YZcsu9^u>ZR}08?N=_0aV{+mPlt&0o}jKftYo`JB?fM;G`P+$sRi zaC)U4H;cV1=#_E?`C=C)|SJngR#{YXMjQn+6mdpg-VYtmkp(&)R_70g!^_U$U`4 z1$X^ZXA!4LF122%^UCmg9Yn7To6GeB1vf$N#L|ztcoSXCNi;5k#Z_+JnbZ z>pxHdNCWQvDdNzdj{~{`T82krV@NMRzX;m{(HXQkK?s>cYh8e-CPWe72<8RS1JvLe zfLnk&)Ei+V|2fWF5YFtdTM)(HEgj4ad;^X$h|2K!LG*yfK!kiQP>U1P2G3@|j-?>o zf*A96Z37!}hme8iHMsx5;}4*YKp5m6mp}R-EDYo{fa~8g45A01`|nT(e(nmz9@jrv zhUa;70`c^B`-jI`lk@q2mw@O7bcKjwzz5*5(GkK05s`t+mJm%hs5d;`s-4dO=NcX* z?9XWcT6PcxkO}P0XMuasuXyyk9B__)aSZn;Zx{hNoGys!_J7z9kktgD3uZxR4H2`z z5sm~x8hG}$fqH}3`^%~=pccR#A_l;tlq)ocJJbr~`z0O1gMnwjIRyyWK~{m#4XBhr zR7CIrNAH*Z;rd44CJJc;%mR@76|I52yFzmQIfMR+h5!ekiGM~8w8ArjBMc!cp#2M< zUzH5-J2z;4a!8xN3*prS!rQH&D?p3DzTq+Z_WxmSg6r;AeuC#PxJQ5)LB0mEz^fqy zwttR}9FS*mLj3*ivmk22(}?VVKpCOqUvVDP2B4AyxPe?{25|*& zKx7%XCpbaZP5+e9K^%Z<9Mn6Aj0W=P-!mH5xhDc7ffc~BAv~`kG>OR4f6Y~Jnc?&Q z?zf1ln-!ub3R%mqdH~@saE-&`CWyACkRE{@{6g-pdF6L&Lg4ZD%!-KJ@MsF>@1L^_ z!jcek!gU6)g-Z(epkEq9PzF(*8{+kMt;1^)5I_G~jsEiGf0hD{(!a|k025H_{N2M4 zdO_6bzaj}dW56u{Zb2Xpz-!?DEPMRQfdEGqX!r2%RT!`<6^Jf~d7z3xL}#F5_?iV| zW3bWy2!p8c_ozUAZqLBC05-q^5MKGqBH zO6%WO$^YKwf46N!Rl)ra(g8%^dHH|JT>xibkpS!8t1-@hs6c=`;^!KKM zU$22rfY|^#AalF?fxgK(pFmO&WkLK0R{@TH=LYBMx zkR4pkS9NAEeyC=Zf~W$EN9YYv!^WJG{Epa7(S$5b#YsGHze0x5v@ zK=cG0!PlYi9-syA3L+dl3j!GsQ48qA_?%ZTA6P^E@<)ViASC^jDd1Sass6GE1aF9% z8SL^nL4WWa4|0eHpiP8apz;GV0AB|f!*`SbpTFumxCHqPoS2Eufc)k00a^KV)2J25ZwHAU*w+g4onE%?5fZGbZK0s93f2H-ycL8V4klY}~ z0=xQM+kf5VGe5@}o)v)heqSMjJv1=lmz^msk?90oDTQFF1F`fzuJ{7#Q#6V5=6v> zM=AKOBhUsspCddOM9zQm7ZAl%A=`jg-+*(tzC@v&U>WEMtj+;itt0k)fFxEBTJQ=4 z>?&HGV+t%6L|SnDmlcDk1!_>RZ-Af-_Y>gZ@Tdmt1oZoTCZK1q*JKI(fv5}T3tr!= zLTA8xfo1^ah;v|nf8Of^IKXoR&=9;Y9AQyNX3!VxGmyjRp?<8;Jgm@b#I7C41;*!@ z170VB+2AXCxJ3c4gRh1e&N2KeM_^|2Kdl^qc0eu#hykvFhy3TzmNO*)Y%45G#;FzpZ_7M0C;vr*bv;az z)iUsOptD~)p8w>FfA%(bMGMc6aJ+tT17-p5MEvp=-^cJhmJxQ~O3@oQfgo|6Gm zh-v`T@Blr4B#;$kSdi^N3(z50`vdL~)jj?B8MyY>SPI{_0zCm{@K^`8EVv#0H8O); 
zJg^53)}_E#{+%Y^mFwU0`Ty6>)r7`XMd9=2FK?!{G?|#%w9;uEVkosu+bAVu5n~*t zLov0DrHGamLJ1|67)vR|g#>XSrDWldtS`xzQ48XQ{78Ya`a4?vahSV%8HruxkpeL zWg6{u_5kmbv^UJTR&8Dy#xJIf+WEfO9cW9rMsY6S+Q$4j;y5Si9L^n^&VKBH0rVAT zH0^D_{BZQJY$I#jeGek_)(m@D`O96aa*y#S_x|yxBc3}_j&8PX{wl4KXT~*WGu=aK zi@5(*A7>=?A6v|GSo5r|yUO+2tUwRIIwk@n02>$p%D~saH^4n$8py$FEf1Uo-iKk? zHQ)lwR^A2LU=w!;$icYl1n_B#NC~(C!@i!aBE7(E;11B$D$)&H2R;G@wjm8L0Zan9 z?I;JB0saQEPa=Qd6JP>Jz%b4RvOpVf6&ME!J79bVOal*qNCtL+z!Bgj;3#kmcn28R zg))I=nBlYlPXMjJc7Qf-Gt*O3a$COlJvXH^@3|?SXTl*%rIOG+-Mc$AG9;%ylNSnE zIiF9)-U6Prt)qpcoGRFI2Enh{*3RDD*2qv}VusFivPEq}ZguDzSU$L#|Ij4$!G{B1iCxCMRc~8ER=j1mkBCi57 zPW0;Qft#3i6ZhQ2op8kK?r6u|#GF617mTtFpS2K@E?$ymK*q>DYl{iDD>+&~rV}4L z{;-WHV^+k2uwltcqQ~OXRDodO{}6zIj+^Hjc~peXN=&*WXCd5X_*-nXe z%D&15iCTT5WmH*|ig>7)hq4Wo+859>Xt-F<(5fueCL3}hO@j|%b(t>SiZ>VcShQ`8 zW+P{^5lND_&%s9ZTY%&G3h*H?io_h>e`wS7U7r{Y7tND_JITuWmqSucql$isqNGR< z`noD9-%b6d4VkU^W@l#+R+W@maLuO?7Ah%MKmNfbDIO~3p=?71j-dwf_#*Hg(7h4! zDXbH_V3F7hL+s0qI=8>AjncP=(;i}YsnOi&m+DWrW{kj=FNaKIP|E z-&LPSChA96d6^KKYj-=)1@vL<<{BQ7-yY^WOSff)sE<(>qaNmwk<08H<1QHw|6X(xZ~hmA#P{ef z7~5*956<2g`{U(U|JfeDOsuW(zmdG8K22SkdNg%t>d(}j{dzNX<{*9fANGTBe;E(| zE}8h#o!jqj;#W8Ei<|h_P26`AKe>q?-NaotvBv!PRxMq`X-4sSRl-q!N7D6gG$IXI z%Y>NGfDynrn-5BNhwPXf+nbsE>iM0M<>)Tpsqu0&3mm{#m;1+utd0>xm4l$#56Gqt z66@~&(F4bvGhm)Ozv!H?^Uh}>u0`Vbep4LMt`sz`xoA? 
z^y4I352XT<)D3Eq2p;|Rm$j%H(f5NX$wIk2KVPg%p>&qZHia#p^)+9r?#Eq$v1DCL z9X6qSb|uTE3zG-qe~JAbT^~y4UD9HmX}^{puKdj??DOiLA@1BfpU3@$-pw9{6xME3 z%>7jT+O?zo+;H<-@38GBuksXDIJF%cL_esVq_G&#UmyK$t@3{%?e9mn=DX?mypnkn SAHF{hEuwrJ`N@rH`~L- Date: Tue, 31 Dec 2024 12:27:00 -0500 Subject: [PATCH 04/10] feat(google-common): Google Search grounding tool formatter (#7448) Co-authored-by: jacoblee93 --- libs/langchain-google-common/src/index.ts | 2 + .../src/output_parsers.ts | 271 ++++++++++++++++++ .../src/tests/chat_models.test.ts | 2 +- .../src/tests/data/chat-6-mock.json | 2 +- .../src/tests/output_parsers.test.ts | 217 ++++++++++++++ 5 files changed, 492 insertions(+), 2 deletions(-) create mode 100644 libs/langchain-google-common/src/output_parsers.ts create mode 100644 libs/langchain-google-common/src/tests/output_parsers.test.ts diff --git a/libs/langchain-google-common/src/index.ts b/libs/langchain-google-common/src/index.ts index 373617c72e55..d1a37b6f5c70 100644 --- a/libs/langchain-google-common/src/index.ts +++ b/libs/langchain-google-common/src/index.ts @@ -2,6 +2,8 @@ export * from "./chat_models.js"; export * from "./llms.js"; export * from "./embeddings.js"; +export * from "./output_parsers.js"; + export * from "./auth.js"; export * from "./connection.js"; export * from "./types.js"; diff --git a/libs/langchain-google-common/src/output_parsers.ts b/libs/langchain-google-common/src/output_parsers.ts new file mode 100644 index 000000000000..8ca9377011ba --- /dev/null +++ b/libs/langchain-google-common/src/output_parsers.ts @@ -0,0 +1,271 @@ +import { BaseLLMOutputParser } from "@langchain/core/output_parsers"; +import { Callbacks } from "@langchain/core/callbacks/manager"; +import { ChatGeneration, Generation } from "@langchain/core/outputs"; +import { MessageContent } from "@langchain/core/messages"; +import { + GeminiGroundingChunk, + GeminiGroundingMetadata, + GeminiGroundingSupport, +} from "./types.js"; + +type Generations = Generation[] | ChatGeneration[]; + +type 
GroundingInfo = { + metadata: GeminiGroundingMetadata; + supports: GeminiGroundingSupport[]; +}; + +export abstract class BaseGoogleSearchOutputParser extends BaseLLMOutputParser { + lc_namespace: string[] = ["google_common", "output_parsers"]; + + protected generationToGroundingInfo( + generation: Generation | ChatGeneration + ): GroundingInfo | undefined { + if ("message" in generation) { + const responseMetadata = generation?.message?.response_metadata; + const metadata = responseMetadata.groundingMetadata; + const supports = + responseMetadata.groundingSupport ?? metadata.groundingSupports ?? []; + if (metadata) { + return { + metadata, + supports, + }; + } + } + return undefined; + } + + protected generationsToGroundingInfo( + generations: Generations + ): GroundingInfo | undefined { + for (const generation of generations) { + const info = this.generationToGroundingInfo(generation); + if (info !== undefined) { + return info; + } + } + return undefined; + } + + protected generationToString( + generation: Generation | ChatGeneration + ): string { + if ("message" in generation) { + const content: MessageContent = generation?.message?.content; + if (typeof content === "string") { + return content; + } else { + return content + .map((c) => { + if (c?.type === "text") { + return c?.text ?? 
""; + } else { + return ""; + } + }) + .reduce( + (previousValue, currentValue) => `${previousValue}${currentValue}` + ); + } + } + return generation.text; + } + + protected generationsToString(generations: Generations): string { + return generations + .map((generation) => this.generationToString(generation)) + .reduce( + (previousValue, currentValue) => `${previousValue}${currentValue}` + ); + } + + protected abstract segmentPrefix( + grounding: GroundingInfo, + support: GeminiGroundingSupport, + index: number + ): string | undefined; + + protected abstract segmentSuffix( + grounding: GroundingInfo, + support: GeminiGroundingSupport, + index: number + ): string | undefined; + + protected annotateSegment( + text: string, + grounding: GroundingInfo, + support: GeminiGroundingSupport, + index: number + ): string { + const start = support.segment.startIndex ?? 0; + const end = support.segment.endIndex; + + const textBefore = text.substring(0, start); + const textSegment = text.substring(start, end); + const textAfter = text.substring(end); + + const textPrefix = this.segmentPrefix(grounding, support, index) ?? ""; + const textSuffix = this.segmentSuffix(grounding, support, index) ?? ""; + + return `${textBefore}${textPrefix}${textSegment}${textSuffix}${textAfter}`; + } + + protected annotateTextSegments( + text: string, + grounding: GroundingInfo + ): string { + // Go through each support info in reverse, since the segment info + // is sorted, and we won't need to adjust string indexes this way. 
+ let ret = text; + for (let co = grounding.supports.length - 1; co >= 0; co -= 1) { + const support = grounding.supports[co]; + ret = this.annotateSegment(ret, grounding, support, co); + } + return ret; + } + + protected abstract textPrefix( + text: string, + grounding: GroundingInfo + ): string | undefined; + + protected abstract textSuffix( + text: string, + grounding: GroundingInfo + ): string | undefined; + + /** + * Google requires us to + * "Display the Search Suggestion exactly as provided without any modifications" + * So this will typically be called from the textSuffix() method to get + * a string that renders HTML. + * See https://ai.google.dev/gemini-api/docs/grounding/search-suggestions + * @param grounding + */ + protected searchSuggestion(grounding: GroundingInfo): string { + return grounding.metadata.searchEntryPoint?.renderedContent ?? ""; + } + + protected annotateText(text: string, grounding: GroundingInfo): string { + const prefix = this.textPrefix(text, grounding) ?? ""; + const suffix = this.textSuffix(text, grounding) ?? 
""; + const body = this.annotateTextSegments(text, grounding); + return `${prefix}${body}${suffix}`; + } + + async parseResult( + generations: Generations, + _callbacks?: Callbacks + ): Promise { + const text = this.generationsToString(generations); + + const grounding = this.generationsToGroundingInfo(generations); + if (!grounding) { + return text; + } + + return this.annotateText(text, grounding); + } +} + +export class SimpleGoogleSearchOutputParser extends BaseGoogleSearchOutputParser { + protected segmentPrefix( + _grounding: GroundingInfo, + _support: GeminiGroundingSupport, + _index: number + ): string | undefined { + return undefined; + } + + protected segmentSuffix( + _grounding: GroundingInfo, + support: GeminiGroundingSupport, + _index: number + ): string | undefined { + const indices: number[] = support.groundingChunkIndices.map((i) => i + 1); + return ` [${indices.join(", ")}]`; + } + + protected textPrefix(_text: string, _grounding: GroundingInfo): string { + return "Google Says:\n"; + } + + protected chunkToString(chunk: GeminiGroundingChunk, index: number): string { + const info = chunk.retrievedContext ?? chunk.web; + return `${index + 1}. ${info.title} - ${info.uri}`; + } + + protected textSuffix(_text: string, grounding: GroundingInfo): string { + let ret = "\n"; + const chunks: GeminiGroundingChunk[] = grounding.metadata.groundingChunks; + chunks.forEach((chunk, index) => { + ret = `${ret}${this.chunkToString(chunk, index)}\n`; + }); + return ret; + } +} + +export class MarkdownGoogleSearchOutputParser extends BaseGoogleSearchOutputParser { + protected segmentPrefix( + _grounding: GroundingInfo, + _support: GeminiGroundingSupport, + _index: number + ): string | undefined { + return undefined; + } + + protected chunkLink(grounding: GroundingInfo, index: number): string { + const chunk = grounding.metadata.groundingChunks[index]; + const url = chunk.retrievedContext?.uri ?? 
chunk.web?.uri; + const num = index + 1; + return `[[${num}](${url})]`; + } + + protected segmentSuffix( + grounding: GroundingInfo, + support: GeminiGroundingSupport, + _index: number + ): string | undefined { + let ret = ""; + support.groundingChunkIndices.forEach((chunkIndex) => { + const link = this.chunkLink(grounding, chunkIndex); + ret = `${ret}${link}`; + }); + return ret; + } + + protected textPrefix( + _text: string, + _grounding: GroundingInfo + ): string | undefined { + return undefined; + } + + protected chunkSuffixLink( + chunk: GeminiGroundingChunk, + index: number + ): string { + const num = index + 1; + const info = chunk.retrievedContext ?? chunk.web; + const url = info.uri; + const site = info.title; + return `${num}. [${site}](${url})`; + } + + protected textSuffix( + _text: string, + grounding: GroundingInfo + ): string | undefined { + let ret = "\n**Search Sources**\n"; + const chunks: GeminiGroundingChunk[] = grounding.metadata.groundingChunks; + chunks.forEach((chunk, index) => { + ret = `${ret}${this.chunkSuffixLink(chunk, index)}\n`; + }); + + const search = this.searchSuggestion(grounding); + ret = `${ret}\n${search}`; + + return ret; + } +} diff --git a/libs/langchain-google-common/src/tests/chat_models.test.ts b/libs/langchain-google-common/src/tests/chat_models.test.ts index 5726d9fd445e..0192c5b67710 100644 --- a/libs/langchain-google-common/src/tests/chat_models.test.ts +++ b/libs/langchain-google-common/src/tests/chat_models.test.ts @@ -35,7 +35,7 @@ import { import { removeAdditionalProperties } from "../utils/zod_to_gemini_parameters.js"; import { MessageGeminiSafetyHandler } from "../utils/index.js"; -class ChatGoogle extends ChatGoogleBase { +export class ChatGoogle extends ChatGoogleBase { constructor(fields?: ChatGoogleBaseInput) { super(fields); } diff --git a/libs/langchain-google-common/src/tests/data/chat-6-mock.json b/libs/langchain-google-common/src/tests/data/chat-6-mock.json index 796fdcf9bcee..f4966c2e2f94 100644 --- 
a/libs/langchain-google-common/src/tests/data/chat-6-mock.json +++ b/libs/langchain-google-common/src/tests/data/chat-6-mock.json @@ -67,7 +67,7 @@ "endIndex": 611, "text": "Shohei Ohtani, in his first year with the Dodgers, also experienced his first post-season appearance." }, - "groundingChunkIndices": [0], + "groundingChunkIndices": [0, 2], "confidenceScores": [0.95767003] } ], diff --git a/libs/langchain-google-common/src/tests/output_parsers.test.ts b/libs/langchain-google-common/src/tests/output_parsers.test.ts new file mode 100644 index 000000000000..ac3c135f5279 --- /dev/null +++ b/libs/langchain-google-common/src/tests/output_parsers.test.ts @@ -0,0 +1,217 @@ +/* eslint-disable @typescript-eslint/no-explicit-any */ +import { test } from "@jest/globals"; +import { MockClientAuthInfo, mockId } from "./mock.js"; +import { ChatGoogle } from "./chat_models.test.js"; +import { + MarkdownGoogleSearchOutputParser, + SimpleGoogleSearchOutputParser, +} from "../output_parsers.js"; + +describe("GoogleSearchOutputParsers", () => { + test("Simple", async () => { + const record: Record = {}; + const projectId = mockId(); + const authOptions: MockClientAuthInfo = { + record, + projectId, + resultFile: "chat-6-mock.json", + }; + + const searchRetrievalTool = { + googleSearchRetrieval: { + dynamicRetrievalConfig: { + mode: "MODE_DYNAMIC", + dynamicThreshold: 0.7, // default is 0.7 + }, + }, + }; + const model = new ChatGoogle({ + authOptions, + modelName: "gemini-1.5-pro-002", + temperature: 0, + maxRetries: 0, + }).bindTools([searchRetrievalTool]); + + const parser = new SimpleGoogleSearchOutputParser(); + + const chain = model.pipe(parser); + + const result = await chain.invoke("Who won the 2024 MLB World Series?"); + + const expectation = + "Google Says:\n" + + "The Los Angeles Dodgers won the 2024 World Series, defeating the New York Yankees 4-1 in the series. 
[1] The Dodgers clinched the title with a 7-6 comeback victory in Game 5 at Yankee Stadium on Wednesday, October 30th. This was their eighth World Series title overall and their second in the past five years. It was also their first World Series win in a full season since 1988. [2] Mookie Betts earned his third World Series ring (2018, 2020, and 2024), becoming the only active player with three championships. [3] Shohei Ohtani, in his first year with the Dodgers, also experienced his first post-season appearance. [1, 3]\n" + + "\n" + + "1. bbc.com - https://vertexaisearch.cloud.google.com/grounding-api-redirect/AYygrcTYmdnM71OvWYUTG4JggmRj8cIIgA2KtKas5RPj09CiALB4n8hl-SfCD6r8WnimL2psBoYmEN9ng9sENjpeP5VxgLMTlm0zgxhrWFfx3yA6B_n0N9j-BgHLISAUi-_Ql4_Buyw68Svq-3v6BgrXzn9hLOtK\n" + + "2. mlb.com - https://vertexaisearch.cloud.google.com/grounding-api-redirect/AYygrcQRhhvHTdpb8OMOEMVxv9fkevPoMWMnhrpuC7E0E0R94xmFxT9Vv5na1hMrfHGKxVZ9aE3PgCAs5nftC3iAkeD7B6ZTfKGH2Im1CqssMM7zorGx1Ds5_7QPPBDQps_JvpkOuvRluGCVg8KwNaIU-hm3Kg==\n" + + "3. 
youtube.com - https://vertexaisearch.cloud.google.com/grounding-api-redirect/AYygrcSwvb2t622A2ZpKxqOWKy16L1mEUvmsAJoHjaR7uffKO71SeZkpdRXRsST9HJzJkGSkMF9kOaXGoDtcvUrttqKYOQHvHSUBYO7LWMlU00KyNlSoQzrBsgN4KuJ4O4acnNyNCSVX3-E=\n"; + + expect(result).toEqual(expectation); + }); + + test("Markdown", async () => { + const record: Record = {}; + const projectId = mockId(); + const authOptions: MockClientAuthInfo = { + record, + projectId, + resultFile: "chat-6-mock.json", + }; + + const searchRetrievalTool = { + googleSearchRetrieval: { + dynamicRetrievalConfig: { + mode: "MODE_DYNAMIC", + dynamicThreshold: 0.7, // default is 0.7 + }, + }, + }; + const model = new ChatGoogle({ + authOptions, + modelName: "gemini-1.5-pro-002", + temperature: 0, + maxRetries: 0, + }).bindTools([searchRetrievalTool]); + + const parser = new MarkdownGoogleSearchOutputParser(); + + const chain = model.pipe(parser); + + const result = await chain.invoke("Who won the 2024 MLB World Series?"); + + const expectation = + "The Los Angeles Dodgers won the 2024 World Series, defeating the New York Yankees 4-1 in the series.[[1](https://vertexaisearch.cloud.google.com/grounding-api-redirect/AYygrcTYmdnM71OvWYUTG4JggmRj8cIIgA2KtKas5RPj09CiALB4n8hl-SfCD6r8WnimL2psBoYmEN9ng9sENjpeP5VxgLMTlm0zgxhrWFfx3yA6B_n0N9j-BgHLISAUi-_Ql4_Buyw68Svq-3v6BgrXzn9hLOtK)] The Dodgers clinched the title with a 7-6 comeback victory in Game 5 at Yankee Stadium on Wednesday, October 30th. This was their eighth World Series title overall and their second in the past five years. 
It was also their first World Series win in a full season since 1988.[[2](https://vertexaisearch.cloud.google.com/grounding-api-redirect/AYygrcQRhhvHTdpb8OMOEMVxv9fkevPoMWMnhrpuC7E0E0R94xmFxT9Vv5na1hMrfHGKxVZ9aE3PgCAs5nftC3iAkeD7B6ZTfKGH2Im1CqssMM7zorGx1Ds5_7QPPBDQps_JvpkOuvRluGCVg8KwNaIU-hm3Kg==)] Mookie Betts earned his third World Series ring (2018, 2020, and 2024), becoming the only active player with three championships.[[3](https://vertexaisearch.cloud.google.com/grounding-api-redirect/AYygrcSwvb2t622A2ZpKxqOWKy16L1mEUvmsAJoHjaR7uffKO71SeZkpdRXRsST9HJzJkGSkMF9kOaXGoDtcvUrttqKYOQHvHSUBYO7LWMlU00KyNlSoQzrBsgN4KuJ4O4acnNyNCSVX3-E=)] Shohei Ohtani, in his first year with the Dodgers, also experienced his first post-season appearance.[[1](https://vertexaisearch.cloud.google.com/grounding-api-redirect/AYygrcTYmdnM71OvWYUTG4JggmRj8cIIgA2KtKas5RPj09CiALB4n8hl-SfCD6r8WnimL2psBoYmEN9ng9sENjpeP5VxgLMTlm0zgxhrWFfx3yA6B_n0N9j-BgHLISAUi-_Ql4_Buyw68Svq-3v6BgrXzn9hLOtK)][[3](https://vertexaisearch.cloud.google.com/grounding-api-redirect/AYygrcSwvb2t622A2ZpKxqOWKy16L1mEUvmsAJoHjaR7uffKO71SeZkpdRXRsST9HJzJkGSkMF9kOaXGoDtcvUrttqKYOQHvHSUBYO7LWMlU00KyNlSoQzrBsgN4KuJ4O4acnNyNCSVX3-E=)]\n" + + "\n" + + "**Search Sources**\n" + + "1. [bbc.com](https://vertexaisearch.cloud.google.com/grounding-api-redirect/AYygrcTYmdnM71OvWYUTG4JggmRj8cIIgA2KtKas5RPj09CiALB4n8hl-SfCD6r8WnimL2psBoYmEN9ng9sENjpeP5VxgLMTlm0zgxhrWFfx3yA6B_n0N9j-BgHLISAUi-_Ql4_Buyw68Svq-3v6BgrXzn9hLOtK)\n" + + "2. [mlb.com](https://vertexaisearch.cloud.google.com/grounding-api-redirect/AYygrcQRhhvHTdpb8OMOEMVxv9fkevPoMWMnhrpuC7E0E0R94xmFxT9Vv5na1hMrfHGKxVZ9aE3PgCAs5nftC3iAkeD7B6ZTfKGH2Im1CqssMM7zorGx1Ds5_7QPPBDQps_JvpkOuvRluGCVg8KwNaIU-hm3Kg==)\n" + + "3. [youtube.com](https://vertexaisearch.cloud.google.com/grounding-api-redirect/AYygrcSwvb2t622A2ZpKxqOWKy16L1mEUvmsAJoHjaR7uffKO71SeZkpdRXRsST9HJzJkGSkMF9kOaXGoDtcvUrttqKYOQHvHSUBYO7LWMlU00KyNlSoQzrBsgN4KuJ4O4acnNyNCSVX3-E=)\n" + + "\n" + + "\n" + + '
\n' + + '
\n' + + ' \n' + + ' \n' + + ' \n' + + ' \n' + + ' \n' + + " \n" + + ' \n' + + ' \n' + + ' \n' + + ' \n' + + ' \n' + + ' \n' + + " \n" + + '
\n' + + "
\n" + + ' \n" + + "
\n"; + + expect(result).toEqual(expectation); + }); +}); From 9a097fdc0137cf5012902f572983b8e21af38233 Mon Sep 17 00:00:00 2001 From: Jacob Lee Date: Tue, 31 Dec 2024 09:36:11 -0800 Subject: [PATCH 05/10] docs: Small docs tweak (#7452) --- .../docs/integrations/text_embedding/transformers.mdx | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/docs/core_docs/docs/integrations/text_embedding/transformers.mdx b/docs/core_docs/docs/integrations/text_embedding/transformers.mdx index c789a125aab8..97f08776ab6a 100644 --- a/docs/core_docs/docs/integrations/text_embedding/transformers.mdx +++ b/docs/core_docs/docs/integrations/text_embedding/transformers.mdx @@ -10,6 +10,11 @@ It runs locally and even works directly in the browser, allowing you to create w You'll need to install the [@huggingface/transformers](https://www.npmjs.com/package/@huggingface/transformers) package as a peer dependency: +:::tip Compatibility +If you are using a version of community older than 0.3.21, install the older `@xenova/transformers` package and +import the embeddings from `"@langchain/community/embeddings/hf_transformers"` below. 
+::: + ```bash npm2yarn npm install @huggingface/transformers ``` From 3c48dd18c154b079d1dc4930b3dd2711d2480c43 Mon Sep 17 00:00:00 2001 From: mgiorgino-iobeya <132499143+mgiorgino-iobeya@users.noreply.github.com> Date: Tue, 31 Dec 2024 18:56:36 +0100 Subject: [PATCH 06/10] feat(community): jira document loader (#7294) --- .../document_loaders/web_loaders/jira.mdx | 23 + examples/.env.example | 6 +- examples/src/document_loaders/jira.ts | 26 ++ libs/langchain-community/.env.example | 4 + libs/langchain-community/.gitignore | 4 + libs/langchain-community/langchain.config.js | 1 + libs/langchain-community/package.json | 13 + .../document_loaders/tests/jira.int.test.ts | 209 +++++++++ .../src/document_loaders/tests/jira.test.ts | 267 +++++++++++ .../src/document_loaders/web/jira.ts | 441 ++++++++++++++++++ .../src/load/import_map.ts | 1 + 11 files changed, 994 insertions(+), 1 deletion(-) create mode 100644 docs/core_docs/docs/integrations/document_loaders/web_loaders/jira.mdx create mode 100644 examples/src/document_loaders/jira.ts create mode 100644 libs/langchain-community/.env.example create mode 100644 libs/langchain-community/src/document_loaders/tests/jira.int.test.ts create mode 100644 libs/langchain-community/src/document_loaders/tests/jira.test.ts create mode 100644 libs/langchain-community/src/document_loaders/web/jira.ts diff --git a/docs/core_docs/docs/integrations/document_loaders/web_loaders/jira.mdx b/docs/core_docs/docs/integrations/document_loaders/web_loaders/jira.mdx new file mode 100644 index 000000000000..fc77f4a14ab1 --- /dev/null +++ b/docs/core_docs/docs/integrations/document_loaders/web_loaders/jira.mdx @@ -0,0 +1,23 @@ +--- +sidebar_class_name: node-only +--- + +# Jira + +:::tip Compatibility +Only available on Node.js. +::: + +This covers how to load document objects from issues in a Jira project.
+ +## Credentials + +- You'll need to set up an access token and provide it along with your Jira username in order to authenticate the request +- You'll also need the project key and host URL for the project containing the issues to load as documents. + +## Usage + +import CodeBlock from "@theme/CodeBlock"; +import Example from "@examples/document_loaders/jira.ts"; + +{Example} diff --git a/examples/.env.example b/examples/.env.example index 2abb8d8e6912..9aae33991e92 100644 --- a/examples/.env.example +++ b/examples/.env.example @@ -83,4 +83,8 @@ FRIENDLI_TEAM=ADD_YOURS_HERE # https://suite.friendli.ai/ HANA_HOST=HANA_DB_ADDRESS HANA_PORT=HANA_DB_PORT HANA_UID=HANA_DB_USER -HANA_PWD=HANA_DB_PASSWORD \ No newline at end of file +HANA_PWD=HANA_DB_PASSWORD +JIRA_HOST=ADD_YOURS_HERE +JIRA_USERNAME=ADD_YOURS_HERE +JIRA_ACCESS_TOKEN=ADD_YOURS_HERE +JIRA_PROJECT_KEY=ADD_YOURS_HERE \ No newline at end of file diff --git a/examples/src/document_loaders/jira.ts b/examples/src/document_loaders/jira.ts new file mode 100644 index 000000000000..50b2a9511c45 --- /dev/null +++ b/examples/src/document_loaders/jira.ts @@ -0,0 +1,26 @@ +import { JiraProjectLoader } from "@langchain/community/document_loaders/web/jira"; + +const host = process.env.JIRA_HOST || "https://jira.example.com"; +const username = process.env.JIRA_USERNAME; +const accessToken = process.env.JIRA_ACCESS_TOKEN; +const projectKey = process.env.JIRA_PROJECT_KEY || "PROJ"; + +if (username && accessToken) { + // Created within last 30 days + const createdAfter = new Date(); + createdAfter.setDate(createdAfter.getDate() - 30); + const loader = new JiraProjectLoader({ + host, + projectKey, + username, + accessToken, + createdAfter, + }); + + const documents = await loader.load(); + console.log(`Loaded ${documents.length} Jira document(s)`); +} else { + console.log( + "You must provide a username and access token to run this example." 
+ ); +} diff --git a/libs/langchain-community/.env.example b/libs/langchain-community/.env.example new file mode 100644 index 000000000000..2c36f95558b3 --- /dev/null +++ b/libs/langchain-community/.env.example @@ -0,0 +1,4 @@ +JIRA_HOST=ADD_YOURS_HERE +JIRA_USERNAME=ADD_YOURS_HERE +JIRA_ACCESS_TOKEN=ADD_YOURS_HERE +JIRA_PROJECT_KEY=ADD_YOURS_HERE diff --git a/libs/langchain-community/.gitignore b/libs/langchain-community/.gitignore index 442ff89b42e9..4eb6145b3b32 100644 --- a/libs/langchain-community/.gitignore +++ b/libs/langchain-community/.gitignore @@ -930,6 +930,10 @@ document_loaders/web/imsdb.cjs document_loaders/web/imsdb.js document_loaders/web/imsdb.d.ts document_loaders/web/imsdb.d.cts +document_loaders/web/jira.cjs +document_loaders/web/jira.js +document_loaders/web/jira.d.ts +document_loaders/web/jira.d.cts document_loaders/web/figma.cjs document_loaders/web/figma.js document_loaders/web/figma.d.ts diff --git a/libs/langchain-community/langchain.config.js b/libs/langchain-community/langchain.config.js index 0ea7bff3a182..0f76de04409d 100644 --- a/libs/langchain-community/langchain.config.js +++ b/libs/langchain-community/langchain.config.js @@ -288,6 +288,7 @@ export const config = { "document_loaders/web/gitbook": "document_loaders/web/gitbook", "document_loaders/web/hn": "document_loaders/web/hn", "document_loaders/web/imsdb": "document_loaders/web/imsdb", + "document_loaders/web/jira": "document_loaders/web/jira", "document_loaders/web/figma": "document_loaders/web/figma", "document_loaders/web/firecrawl": "document_loaders/web/firecrawl", "document_loaders/web/github": "document_loaders/web/github", diff --git a/libs/langchain-community/package.json b/libs/langchain-community/package.json index 4695474df42c..141c6f38eef2 100644 --- a/libs/langchain-community/package.json +++ b/libs/langchain-community/package.json @@ -2820,6 +2820,15 @@ "import": "./document_loaders/web/imsdb.js", "require": "./document_loaders/web/imsdb.cjs" }, + 
"./document_loaders/web/jira": { + "types": { + "import": "./document_loaders/web/jira.d.ts", + "require": "./document_loaders/web/jira.d.cts", + "default": "./document_loaders/web/jira.d.ts" + }, + "import": "./document_loaders/web/jira.js", + "require": "./document_loaders/web/jira.cjs" + }, "./document_loaders/web/figma": { "types": { "import": "./document_loaders/web/figma.d.ts", @@ -4107,6 +4116,10 @@ "document_loaders/web/imsdb.js", "document_loaders/web/imsdb.d.ts", "document_loaders/web/imsdb.d.cts", + "document_loaders/web/jira.cjs", + "document_loaders/web/jira.js", + "document_loaders/web/jira.d.ts", + "document_loaders/web/jira.d.cts", "document_loaders/web/figma.cjs", "document_loaders/web/figma.js", "document_loaders/web/figma.d.ts", diff --git a/libs/langchain-community/src/document_loaders/tests/jira.int.test.ts b/libs/langchain-community/src/document_loaders/tests/jira.int.test.ts new file mode 100644 index 000000000000..e01d1d65663b --- /dev/null +++ b/libs/langchain-community/src/document_loaders/tests/jira.int.test.ts @@ -0,0 +1,209 @@ +/** + * NOTE: Env var should be set, and configured project should exist + */ +import { Document } from "@langchain/core/documents"; +import { expect, test } from "@jest/globals"; +import { + JiraIssue, + JiraProjectLoader, + JiraProjectLoaderParams, +} from "../web/jira.js"; + +describe("JiraProjectLoader Integration Tests", () => { + const JIRA_HOST = requireEnvVar("JIRA_HOST"); + const JIRA_USERNAME = requireEnvVar("JIRA_USERNAME"); + const JIRA_ACCESS_TOKEN = requireEnvVar("JIRA_ACCESS_TOKEN"); + const JIRA_PROJECT_KEY = requireEnvVar("JIRA_PROJECT_KEY"); + const jiraConf: JiraProjectLoaderParams = { + host: JIRA_HOST, + projectKey: JIRA_PROJECT_KEY, + username: JIRA_USERNAME, + accessToken: JIRA_ACCESS_TOKEN, + limitPerRequest: 20, + }; + + test("should load Jira project issues as documents successfully", async () => { + const docs = await loadJiraDocsUntil((docs) => docs.length > 0); + + 
expect(docs).toBeDefined(); + expect(Array.isArray(docs)).toBe(true); + + if (docs.length < 1) { + // Skip test if not enough issues available + return; + } + const firstDoc = docs[0]; + + // Check document structure + expect(firstDoc).toHaveProperty("pageContent"); + expect(firstDoc).toHaveProperty("metadata"); + + // Check metadata + expect(firstDoc.metadata).toHaveProperty("id"); + expect(firstDoc.metadata).toHaveProperty("host", JIRA_HOST); + expect(firstDoc.metadata).toHaveProperty("projectKey", JIRA_PROJECT_KEY); + + // Check pageContent contains essential Jira issue information + const content = firstDoc.pageContent; + expect(content).toContain("Issue:"); + expect(content).toContain("Project:"); + expect(content).toContain("Status:"); + expect(content).toContain("Priority:"); + expect(content).toContain("Type:"); + expect(content).toContain("Creator:"); + }); + + test("should filter issues based on createdAfter date", async () => { + // First load at least 2 issues with different creation dates (ignoring time) + const baseIssues = await loadJiraIssuesUntil(haveTwoDifferentCreationDates); + if (baseIssues.length < 2) { + // Skip test if not enough issues available + return; + } + + // Create a map from date string without time to list of issues + const dateToIssueMap = new Map(); + baseIssues.forEach((issue) => { + const date = asStringWithoutTime(new Date(issue.fields.created)); + dateToIssueMap.set(date, (dateToIssueMap.get(date) ?? 
[]).concat(issue)); + }); + // Convert map to list of {date, issues} + const issuesGroupedByDate = Array.from( + dateToIssueMap, + ([date, issues]) => ({ date, issues }) + ); + issuesGroupedByDate.sort((a, b) => a.date.localeCompare(b.date)); + + // Pick middle date to split issues in two groups + const middleIndex = Math.floor(issuesGroupedByDate.length / 2); + const middleDate = new Date(issuesGroupedByDate[middleIndex].date); + const issuesAfterMiddle = issuesGroupedByDate + .slice(middleIndex) + .flatMap(({ issues }) => issues); + + // Load issues created after middle date + const loader = new JiraProjectLoader({ + ...jiraConf, + createdAfter: middleDate, + }); + + const filteredDocs = await loader.load(); + + // Verify we got the expected issues + expect(filteredDocs.length).toBeGreaterThan(0); + expect(filteredDocs.length).toBeLessThan(baseIssues.length); + + // Verify all returned issues are created after our cutoff date + const middleDateTimestamp = middleDate.getTime(); + filteredDocs.forEach((doc) => { + const issueDateString = doc.pageContent + .split("\n") + .filter((line) => /^Created: /.test(line))[0] + .replace("Created: ", ""); + const issueDateTimestamp = new Date( + asStringWithoutTime(new Date(issueDateString)) + ).getTime(); + expect(issueDateTimestamp).toBeGreaterThanOrEqual(middleDateTimestamp); + }); + + // Verify we got the same issues as in our original set + const filteredIds = new Set(filteredDocs.map((d) => d.metadata.id)); + const expectedIds = new Set(issuesAfterMiddle.map((issue) => issue.id)); + expect(filteredIds).toEqual(expectedIds); + }); + + test("should handle invalid credentials", async () => { + const loader = new JiraProjectLoader({ + ...jiraConf, + username: "invalid_username", + accessToken: "invalid_token", + }); + + const docs = await loader.load(); + expect(docs).toEqual([]); + }); + + test("should handle invalid project key", async () => { + const loader = new JiraProjectLoader({ + ...jiraConf, + projectKey: 
"INVALID_PROJECT_KEY", + }); + + const docs = await loader.load(); + expect(docs).toEqual([]); + }); + + function requireEnvVar(name: string): string { + // eslint-disable-next-line no-process-env + const value = process.env[name]; + if (!value) { + throw new Error(`environment variable "${name}" must be set`); + } + return value; + } + + function asStringWithoutTime(date: Date): string { + return date.toISOString().split("T")[0]; + } + + function sameDate(a: Date, b: Date) { + return asStringWithoutTime(a) === asStringWithoutTime(b); + } + + function haveTwoDifferentCreationDates(issues: JiraIssue[]): boolean { + return ( + issues.length >= 2 && + issues + .slice(1) + .some( + (issue) => + !sameDate( + new Date(issue.fields.created), + new Date(issues[0].fields.created) + ) + ) + ); + } + + async function loadJiraDocsUntil(predicate: (docs: Document[]) => boolean) { + const load = (createdAfter: Date) => + new JiraProjectLoader({ + ...jiraConf, + createdAfter, + }).load(); + return loadUntil(load, predicate); + } + + async function loadJiraIssuesUntil( + predicate: (docs: JiraIssue[]) => boolean + ) { + const load = (createdAfter: Date) => + new JiraProjectLoader({ + ...jiraConf, + createdAfter, + }).loadAsIssues(); + return loadUntil(load, predicate); + } + + async function loadUntil( + loadCreatedAfter: (date: Date) => Promise, + predicate: (loaded: T[]) => boolean + ): Promise { + const now = new Date(); + let months = 1; + const maxMonths = 120; + + let loaded: T[] = []; + while (!predicate(loaded) && months < maxMonths) { + const createdAfter = new Date(now); + createdAfter.setDate(now.getDate() - months * 30); + loaded = await loadCreatedAfter(createdAfter); + months *= 1.2; + } + + if (months >= maxMonths) { + return []; + } + return loaded; + } +}); diff --git a/libs/langchain-community/src/document_loaders/tests/jira.test.ts b/libs/langchain-community/src/document_loaders/tests/jira.test.ts new file mode 100644 index 000000000000..92b1224446e0 --- 
/dev/null +++ b/libs/langchain-community/src/document_loaders/tests/jira.test.ts @@ -0,0 +1,267 @@ +import { faker } from "@faker-js/faker"; +import { + JiraDocumentConverter, + JiraIssue, + JiraUser, + JiraIssueType, + JiraPriority, + JiraProgress, + JiraProject, + JiraStatus, + JiraStatusCategory, +} from "../web/jira.js"; + +describe("JiraDocumentConverter Unit Tests", () => { + function getConverter() { + return new JiraDocumentConverter({ + projectKey: "PROJ", + host: "https://example.com", + }); + } + + it("should handle missing optional fields", () => { + const issue: JiraIssue = someJiraIssue(); + delete issue.fields.assignee; + delete issue.fields.duedate; + + const converter = getConverter(); + const document = converter.convertToDocuments([issue])[0]; + + expect(document).toBeDefined(); + expect(document.pageContent).toContain(issue.fields.summary); + expect(document.pageContent).toContain("Assignee: Unassigned"); + expect(document.pageContent).not.toMatch(/.*^Due Date: .*/m); + expect(document.metadata).toEqual({ + id: issue.id, + host: converter.host, + projectKey: converter.projectKey, + }); + }); + + it("should format the document content properly", () => { + const converter = getConverter(); + const issue = someJiraIssue(); + const document = converter.convertToDocuments([issue])[0]; + + expect(document.pageContent).toContain(issue.fields.summary); + expect(document.pageContent).toContain(issue.fields.description); + expect(document.pageContent).toContain( + issue.fields.labels?.join(", ") || "" + ); + expect(document.pageContent).toContain( + issue.fields.reporter?.displayName || "" + ); + expect(document.pageContent).toContain( + issue.fields.assignee?.displayName || "Unassigned" + ); + expect(document.pageContent).toContain(issue.fields.duedate || ""); + expect(document.pageContent).toContain( + issue.fields.timeestimate?.toString() || "" + ); + expect(document.pageContent).toContain( + issue.fields.timespent?.toString() || "" + ); + 
expect(document.pageContent).toContain(issue.fields.resolutiondate || ""); + expect(document.pageContent).toContain( + issue.fields.progress.percent?.toString() || "" + ); + }); +}); + +export function someJiraIssueType( + overrides: Partial = {} +): JiraIssueType { + const baseIssueType: JiraIssueType = { + avatarId: faker.number.int({ min: 1, max: 100 }), + description: faker.lorem.sentence(), + entityId: faker.string.uuid(), + hierarchyLevel: faker.number.int({ min: 1, max: 5 }), + iconUrl: faker.image.url(), + id: faker.string.numeric(5), + name: faker.helpers.arrayElement(["Bug", "Task", "Story", "Epic"]), + self: faker.internet.url(), + subtask: false, + }; + + return { + ...baseIssueType, + ...overrides, + }; +} + +export function someJiraUser(overrides: Partial = {}): JiraUser { + const baseUser = { + accountId: faker.string.uuid(), + accountType: "atlassian", + active: true, + avatarUrls: { + "16x16": faker.image.avatar(), + "24x24": faker.image.avatar(), + "32x32": faker.image.avatar(), + "48x48": faker.image.avatar(), + }, + displayName: faker.person.fullName(), + emailAddress: faker.internet.email(), + self: faker.internet.url(), + timeZone: faker.location.timeZone(), + }; + + return { + ...baseUser, + ...overrides, + }; +} + +export function someJiraPriority( + overrides: Partial = {} +): JiraPriority { + const basePriority: JiraPriority = { + iconUrl: faker.image.url(), + id: faker.string.numeric(2), + name: faker.helpers.arrayElement([ + "Highest", + "High", + "Medium", + "Low", + "Lowest", + ]), + self: faker.internet.url(), + }; + + return { + ...basePriority, + ...overrides, + }; +} + +export function someJiraProgress( + overrides: Partial = {} +): JiraProgress { + const baseProgress: JiraProgress = { + progress: faker.number.int({ min: 0, max: 100 }), + total: 100, + percent: faker.number.int({ min: 0, max: 100 }), + }; + + return { + ...baseProgress, + ...overrides, + }; +} + +export function someJiraProject( + overrides: Partial = {} +): 
JiraProject { + const baseProject: JiraProject = { + avatarUrls: { + "16x16": faker.image.avatar(), + "24x24": faker.image.avatar(), + "32x32": faker.image.avatar(), + "48x48": faker.image.avatar(), + }, + id: faker.string.numeric(5), + key: faker.string.alpha(4).toUpperCase(), + name: faker.company.name(), + projectTypeKey: "software", + self: faker.internet.url(), + simplified: false, + }; + + return { + ...baseProject, + ...overrides, + }; +} + +export function someJiraStatusCategory( + overrides: Partial = {} +): JiraStatusCategory { + const baseStatusCategory: JiraStatusCategory = { + self: faker.internet.url(), + id: faker.number.int({ min: 1, max: 5 }), + key: faker.helpers.arrayElement(["new", "indeterminate", "done"]), + colorName: faker.helpers.arrayElement(["blue-gray", "yellow", "green"]), + name: faker.helpers.arrayElement(["To Do", "In Progress", "Done"]), + }; + + return { + ...baseStatusCategory, + ...overrides, + }; +} + +export function someJiraStatus( + overrides: Partial = {} +): JiraStatus { + const baseStatus: JiraStatus = { + self: faker.internet.url(), + description: faker.lorem.sentence(), + iconUrl: faker.image.url(), + name: faker.helpers.arrayElement([ + "To Do", + "In Progress", + "Done", + "Blocked", + ]), + id: faker.string.numeric(2), + statusCategory: someJiraStatusCategory(), + }; + + return { + ...baseStatus, + ...overrides, + }; +} + +export function someJiraIssue(overrides: Partial = {}): JiraIssue { + const issueKey = `${faker.string.alpha(4).toUpperCase()}-${faker.number.int({ + min: 1, + max: 9999, + })}`; + + const baseIssue: JiraIssue = { + expand: "renderedFields", + id: faker.string.numeric(5), + self: `https://${faker.internet.domainName()}/rest/api/2/issue/${issueKey}`, + key: issueKey, + fields: { + assignee: faker.datatype.boolean() ? 
someJiraUser() : undefined, + created: faker.date.past().toISOString(), + description: faker.lorem.paragraph(), + issuelinks: [], + issuetype: someJiraIssueType(), + labels: faker.datatype.boolean() + ? Array.from({ length: faker.number.int({ min: 1, max: 5 }) }, () => + faker.word.noun() + ) + : undefined, + priority: someJiraPriority(), + progress: someJiraProgress(), + project: someJiraProject(), + reporter: faker.datatype.boolean() ? someJiraUser() : undefined, + creator: someJiraUser(), + resolutiondate: faker.datatype.boolean() + ? faker.date.recent().toISOString() + : undefined, + status: someJiraStatus(), + subtasks: [], + summary: faker.lorem.sentence(), + timeestimate: faker.datatype.boolean() + ? faker.number.int({ min: 1, max: 100 }) * 3600 + : undefined, + timespent: faker.datatype.boolean() + ? faker.number.int({ min: 1, max: 100 }) * 3600 + : undefined, + updated: faker.date.recent().toISOString(), + duedate: faker.datatype.boolean() + ? faker.date.future().toISOString() + : undefined, + }, + }; + console.log(baseIssue.fields.duedate); + + return { + ...baseIssue, + ...overrides, + }; +} diff --git a/libs/langchain-community/src/document_loaders/web/jira.ts b/libs/langchain-community/src/document_loaders/web/jira.ts new file mode 100644 index 000000000000..59e0879d2ab9 --- /dev/null +++ b/libs/langchain-community/src/document_loaders/web/jira.ts @@ -0,0 +1,441 @@ +import { Document } from "@langchain/core/documents"; +import { BaseDocumentLoader } from "@langchain/core/document_loaders/base"; + +export type JiraStatusCategory = { + self: string; + id: number; + key: string; + colorName: string; + name: string; +}; + +export type JiraStatus = { + self: string; + description: string; + iconUrl: string; + name: string; + id: string; + statusCategory: JiraStatusCategory; +}; + +export type JiraUser = { + accountId: string; + accountType: string; + active: boolean; + avatarUrls: { + "16x16": string; + "24x24": string; + "32x32": string; + "48x48": string; 
+ }; + displayName: string; + emailAddress: string; + self: string; + timeZone: string; +}; + +export type JiraIssueType = { + avatarId: number; + description: string; + entityId: string; + hierarchyLevel: number; + iconUrl: string; + id: string; + name: string; + self: string; + subtask: boolean; +}; + +export type JiraPriority = { + iconUrl: string; + id: string; + name: string; + self: string; +}; + +export type JiraProgress = { + progress: number; + total: number; + percent?: number; +}; + +export type JiraProject = { + avatarUrls: { + "16x16": string; + "24x24": string; + "32x32": string; + "48x48": string; + }; + id: string; + key: string; + name: string; + projectTypeKey: string; + self: string; + simplified: boolean; +}; + +export type JiraSubTask = { + id: string; + key: string; + self: string; + fields: { + issuetype: JiraIssueType; + priority: JiraPriority; + status: JiraStatus; + summary: string; + }; +}; + +export type JiraIssueLinkType = { + id: string; + name: string; + inward: string; + outward: string; + self: string; +}; + +export type JiraBriefIssue = { + id: string; + key: string; + self: string; + fields: { + summary: string; + status: JiraStatus; + priority: JiraPriority; + issuetype: JiraIssueType; + }; +}; + +export type JiraIssueLink = { + id: string; + self: string; + type: JiraIssueLinkType; + inwardIssue?: JiraBriefIssue; + outwardIssue?: JiraBriefIssue; +}; + +export type JiraIssue = { + expand: string; + id: string; + self: string; + key: string; + fields: { + assignee?: JiraUser; + created: string; + description: string; + issuelinks: JiraIssueLink[]; + issuetype: JiraIssueType; + labels?: string[]; + priority: JiraPriority; + progress: JiraProgress; + project: JiraProject; + reporter?: JiraUser; + creator: JiraUser; + resolutiondate?: string; + status: JiraStatus; + subtasks: JiraSubTask[]; + summary: string; + timeestimate?: number; + timespent?: number; + updated: string; + duedate?: string; + parent?: JiraBriefIssue; + }; +}; + 
+export type JiraAPIResponse = { + expand: string; + startAt: number; + maxResults: number; + total: number; + issues: JiraIssue[]; +}; + +/** + * Interface representing the parameters for configuring the + * JiraDocumentConverter. + */ +export interface JiraDocumentConverterParams { + host: string; + projectKey: string; +} + +/** + * Class responsible for converting Jira issues to Document objects + */ +export class JiraDocumentConverter { + public readonly host: string; + + public readonly projectKey: string; + + constructor({ host, projectKey }: JiraDocumentConverterParams) { + this.host = host; + this.projectKey = projectKey; + } + + public convertToDocuments(issues: JiraIssue[]): Document[] { + return issues.map((issue) => this.documentFromIssue(issue)); + } + + private documentFromIssue(issue: JiraIssue): Document { + return new Document({ + pageContent: this.formatIssueInfo({ + issue, + host: this.host, + }), + metadata: { + id: issue.id, + host: this.host, + projectKey: this.projectKey, + }, + }); + } + + private formatIssueInfo({ + issue, + host, + }: { + issue: JiraIssue; + host: string; + }): string { + let text = `Issue: ${this.formatMainIssueInfoText({ issue, host })}\n`; + text += `Project: ${issue.fields.project.name} (${issue.fields.project.key}, ID ${issue.fields.project.id})\n`; + text += `Status: ${issue.fields.status.name}\n`; + text += `Priority: ${issue.fields.priority.name}\n`; + text += `Type: ${issue.fields.issuetype.name}\n`; + text += `Creator: ${issue.fields.creator.displayName}\n`; + + if (issue.fields.labels && issue.fields.labels.length > 0) { + text += `Labels: ${issue.fields.labels.join(", ")}\n`; + } + + text += `Created: ${issue.fields.created}\n`; + text += `Updated: ${issue.fields.updated}\n`; + + if (issue.fields.reporter) { + text += `Reporter: ${issue.fields.reporter.displayName}\n`; + } + + text += `Assignee: ${issue.fields.assignee?.displayName ?? 
"Unassigned"}\n`; + + if (issue.fields.duedate) { + text += `Due Date: ${issue.fields.duedate}\n`; + } + + if (issue.fields.timeestimate) { + text += `Time Estimate: ${issue.fields.timeestimate}\n`; + } + + if (issue.fields.timespent) { + text += `Time Spent: ${issue.fields.timespent}\n`; + } + + if (issue.fields.resolutiondate) { + text += `Resolution Date: ${issue.fields.resolutiondate}\n`; + } + + if (issue.fields.description) { + text += `Description: ${issue.fields.description}\n`; + } + + if (issue.fields.progress.percent) { + text += `Progress: ${issue.fields.progress.percent}%\n`; + } + + if (issue.fields.parent) { + text += `Parent Issue: ${this.formatMainIssueInfoText({ + issue: issue.fields.parent, + host, + })}\n`; + } + + if (issue.fields.subtasks.length > 0) { + text += `Subtasks:\n`; + issue.fields.subtasks.forEach((subtask) => { + text += ` - ${this.formatMainIssueInfoText({ + issue: subtask, + host, + })}\n`; + }); + } + + if (issue.fields.issuelinks.length > 0) { + text += `Issue Links:\n`; + issue.fields.issuelinks.forEach((link) => { + text += ` - ${link.type.name}\n`; + if (link.inwardIssue) { + text += ` - ${this.formatMainIssueInfoText({ + issue: link.inwardIssue, + host, + })}\n`; + } + if (link.outwardIssue) { + text += ` - ${this.formatMainIssueInfoText({ + issue: link.outwardIssue, + host, + })}\n`; + } + }); + } + + return text; + } + + private getLinkToIssue({ + issueKey, + host, + }: { + issueKey: string; + host: string; + }): string { + return `${host}/browse/${issueKey}`; + } + + private formatMainIssueInfoText({ + issue, + host, + }: { + issue: JiraIssue | JiraBriefIssue; + host: string; + }): string { + const link = this.getLinkToIssue({ + issueKey: issue.key, + host, + }); + + const text = `${issue.key} (ID ${issue.id}) - ${issue.fields.summary} (${link})`; + + return text; + } +} + +/** + * Interface representing the parameters for configuring the + * JiraProjectLoader. 
+ */ +export interface JiraProjectLoaderParams { + host: string; + projectKey: string; + username: string; + accessToken: string; + limitPerRequest?: number; + createdAfter?: Date; +} + +const API_ENDPOINTS = { + SEARCH: "/rest/api/2/search", +}; + +/** + * Class representing a document loader for loading issues from a Jira project. + */ +export class JiraProjectLoader extends BaseDocumentLoader { + private readonly accessToken: string; + + public readonly host: string; + + public readonly projectKey: string; + + public readonly username: string; + + public readonly limitPerRequest: number; + + private readonly createdAfter?: Date; + + private readonly documentConverter: JiraDocumentConverter; + + constructor({ + host, + projectKey, + username, + accessToken, + limitPerRequest = 100, + createdAfter, + }: JiraProjectLoaderParams) { + super(); + this.host = host; + this.projectKey = projectKey; + this.username = username; + this.accessToken = accessToken; + this.limitPerRequest = limitPerRequest; + this.createdAfter = createdAfter; + this.documentConverter = new JiraDocumentConverter({ host, projectKey }); + } + + private buildAuthorizationHeader(): string { + return `Basic ${Buffer.from( + `${this.username}:${this.accessToken}` + ).toString("base64")}`; + } + + public async load(): Promise { + try { + const allJiraIssues = await this.loadAsIssues(); + return this.documentConverter.convertToDocuments(allJiraIssues); + } catch (error) { + console.error("Error:", error); + return []; + } + } + + public async loadAsIssues(): Promise { + const allIssues: JiraIssue[] = []; + + for await (const issues of this.fetchIssues()) { + allIssues.push(...issues); + } + + return allIssues; + } + + protected toJiraDateString(date: Date | undefined): string | undefined { + if (!date) { + return undefined; + } + const year = date.getFullYear(); + const month = String(date.getMonth() + 1).padStart(2, "0"); + const dayOfMonth = String(date.getDate()).padStart(2, "0"); + return
`${year}-${month}-${dayOfMonth}`; + } + + protected async *fetchIssues(): AsyncIterable { + const authorizationHeader = this.buildAuthorizationHeader(); + const url = `${this.host}${API_ENDPOINTS.SEARCH}`; + const createdAfterAsString = this.toJiraDateString(this.createdAfter); + let startAt = 0; + + while (true) { + try { + const jqlProps = [ + `project=${this.projectKey}`, + ...(createdAfterAsString ? [`created>=${createdAfterAsString}`] : []), + ]; + const params = new URLSearchParams({ + jql: jqlProps.join(" AND "), + startAt: `${startAt}`, + maxResults: `${this.limitPerRequest}`, + }); + const pageUrl = `${url}?${params}`; + + const options = { + method: "GET", + headers: { + Authorization: authorizationHeader, + Accept: "application/json", + }, + }; + + const response = await fetch(pageUrl, options); + const data: JiraAPIResponse = await response.json(); + + if (!data.issues || data.issues.length === 0) break; + + yield data.issues; + startAt += this.limitPerRequest; + } catch (error) { + console.error(error); + yield []; + } + } + } +} diff --git a/libs/langchain-community/src/load/import_map.ts b/libs/langchain-community/src/load/import_map.ts index 2ec7b20bc542..defd3600a68b 100644 --- a/libs/langchain-community/src/load/import_map.ts +++ b/libs/langchain-community/src/load/import_map.ts @@ -77,6 +77,7 @@ export * as indexes__base from "../indexes/base.js"; export * as indexes__memory from "../indexes/memory.js"; export * as document_loaders__web__airtable from "../document_loaders/web/airtable.js"; export * as document_loaders__web__html from "../document_loaders/web/html.js"; +export * as document_loaders__web__jira from "../document_loaders/web/jira.js"; export * as document_loaders__web__searchapi from "../document_loaders/web/searchapi.js"; export * as document_loaders__web__serpapi from "../document_loaders/web/serpapi.js"; export * as document_loaders__web__sort_xyz_blockchain from "../document_loaders/web/sort_xyz_blockchain.js"; From 
acb81e99aea0235e099e3dc5d51e36535aee21a7 Mon Sep 17 00:00:00 2001 From: Jacob Lee Date: Tue, 31 Dec 2024 14:49:47 -0800 Subject: [PATCH 07/10] release(community): 0.3.21 (#7453) --- libs/langchain-community/package.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libs/langchain-community/package.json b/libs/langchain-community/package.json index 141c6f38eef2..71382e6f51a8 100644 --- a/libs/langchain-community/package.json +++ b/libs/langchain-community/package.json @@ -1,6 +1,6 @@ { "name": "@langchain/community", - "version": "0.3.20", + "version": "0.3.21", "description": "Third-party integrations for LangChain.js", "type": "module", "engines": { From 9cb74a121440edd69ccc4cc5adae6608edf76773 Mon Sep 17 00:00:00 2001 From: Jacob Lee Date: Tue, 31 Dec 2024 14:51:15 -0800 Subject: [PATCH 08/10] release(aws): 0.1.3 (#7454) --- libs/langchain-aws/package.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libs/langchain-aws/package.json b/libs/langchain-aws/package.json index dfa7745f56bc..50a419fb57e2 100644 --- a/libs/langchain-aws/package.json +++ b/libs/langchain-aws/package.json @@ -1,6 +1,6 @@ { "name": "@langchain/aws", - "version": "0.1.2", + "version": "0.1.3", "description": "LangChain AWS integration", "type": "module", "engines": { From 2577ec0372eb117162b1b46a2c9f49c845bac5bb Mon Sep 17 00:00:00 2001 From: Jacob Lee Date: Tue, 31 Dec 2024 14:53:08 -0800 Subject: [PATCH 09/10] release(core): 0.3.27 (#7455) --- langchain-core/package.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/langchain-core/package.json b/langchain-core/package.json index 32510ecc8c72..b938bb050122 100644 --- a/langchain-core/package.json +++ b/langchain-core/package.json @@ -1,6 +1,6 @@ { "name": "@langchain/core", - "version": "0.3.26", + "version": "0.3.27", "description": "Core LangChain.js abstractions and schemas", "type": "module", "engines": { From 7f729c9d3b0232ed4e92c3963f514458461d85e5 Mon Sep 17 00:00:00 2001 
From: Jacob Lee Date: Tue, 31 Dec 2024 14:59:36 -0800 Subject: [PATCH 10/10] release(google): 0.1.6 (#7456) --- libs/langchain-google-common/package.json | 2 +- libs/langchain-google-gauth/package.json | 4 ++-- libs/langchain-google-vertexai-web/package.json | 4 ++-- libs/langchain-google-vertexai/package.json | 4 ++-- libs/langchain-google-webauth/package.json | 4 ++-- yarn.lock | 14 +++++++------- 6 files changed, 16 insertions(+), 16 deletions(-) diff --git a/libs/langchain-google-common/package.json b/libs/langchain-google-common/package.json index f5d32045f91c..5f32db49fc6a 100644 --- a/libs/langchain-google-common/package.json +++ b/libs/langchain-google-common/package.json @@ -1,6 +1,6 @@ { "name": "@langchain/google-common", - "version": "0.1.5", + "version": "0.1.6", "description": "Core types and classes for Google services.", "type": "module", "engines": { diff --git a/libs/langchain-google-gauth/package.json b/libs/langchain-google-gauth/package.json index c8851626c9f8..405415bddab3 100644 --- a/libs/langchain-google-gauth/package.json +++ b/libs/langchain-google-gauth/package.json @@ -1,6 +1,6 @@ { "name": "@langchain/google-gauth", - "version": "0.1.5", + "version": "0.1.6", "description": "Google auth based authentication support for Google services", "type": "module", "engines": { @@ -35,7 +35,7 @@ "author": "LangChain", "license": "MIT", "dependencies": { - "@langchain/google-common": "~0.1.5", + "@langchain/google-common": "~0.1.6", "google-auth-library": "^8.9.0" }, "peerDependencies": { diff --git a/libs/langchain-google-vertexai-web/package.json b/libs/langchain-google-vertexai-web/package.json index 5d52d4680fce..80cbde8a1d85 100644 --- a/libs/langchain-google-vertexai-web/package.json +++ b/libs/langchain-google-vertexai-web/package.json @@ -1,6 +1,6 @@ { "name": "@langchain/google-vertexai-web", - "version": "0.1.5", + "version": "0.1.6", "description": "LangChain.js support for Google Vertex AI Web", "type": "module", "engines": { @@ -32,7 
+32,7 @@ "author": "LangChain", "license": "MIT", "dependencies": { - "@langchain/google-webauth": "~0.1.5" + "@langchain/google-webauth": "~0.1.6" }, "peerDependencies": { "@langchain/core": ">=0.2.21 <0.4.0" diff --git a/libs/langchain-google-vertexai/package.json b/libs/langchain-google-vertexai/package.json index aa7f52592fec..ce1819caefe8 100644 --- a/libs/langchain-google-vertexai/package.json +++ b/libs/langchain-google-vertexai/package.json @@ -1,6 +1,6 @@ { "name": "@langchain/google-vertexai", - "version": "0.1.5", + "version": "0.1.6", "description": "LangChain.js support for Google Vertex AI", "type": "module", "engines": { @@ -32,7 +32,7 @@ "author": "LangChain", "license": "MIT", "dependencies": { - "@langchain/google-gauth": "~0.1.5" + "@langchain/google-gauth": "~0.1.6" }, "peerDependencies": { "@langchain/core": ">=0.2.21 <0.4.0" diff --git a/libs/langchain-google-webauth/package.json b/libs/langchain-google-webauth/package.json index 0a46b9221e31..c200748b3e81 100644 --- a/libs/langchain-google-webauth/package.json +++ b/libs/langchain-google-webauth/package.json @@ -1,6 +1,6 @@ { "name": "@langchain/google-webauth", - "version": "0.1.5", + "version": "0.1.6", "description": "Web-based authentication support for Google services", "type": "module", "engines": { @@ -32,7 +32,7 @@ "author": "LangChain", "license": "MIT", "dependencies": { - "@langchain/google-common": "~0.1.5", + "@langchain/google-common": "~0.1.6", "web-auth-library": "^1.0.3" }, "peerDependencies": { diff --git a/yarn.lock b/yarn.lock index 3d1769869beb..fa91e3d3f1ea 100644 --- a/yarn.lock +++ b/yarn.lock @@ -12443,7 +12443,7 @@ __metadata: languageName: unknown linkType: soft -"@langchain/google-common@^0.1.0, @langchain/google-common@workspace:*, @langchain/google-common@workspace:libs/langchain-google-common, @langchain/google-common@~0.1.5": +"@langchain/google-common@^0.1.0, @langchain/google-common@workspace:*, @langchain/google-common@workspace:libs/langchain-google-common, 
@langchain/google-common@~0.1.6": version: 0.0.0-use.local resolution: "@langchain/google-common@workspace:libs/langchain-google-common" dependencies: @@ -12478,13 +12478,13 @@ __metadata: languageName: unknown linkType: soft -"@langchain/google-gauth@workspace:libs/langchain-google-gauth, @langchain/google-gauth@~0.1.5": +"@langchain/google-gauth@workspace:libs/langchain-google-gauth, @langchain/google-gauth@~0.1.6": version: 0.0.0-use.local resolution: "@langchain/google-gauth@workspace:libs/langchain-google-gauth" dependencies: "@jest/globals": ^29.5.0 "@langchain/core": "workspace:*" - "@langchain/google-common": ~0.1.5 + "@langchain/google-common": ~0.1.6 "@langchain/scripts": ">=0.1.0 <0.2.0" "@swc/core": ^1.3.90 "@swc/jest": ^0.2.29 @@ -12557,7 +12557,7 @@ __metadata: "@jest/globals": ^29.5.0 "@langchain/core": "workspace:*" "@langchain/google-common": ^0.1.0 - "@langchain/google-webauth": ~0.1.5 + "@langchain/google-webauth": ~0.1.6 "@langchain/scripts": ">=0.1.0 <0.2.0" "@langchain/standard-tests": 0.0.0 "@swc/core": ^1.3.90 @@ -12593,7 +12593,7 @@ __metadata: "@jest/globals": ^29.5.0 "@langchain/core": "workspace:*" "@langchain/google-common": ^0.1.0 - "@langchain/google-gauth": ~0.1.5 + "@langchain/google-gauth": ~0.1.6 "@langchain/scripts": ">=0.1.0 <0.2.0" "@langchain/standard-tests": 0.0.0 "@swc/core": ^1.3.90 @@ -12622,13 +12622,13 @@ __metadata: languageName: unknown linkType: soft -"@langchain/google-webauth@workspace:libs/langchain-google-webauth, @langchain/google-webauth@~0.1.5": +"@langchain/google-webauth@workspace:libs/langchain-google-webauth, @langchain/google-webauth@~0.1.6": version: 0.0.0-use.local resolution: "@langchain/google-webauth@workspace:libs/langchain-google-webauth" dependencies: "@jest/globals": ^29.5.0 "@langchain/core": "workspace:*" - "@langchain/google-common": ~0.1.5 + "@langchain/google-common": ~0.1.6 "@langchain/scripts": ">=0.1.0 <0.2.0" "@swc/core": ^1.3.90 "@swc/jest": ^0.2.29