diff --git a/docs/api_refs/blacklisted-entrypoints.json b/docs/api_refs/blacklisted-entrypoints.json index 419d8800827d..e1b4fa28e3a6 100644 --- a/docs/api_refs/blacklisted-entrypoints.json +++ b/docs/api_refs/blacklisted-entrypoints.json @@ -15,6 +15,7 @@ "../../langchain/src/embeddings/tensorflow.ts", "../../langchain/src/embeddings/hf.ts", "../../langchain/src/embeddings/hf_transformers.ts", + "../../langchain/src/embeddings/huggingface_transformers.ts", "../../langchain/src/embeddings/googlevertexai.ts", "../../langchain/src/embeddings/googlepalm.ts", "../../langchain/src/embeddings/minimax.ts", diff --git a/docs/core_docs/docs/integrations/document_loaders/file_loaders/docx.mdx b/docs/core_docs/docs/integrations/document_loaders/file_loaders/docx.mdx index baaf464a5e5b..8e46cde7a1b8 100644 --- a/docs/core_docs/docs/integrations/document_loaders/file_loaders/docx.mdx +++ b/docs/core_docs/docs/integrations/document_loaders/file_loaders/docx.mdx @@ -4,17 +4,38 @@ hide_table_of_contents: true # Docx files -This example goes over how to load data from docx files. +The `DocxLoader` allows you to extract text data from Microsoft Word documents. It supports both the modern `.docx` format and the legacy `.doc` format. Depending on the file type, additional dependencies are required. -# Setup +--- + +## Setup + +To use `DocxLoader`, you'll need the `@langchain/community` integration along with either `mammoth` or `word-extractor` package: + +- **`mammoth`**: For processing `.docx` files. +- **`word-extractor`**: For handling `.doc` files. 
+ +### Installation + +#### For `.docx` Files ```bash npm2yarn npm install @langchain/community @langchain/core mammoth ``` -# Usage +#### For `.doc` Files + +```bash npm2yarn +npm install @langchain/community @langchain/core word-extractor +``` + +## Usage + +### Loading `.docx` Files -```typescript +For `.docx` files, there is no need to explicitly specify any parameters when initializing the loader: + +```javascript import { DocxLoader } from "@langchain/community/document_loaders/fs/docx"; const loader = new DocxLoader( @@ -23,3 +44,20 @@ const docs = await loader.load(); ``` + +### Loading `.doc` Files + +For `.doc` files, you must explicitly specify the `type` as `doc` when initializing the loader: + +```javascript +import { DocxLoader } from "@langchain/community/document_loaders/fs/docx"; + +const loader = new DocxLoader( + "src/document_loaders/tests/example_data/attention.doc", + { + type: "doc", + } +); + +const docs = await loader.load(); +``` diff --git a/docs/core_docs/docs/integrations/document_loaders/web_loaders/jira.mdx b/docs/core_docs/docs/integrations/document_loaders/web_loaders/jira.mdx new file mode 100644 index 000000000000..fc77f4a14ab1 --- /dev/null +++ b/docs/core_docs/docs/integrations/document_loaders/web_loaders/jira.mdx @@ -0,0 +1,23 @@ +--- +sidebar_class_name: node-only +--- + +# Jira + +:::tip Compatibility +Only available on Node.js. +::: + +This covers how to load document objects from issues in a Jira project. + +## Credentials + +- You'll need to set up an access token and provide it along with your Jira username in order to authenticate the request +- You'll also need the project key and host URL for the project containing the issues to load as documents. 
+ +## Usage + +import CodeBlock from "@theme/CodeBlock"; +import Example from "@examples/document_loaders/jira.ts"; + +{Example} diff --git a/docs/core_docs/docs/integrations/text_embedding/transformers.mdx b/docs/core_docs/docs/integrations/text_embedding/transformers.mdx index dc75291a39de..97f08776ab6a 100644 --- a/docs/core_docs/docs/integrations/text_embedding/transformers.mdx +++ b/docs/core_docs/docs/integrations/text_embedding/transformers.mdx @@ -8,10 +8,15 @@ It runs locally and even works directly in the browser, allowing you to create w ## Setup -You'll need to install the [@xenova/transformers](https://www.npmjs.com/package/@xenova/transformers) package as a peer dependency: +You'll need to install the [@huggingface/transformers](https://www.npmjs.com/package/@huggingface/transformers) package as a peer dependency: + +:::tip Compatibility +If you are using a version of community older than 0.3.21, install the older `@xenova/transformers` package and +import the embeddings from `"@langchain/community/embeddings/hf_transformers"` below. 
+::: ```bash npm2yarn -npm install @xenova/transformers +npm install @huggingface/transformers ``` import IntegrationInstallTooltip from "@mdx_components/integration_install_tooltip.mdx"; diff --git a/environment_tests/test-exports-cjs/src/import.js b/environment_tests/test-exports-cjs/src/import.js index 752cfdea37ba..6723331f7dc9 100644 --- a/environment_tests/test-exports-cjs/src/import.js +++ b/environment_tests/test-exports-cjs/src/import.js @@ -3,7 +3,7 @@ async function test() { const { OpenAI } = await import("@langchain/openai"); const { LLMChain } = await import("langchain/chains"); const { ChatPromptTemplate } = await import("@langchain/core/prompts"); - const { HuggingFaceTransformersEmbeddings } = await import("@langchain/community/embeddings/hf_transformers"); + const { HuggingFaceTransformersEmbeddings } = await import("@langchain/community/embeddings/huggingface_transformers"); const { Document } = await import("@langchain/core/documents"); const { MemoryVectorStore } = await import("langchain/vectorstores/memory"); diff --git a/environment_tests/test-exports-cjs/src/index.mjs b/environment_tests/test-exports-cjs/src/index.mjs index 632b8081fbaa..7f30afdc81d6 100644 --- a/environment_tests/test-exports-cjs/src/index.mjs +++ b/environment_tests/test-exports-cjs/src/index.mjs @@ -3,7 +3,7 @@ import { OpenAI } from "@langchain/openai"; import { LLMChain } from "langchain/chains"; import { MemoryVectorStore } from "langchain/vectorstores/memory"; import { ChatPromptTemplate } from "@langchain/core/prompts"; -import { HuggingFaceTransformersEmbeddings } from "@langchain/community/embeddings/hf_transformers"; +import { HuggingFaceTransformersEmbeddings } from "@langchain/community/embeddings/huggingface_transformers"; import { Document } from "@langchain/core/documents"; // Test exports diff --git a/environment_tests/test-exports-cjs/src/index.ts b/environment_tests/test-exports-cjs/src/index.ts index d2dcb9ebab0d..c4077382afd4 100644 --- 
a/environment_tests/test-exports-cjs/src/index.ts +++ b/environment_tests/test-exports-cjs/src/index.ts @@ -3,7 +3,7 @@ import { OpenAI } from "@langchain/openai"; import { LLMChain } from "langchain/chains"; import { ChatPromptTemplate } from "@langchain/core/prompts"; import { MemoryVectorStore } from "langchain/vectorstores/memory"; -import { HuggingFaceTransformersEmbeddings } from "@langchain/community/embeddings/hf_transformers"; +import { HuggingFaceTransformersEmbeddings } from "@langchain/community/embeddings/huggingface_transformers"; import { Document } from "@langchain/core/documents"; async function test(useAzure: boolean = false) { @@ -25,7 +25,9 @@ async function test(useAzure: boolean = false) { openAIApiKey: "sk-XXXX", }; - const vs = new MemoryVectorStore(new HuggingFaceTransformersEmbeddings({ model: "Xenova/all-MiniLM-L6-v2" })); + const vs = new MemoryVectorStore( + new HuggingFaceTransformersEmbeddings({ model: "Xenova/all-MiniLM-L6-v2" }) + ); await vs.addVectors( [ diff --git a/environment_tests/test-exports-cjs/src/require.js b/environment_tests/test-exports-cjs/src/require.js index 1343f8587f35..f9110d7cd71f 100644 --- a/environment_tests/test-exports-cjs/src/require.js +++ b/environment_tests/test-exports-cjs/src/require.js @@ -3,7 +3,7 @@ const { OpenAI } = require("@langchain/openai"); const { LLMChain } = require("langchain/chains"); const { ChatPromptTemplate } = require("@langchain/core/prompts"); const { MemoryVectorStore } = require("langchain/vectorstores/memory"); -const { HuggingFaceTransformersEmbeddings } = require("@langchain/community/embeddings/hf_transformers"); +const { HuggingFaceTransformersEmbeddings } = require("@langchain/community/embeddings/huggingface_transformers"); const { Document } = require("@langchain/core/documents"); async function test() { diff --git a/environment_tests/test-exports-esm/src/import.cjs b/environment_tests/test-exports-esm/src/import.cjs index 6837754c442c..6ab7bc73355e 100644 --- 
a/environment_tests/test-exports-esm/src/import.cjs +++ b/environment_tests/test-exports-esm/src/import.cjs @@ -4,7 +4,7 @@ async function test() { const { LLMChain } = await import("langchain/chains"); const { ChatPromptTemplate } = await import("@langchain/core/prompts"); const { MemoryVectorStore } = await import("langchain/vectorstores/memory"); - const { HuggingFaceTransformersEmbeddings } = await import("@langchain/community/embeddings/hf_transformers"); + const { HuggingFaceTransformersEmbeddings } = await import("@langchain/community/embeddings/huggingface_transformers"); const { Document } = await import("@langchain/core/documents"); // Test exports diff --git a/environment_tests/test-exports-esm/src/index.js b/environment_tests/test-exports-esm/src/index.js index 2347699ee1dc..0046911c1adb 100644 --- a/environment_tests/test-exports-esm/src/index.js +++ b/environment_tests/test-exports-esm/src/index.js @@ -3,7 +3,7 @@ import { OpenAI } from "@langchain/openai"; import { LLMChain } from "langchain/chains"; import { ChatPromptTemplate } from "@langchain/core/prompts"; import { MemoryVectorStore } from "langchain/vectorstores/memory"; -import { HuggingFaceTransformersEmbeddings } from "@langchain/community/embeddings/hf_transformers"; +import { HuggingFaceTransformersEmbeddings } from "@langchain/community/embeddings/huggingface_transformers"; import { Document } from "@langchain/core/documents"; import { CallbackManager } from "@langchain/core/callbacks/manager"; diff --git a/environment_tests/test-exports-esm/src/index.ts b/environment_tests/test-exports-esm/src/index.ts index c29f419c07a8..2d55732c8758 100644 --- a/environment_tests/test-exports-esm/src/index.ts +++ b/environment_tests/test-exports-esm/src/index.ts @@ -3,7 +3,7 @@ import { OpenAI } from "@langchain/openai"; import { LLMChain } from "langchain/chains"; import { ChatPromptTemplate } from "@langchain/core/prompts"; import { MemoryVectorStore } from "langchain/vectorstores/memory"; -import { 
HuggingFaceTransformersEmbeddings } from "@langchain/community/embeddings/hf_transformers"; +import { HuggingFaceTransformersEmbeddings } from "@langchain/community/embeddings/huggingface_transformers"; import { Document } from "@langchain/core/documents"; async function test(useAzure: boolean = false) { @@ -24,7 +24,9 @@ async function test(useAzure: boolean = false) { openAIApiKey: "sk-XXXX", }; - const vs = new MemoryVectorStore(new HuggingFaceTransformersEmbeddings({ model: "Xenova/all-MiniLM-L6-v2", })); + const vs = new MemoryVectorStore( + new HuggingFaceTransformersEmbeddings({ model: "Xenova/all-MiniLM-L6-v2" }) + ); await vs.addVectors( [ diff --git a/environment_tests/test-exports-esm/src/require.cjs b/environment_tests/test-exports-esm/src/require.cjs index 31461c4b6c23..ab0db459e6c1 100644 --- a/environment_tests/test-exports-esm/src/require.cjs +++ b/environment_tests/test-exports-esm/src/require.cjs @@ -3,7 +3,7 @@ const { OpenAI } = require("@langchain/openai"); const { LLMChain } = require("langchain/chains"); const { ChatPromptTemplate } = require("@langchain/core/prompts"); const { MemoryVectorStore } = require("langchain/vectorstores/memory"); -const { HuggingFaceTransformersEmbeddings } = require("@langchain/community/embeddings/hf_transformers"); +const { HuggingFaceTransformersEmbeddings } = require("@langchain/community/embeddings/huggingface_transformers"); const { Document } = require("@langchain/core/documents"); async function test() { diff --git a/examples/.env.example b/examples/.env.example index 25730a68857e..83a320555978 100644 --- a/examples/.env.example +++ b/examples/.env.example @@ -84,4 +84,8 @@ HANA_HOST=HANA_DB_ADDRESS HANA_PORT=HANA_DB_PORT HANA_UID=HANA_DB_USER HANA_PWD=HANA_DB_PASSWORD -ARK_API_KEY=ADD_YOURS_HERE # https://console.volcengine.com/ \ No newline at end of file +ARK_API_KEY=ADD_YOURS_HERE # https://console.volcengine.com/ +JIRA_HOST=ADD_YOURS_HERE +JIRA_USERNAME=ADD_YOURS_HERE +JIRA_ACCESS_TOKEN=ADD_YOURS_HERE 
+JIRA_PROJECT_KEY=ADD_YOURS_HERE diff --git a/examples/src/document_loaders/jira.ts b/examples/src/document_loaders/jira.ts new file mode 100644 index 000000000000..50b2a9511c45 --- /dev/null +++ b/examples/src/document_loaders/jira.ts @@ -0,0 +1,26 @@ +import { JiraProjectLoader } from "@langchain/community/document_loaders/web/jira"; + +const host = process.env.JIRA_HOST || "https://jira.example.com"; +const username = process.env.JIRA_USERNAME; +const accessToken = process.env.JIRA_ACCESS_TOKEN; +const projectKey = process.env.JIRA_PROJECT_KEY || "PROJ"; + +if (username && accessToken) { + // Created within last 30 days + const createdAfter = new Date(); + createdAfter.setDate(createdAfter.getDate() - 30); + const loader = new JiraProjectLoader({ + host, + projectKey, + username, + accessToken, + createdAfter, + }); + + const documents = await loader.load(); + console.log(`Loaded ${documents.length} Jira document(s)`); +} else { + console.log( + "You must provide a username and access token to run this example." 
+ ); +} diff --git a/examples/src/models/embeddings/hf_transformers.ts b/examples/src/models/embeddings/hf_transformers.ts index 2643eabe7c38..160530d9a30f 100644 --- a/examples/src/models/embeddings/hf_transformers.ts +++ b/examples/src/models/embeddings/hf_transformers.ts @@ -1,4 +1,4 @@ -import { HuggingFaceTransformersEmbeddings } from "@langchain/community/embeddings/hf_transformers"; +import { HuggingFaceTransformersEmbeddings } from "@langchain/community/embeddings/huggingface_transformers"; const model = new HuggingFaceTransformersEmbeddings({ model: "Xenova/all-MiniLM-L6-v2", diff --git a/examples/src/use_cases/local_retrieval_qa/chain.ts b/examples/src/use_cases/local_retrieval_qa/chain.ts index c5da25e3a6f9..38bbceb4199b 100644 --- a/examples/src/use_cases/local_retrieval_qa/chain.ts +++ b/examples/src/use_cases/local_retrieval_qa/chain.ts @@ -2,7 +2,7 @@ import { CheerioWebBaseLoader } from "@langchain/community/document_loaders/web/ import { RecursiveCharacterTextSplitter } from "@langchain/textsplitters"; import { HNSWLib } from "@langchain/community/vectorstores/hnswlib"; import { Ollama } from "@langchain/community/llms/ollama"; -import { HuggingFaceTransformersEmbeddings } from "@langchain/community/embeddings/hf_transformers"; +import { HuggingFaceTransformersEmbeddings } from "@langchain/community/embeddings/huggingface_transformers"; import { formatDocumentsAsString } from "langchain/util/document"; import { PromptTemplate } from "@langchain/core/prompts"; import { diff --git a/examples/src/use_cases/local_retrieval_qa/load_documents.ts b/examples/src/use_cases/local_retrieval_qa/load_documents.ts index 9e449b2cc532..7efea9a161ea 100644 --- a/examples/src/use_cases/local_retrieval_qa/load_documents.ts +++ b/examples/src/use_cases/local_retrieval_qa/load_documents.ts @@ -1,7 +1,7 @@ import { CheerioWebBaseLoader } from "@langchain/community/document_loaders/web/cheerio"; import { RecursiveCharacterTextSplitter } from "@langchain/textsplitters"; 
import { HNSWLib } from "@langchain/community/vectorstores/hnswlib"; -import { HuggingFaceTransformersEmbeddings } from "@langchain/community/embeddings/hf_transformers"; +import { HuggingFaceTransformersEmbeddings } from "@langchain/community/embeddings/huggingface_transformers"; const loader = new CheerioWebBaseLoader( "https://lilianweng.github.io/posts/2023-06-23-agent/" diff --git a/examples/src/use_cases/local_retrieval_qa/qa_chain.ts b/examples/src/use_cases/local_retrieval_qa/qa_chain.ts index 949918067369..6bb4711e6276 100644 --- a/examples/src/use_cases/local_retrieval_qa/qa_chain.ts +++ b/examples/src/use_cases/local_retrieval_qa/qa_chain.ts @@ -3,7 +3,7 @@ import { CheerioWebBaseLoader } from "@langchain/community/document_loaders/web/ import { RecursiveCharacterTextSplitter } from "@langchain/textsplitters"; import { HNSWLib } from "@langchain/community/vectorstores/hnswlib"; import { Ollama } from "@langchain/community/llms/ollama"; -import { HuggingFaceTransformersEmbeddings } from "@langchain/community/embeddings/hf_transformers"; +import { HuggingFaceTransformersEmbeddings } from "@langchain/community/embeddings/huggingface_transformers"; import { PromptTemplate } from "@langchain/core/prompts"; const loader = new CheerioWebBaseLoader( diff --git a/langchain-core/package.json b/langchain-core/package.json index 32510ecc8c72..b938bb050122 100644 --- a/langchain-core/package.json +++ b/langchain-core/package.json @@ -1,6 +1,6 @@ { "name": "@langchain/core", - "version": "0.3.26", + "version": "0.3.27", "description": "Core LangChain.js abstractions and schemas", "type": "module", "engines": { diff --git a/libs/langchain-aws/package.json b/libs/langchain-aws/package.json index dfa7745f56bc..50a419fb57e2 100644 --- a/libs/langchain-aws/package.json +++ b/libs/langchain-aws/package.json @@ -1,6 +1,6 @@ { "name": "@langchain/aws", - "version": "0.1.2", + "version": "0.1.3", "description": "LangChain AWS integration", "type": "module", "engines": { diff 
--git a/libs/langchain-community/.env.example b/libs/langchain-community/.env.example new file mode 100644 index 000000000000..2c36f95558b3 --- /dev/null +++ b/libs/langchain-community/.env.example @@ -0,0 +1,4 @@ +JIRA_HOST=ADD_YOURS_HERE +JIRA_USERNAME=ADD_YOURS_HERE +JIRA_ACCESS_TOKEN=ADD_YOURS_HERE +JIRA_PROJECT_KEY=ADD_YOURS_HERE diff --git a/libs/langchain-community/.gitignore b/libs/langchain-community/.gitignore index 2f2abe352f2c..6eb6e9c7d9d3 100644 --- a/libs/langchain-community/.gitignore +++ b/libs/langchain-community/.gitignore @@ -178,6 +178,10 @@ embeddings/hf_transformers.cjs embeddings/hf_transformers.js embeddings/hf_transformers.d.ts embeddings/hf_transformers.d.cts +embeddings/huggingface_transformers.cjs +embeddings/huggingface_transformers.js +embeddings/huggingface_transformers.d.ts +embeddings/huggingface_transformers.d.cts embeddings/ibm.cjs embeddings/ibm.js embeddings/ibm.d.ts @@ -930,6 +934,10 @@ document_loaders/web/imsdb.cjs document_loaders/web/imsdb.js document_loaders/web/imsdb.d.ts document_loaders/web/imsdb.d.cts +document_loaders/web/jira.cjs +document_loaders/web/jira.js +document_loaders/web/jira.d.ts +document_loaders/web/jira.d.cts document_loaders/web/figma.cjs document_loaders/web/figma.js document_loaders/web/figma.d.ts diff --git a/libs/langchain-community/langchain.config.js b/libs/langchain-community/langchain.config.js index 496e015280e7..3883e56873f8 100644 --- a/libs/langchain-community/langchain.config.js +++ b/libs/langchain-community/langchain.config.js @@ -80,6 +80,7 @@ export const config = { "embeddings/gradient_ai": "embeddings/gradient_ai", "embeddings/hf": "embeddings/hf", "embeddings/hf_transformers": "embeddings/hf_transformers", + "embeddings/huggingface_transformers": "embeddings/huggingface_transformers", "embeddings/ibm": "embeddings/ibm", "embeddings/jina": "embeddings/jina", "embeddings/llama_cpp": "embeddings/llama_cpp", @@ -288,6 +289,7 @@ export const config = { "document_loaders/web/gitbook": 
"document_loaders/web/gitbook", "document_loaders/web/hn": "document_loaders/web/hn", "document_loaders/web/imsdb": "document_loaders/web/imsdb", + "document_loaders/web/jira": "document_loaders/web/jira", "document_loaders/web/figma": "document_loaders/web/figma", "document_loaders/web/firecrawl": "document_loaders/web/firecrawl", "document_loaders/web/github": "document_loaders/web/github", @@ -356,6 +358,7 @@ export const config = { "embeddings/tensorflow", "embeddings/hf", "embeddings/hf_transformers", + "embeddings/huggingface_transformers", "embeddings/ibm", "embeddings/jina", "embeddings/llama_cpp", diff --git a/libs/langchain-community/package.json b/libs/langchain-community/package.json index 50795f2c3cef..a286ff97ff01 100644 --- a/libs/langchain-community/package.json +++ b/libs/langchain-community/package.json @@ -1,6 +1,6 @@ { "name": "@langchain/community", - "version": "0.3.20", + "version": "0.3.21", "description": "Third-party integrations for LangChain.js", "type": "module", "engines": { @@ -78,6 +78,7 @@ "@google-cloud/storage": "^7.7.0", "@gradientai/nodejs-sdk": "^1.2.0", "@huggingface/inference": "^2.6.4", + "@huggingface/transformers": "^3.2.3", "@ibm-cloud/watsonx-ai": "^1.3.0", "@jest/globals": "^29.5.0", "@lancedb/lancedb": "^0.13.0", @@ -124,6 +125,7 @@ "@types/pg": "^8.11.0", "@types/pg-copy-streams": "^1.2.2", "@types/uuid": "^9", + "@types/word-extractor": "^1", "@types/ws": "^8", "@typescript-eslint/eslint-plugin": "^5.58.0", "@typescript-eslint/parser": "^5.58.0", @@ -134,7 +136,6 @@ "@vercel/postgres": "^0.5.0", "@writerai/writer-sdk": "^0.40.2", "@xata.io/client": "^0.28.0", - "@xenova/transformers": "^2.17.2", "@zilliz/milvus2-sdk-node": ">=2.3.5", "apify-client": "^2.7.1", "assemblyai": "^4.6.0", @@ -217,6 +218,7 @@ "voy-search": "0.6.2", "weaviate-ts-client": "^1.4.0", "web-auth-library": "^1.0.3", + "word-extractor": "^1.0.4", "youtube-transcript": "^1.0.6", "youtubei.js": "^9.1.0" }, @@ -249,6 +251,7 @@ "@google-cloud/storage": 
"^6.10.1 || ^7.7.0", "@gradientai/nodejs-sdk": "^1.2.0", "@huggingface/inference": "^2.6.4", + "@huggingface/transformers": "^3.2.3", "@ibm-cloud/watsonx-ai": "*", "@lancedb/lancedb": "^0.12.0", "@langchain/core": ">=0.2.21 <0.4.0", @@ -282,7 +285,6 @@ "@vercel/postgres": "^0.5.0", "@writerai/writer-sdk": "^0.40.2", "@xata.io/client": "^0.28.0", - "@xenova/transformers": "^2.17.2", "@zilliz/milvus2-sdk-node": ">=2.3.5", "apify-client": "^2.7.1", "assemblyai": "^4.6.0", @@ -344,6 +346,7 @@ "voy-search": "0.6.2", "weaviate-ts-client": "*", "web-auth-library": "^1.0.3", + "word-extractor": "*", "ws": "^8.14.2", "youtube-transcript": "^1.0.6", "youtubei.js": "^9.1.0" @@ -430,6 +433,9 @@ "@huggingface/inference": { "optional": true }, + "@huggingface/transformers": { + "optional": true + }, "@lancedb/lancedb": { "optional": true }, @@ -523,9 +529,6 @@ "@xata.io/client": { "optional": true }, - "@xenova/transformers": { - "optional": true - }, "@zilliz/milvus2-sdk-node": { "optional": true }, @@ -703,6 +706,9 @@ "web-auth-library": { "optional": true }, + "word-extractor": { + "optional": true + }, "ws": { "optional": true }, @@ -1122,6 +1128,15 @@ "import": "./embeddings/hf_transformers.js", "require": "./embeddings/hf_transformers.cjs" }, + "./embeddings/huggingface_transformers": { + "types": { + "import": "./embeddings/huggingface_transformers.d.ts", + "require": "./embeddings/huggingface_transformers.d.cts", + "default": "./embeddings/huggingface_transformers.d.ts" + }, + "import": "./embeddings/huggingface_transformers.js", + "require": "./embeddings/huggingface_transformers.cjs" + }, "./embeddings/ibm": { "types": { "import": "./embeddings/ibm.d.ts", @@ -2814,6 +2829,15 @@ "import": "./document_loaders/web/imsdb.js", "require": "./document_loaders/web/imsdb.cjs" }, + "./document_loaders/web/jira": { + "types": { + "import": "./document_loaders/web/jira.d.ts", + "require": "./document_loaders/web/jira.d.cts", + "default": "./document_loaders/web/jira.d.ts" + }, + 
"import": "./document_loaders/web/jira.js", + "require": "./document_loaders/web/jira.cjs" + }, "./document_loaders/web/figma": { "types": { "import": "./document_loaders/web/figma.d.ts", @@ -3349,6 +3373,10 @@ "embeddings/hf_transformers.js", "embeddings/hf_transformers.d.ts", "embeddings/hf_transformers.d.cts", + "embeddings/huggingface_transformers.cjs", + "embeddings/huggingface_transformers.js", + "embeddings/huggingface_transformers.d.ts", + "embeddings/huggingface_transformers.d.cts", "embeddings/ibm.cjs", "embeddings/ibm.js", "embeddings/ibm.d.ts", @@ -4101,6 +4129,10 @@ "document_loaders/web/imsdb.js", "document_loaders/web/imsdb.d.ts", "document_loaders/web/imsdb.d.cts", + "document_loaders/web/jira.cjs", + "document_loaders/web/jira.js", + "document_loaders/web/jira.d.ts", + "document_loaders/web/jira.d.cts", "document_loaders/web/figma.cjs", "document_loaders/web/figma.js", "document_loaders/web/figma.d.ts", diff --git a/libs/langchain-community/src/document_loaders/fs/docx.ts b/libs/langchain-community/src/document_loaders/fs/docx.ts index 72518aec3b2e..e1edef2fc8e7 100644 --- a/libs/langchain-community/src/document_loaders/fs/docx.ts +++ b/libs/langchain-community/src/document_loaders/fs/docx.ts @@ -1,19 +1,33 @@ import { Document } from "@langchain/core/documents"; import { BufferLoader } from "langchain/document_loaders/fs/buffer"; +type DocxLoaderOptions = { + type: "docx" | "doc"; +}; /** * A class that extends the `BufferLoader` class. It represents a document * loader that loads documents from DOCX files. 
+ * It has a constructor that takes a `filePathOrBlob` parameter representing the path to the word + * file or a Blob object, and an optional `options` parameter of type + * `DocxLoaderOptions` */ export class DocxLoader extends BufferLoader { - constructor(filePathOrBlob: string | Blob) { + protected options: DocxLoaderOptions = { type: "docx" }; + + constructor(filePathOrBlob: string | Blob, options?: DocxLoaderOptions) { super(filePathOrBlob); + if (options) { + this.options = { + ...options, + }; + } } /** * A method that takes a `raw` buffer and `metadata` as parameters and * returns a promise that resolves to an array of `Document` instances. It - * uses the `extractRawText` function from the `mammoth` module to extract + * uses the `extractRawText` function from the `mammoth` module or + * `extract` method from the `word-extractor` module to extract * the raw text content from the buffer. If the extracted text content is * empty, it returns an empty array. Otherwise, it creates a new * `Document` instance with the extracted text content and the provided @@ -26,6 +40,31 @@ export class DocxLoader extends BufferLoader { raw: Buffer, metadata: Document["metadata"] ): Promise { + if (this.options.type === "doc") { + return this.parseDoc(raw, metadata); + } + return this.parseDocx(raw, metadata); + } + + /** + * A private method that takes a `raw` buffer and `metadata` as parameters and + * returns a promise that resolves to an array of `Document` instances. It + * uses the `extractRawText` function from the `mammoth` module to extract + * the raw text content from the buffer. If the extracted text content is + * empty, it returns an empty array. Otherwise, it creates a new + * `Document` instance with the extracted text content and the provided + * metadata, and returns it as an array. + * @param raw The raw buffer from which to extract text content. + * @param metadata The metadata to be associated with the created `Document` instance. 
+ * @returns A promise that resolves to an array of `Document` instances. + */ + private async parseDocx( + raw: Buffer, + metadata: Document["metadata"] + ): Promise { + if (this.options.type === "doc") { + return this.parseDoc(raw, metadata); + } const { extractRawText } = await DocxLoaderImports(); const docx = await extractRawText({ buffer: raw, @@ -40,6 +79,33 @@ export class DocxLoader extends BufferLoader { }), ]; } + + /** + * A private method that takes a `raw` buffer and `metadata` as parameters and + * returns a promise that resolves to an array of `Document` instances. It + * uses the `extract` method from the `word-extractor` module to extract + * the raw text content from the buffer. If the extracted text content is + * empty, it returns an empty array. Otherwise, it creates a new + * `Document` instance with the extracted text content and the provided + * metadata, and returns it as an array. + * @param raw The raw buffer from which to extract text content. + * @param metadata The metadata to be associated with the created `Document` instance. + * @returns A promise that resolves to an array of `Document` instances. + */ + private async parseDoc( + raw: Buffer, + metadata: Document["metadata"] + ): Promise { + const WordExtractor = await DocLoaderImports(); + const extractor = new WordExtractor(); + const doc = await extractor.extract(raw); + return [ + new Document({ + pageContent: doc.getBody(), + metadata, + }), + ]; + } } async function DocxLoaderImports() { @@ -53,3 +119,15 @@ async function DocxLoaderImports() { ); } } + +async function DocLoaderImports() { + try { + const WordExtractor = await import("word-extractor"); + return WordExtractor.default; + } catch (e) { + console.error(e); + throw new Error( + "Failed to load word-extractor. Please install it with eg. `npm install word-extractor`." 
+ ); + } +} diff --git a/libs/langchain-community/src/document_loaders/tests/docx.test.ts b/libs/langchain-community/src/document_loaders/tests/docx.test.ts index 63395bb51bc0..82e66aa91907 100644 --- a/libs/langchain-community/src/document_loaders/tests/docx.test.ts +++ b/libs/langchain-community/src/document_loaders/tests/docx.test.ts @@ -3,7 +3,7 @@ import * as url from "node:url"; import * as path from "node:path"; import { DocxLoader } from "../fs/docx.js"; -test("Test Word doc loader from file", async () => { +test("Test Word doc loader from .docx file", async () => { const filePath = path.resolve( path.dirname(url.fileURLToPath(import.meta.url)), "./example_data/attention.docx" @@ -14,3 +14,17 @@ test("Test Word doc loader from file", async () => { expect(docs.length).toBe(1); // not much text in the example expect(docs[0].pageContent).toContain("an interesting activity"); }); + +test("Test Word doc loader from .doc file", async () => { + const filePath = path.resolve( + path.dirname(url.fileURLToPath(import.meta.url)), + "./example_data/attention.doc" + ); + const loader = new DocxLoader(filePath, { + type: "doc", + }); + const docs = await loader.load(); + + expect(docs.length).toBe(1); // not much text in the example + expect(docs[0].pageContent).toContain("an interesting activity"); +}); diff --git a/libs/langchain-community/src/document_loaders/tests/example_data/attention.doc b/libs/langchain-community/src/document_loaders/tests/example_data/attention.doc new file mode 100644 index 000000000000..e68399c5a7c8 Binary files /dev/null and b/libs/langchain-community/src/document_loaders/tests/example_data/attention.doc differ diff --git a/libs/langchain-community/src/document_loaders/tests/jira.int.test.ts b/libs/langchain-community/src/document_loaders/tests/jira.int.test.ts new file mode 100644 index 000000000000..e01d1d65663b --- /dev/null +++ b/libs/langchain-community/src/document_loaders/tests/jira.int.test.ts @@ -0,0 +1,209 @@ +/** + * NOTE: Env var 
should be set, and configured project should exist + */ +import { Document } from "@langchain/core/documents"; +import { expect, test } from "@jest/globals"; +import { + JiraIssue, + JiraProjectLoader, + JiraProjectLoaderParams, +} from "../web/jira.js"; + +describe("JiraProjectLoader Integration Tests", () => { + const JIRA_HOST = requireEnvVar("JIRA_HOST"); + const JIRA_USERNAME = requireEnvVar("JIRA_USERNAME"); + const JIRA_ACCESS_TOKEN = requireEnvVar("JIRA_ACCESS_TOKEN"); + const JIRA_PROJECT_KEY = requireEnvVar("JIRA_PROJECT_KEY"); + const jiraConf: JiraProjectLoaderParams = { + host: JIRA_HOST, + projectKey: JIRA_PROJECT_KEY, + username: JIRA_USERNAME, + accessToken: JIRA_ACCESS_TOKEN, + limitPerRequest: 20, + }; + + test("should load Jira project issues as documents successfully", async () => { + const docs = await loadJiraDocsUntil((docs) => docs.length > 0); + + expect(docs).toBeDefined(); + expect(Array.isArray(docs)).toBe(true); + + if (docs.length < 1) { + // Skip test if not enough issues available + return; + } + const firstDoc = docs[0]; + + // Check document structure + expect(firstDoc).toHaveProperty("pageContent"); + expect(firstDoc).toHaveProperty("metadata"); + + // Check metadata + expect(firstDoc.metadata).toHaveProperty("id"); + expect(firstDoc.metadata).toHaveProperty("host", JIRA_HOST); + expect(firstDoc.metadata).toHaveProperty("projectKey", JIRA_PROJECT_KEY); + + // Check pageContent contains essential Jira issue information + const content = firstDoc.pageContent; + expect(content).toContain("Issue:"); + expect(content).toContain("Project:"); + expect(content).toContain("Status:"); + expect(content).toContain("Priority:"); + expect(content).toContain("Type:"); + expect(content).toContain("Creator:"); + }); + + test("should filter issues based on createdAfter date", async () => { + // First load at least 2 issues with different creation dates (ignoring time) + const baseIssues = await loadJiraIssuesUntil(haveTwoDifferentCreationDates); + 
if (baseIssues.length < 2) { + // Skip test if not enough issues available + return; + } + + // Create a map from date string without time to list of issues + const dateToIssueMap = new Map(); + baseIssues.forEach((issue) => { + const date = asStringWithoutTime(new Date(issue.fields.created)); + dateToIssueMap.set(date, (dateToIssueMap.get(date) ?? []).concat(issue)); + }); + // Convert map to list of {date, issues} + const issuesGroupedByDate = Array.from( + dateToIssueMap, + ([date, issues]) => ({ date, issues }) + ); + issuesGroupedByDate.sort((a, b) => a.date.localeCompare(b.date)); + + // Pick middle date to split issues in two groups + const middleIndex = Math.floor(issuesGroupedByDate.length / 2); + const middleDate = new Date(issuesGroupedByDate[middleIndex].date); + const issuesAfterMiddle = issuesGroupedByDate + .slice(middleIndex) + .flatMap(({ issues }) => issues); + + // Load issues created after middle date + const loader = new JiraProjectLoader({ + ...jiraConf, + createdAfter: middleDate, + }); + + const filteredDocs = await loader.load(); + + // Verify we got the expected issues + expect(filteredDocs.length).toBeGreaterThan(0); + expect(filteredDocs.length).toBeLessThan(baseIssues.length); + + // Verify all returned issues are created after our cutoff date + const middleDateTimestamp = middleDate.getTime(); + filteredDocs.forEach((doc) => { + const issueDateString = doc.pageContent + .split("\n") + .filter((line) => /^Created: /.test(line))[0] + .replace("Created: ", ""); + const issueDateTimestamp = new Date( + asStringWithoutTime(new Date(issueDateString)) + ).getTime(); + expect(issueDateTimestamp).toBeGreaterThanOrEqual(middleDateTimestamp); + }); + + // Verify we got the same issues as in our original set + const filteredIds = new Set(filteredDocs.map((d) => d.metadata.id)); + const expectedIds = new Set(issuesAfterMiddle.map((issue) => issue.id)); + expect(filteredIds).toEqual(expectedIds); + }); + + test("should handle invalid credentials", 
async () => { + const loader = new JiraProjectLoader({ + ...jiraConf, + username: "invalid_username", + accessToken: "invalid_token", + }); + + const docs = await loader.load(); + expect(docs).toEqual([]); + }); + + test("should handle invalid project key", async () => { + const loader = new JiraProjectLoader({ + ...jiraConf, + projectKey: "INVALID_PROJECT_KEY", + }); + + const docs = await loader.load(); + expect(docs).toEqual([]); + }); + + function requireEnvVar(name: string): string { + // eslint-disable-next-line no-process-env + const value = process.env[name]; + if (!value) { + throw new Error(`environment variable "${name}" must be set`); + } + return value; + } + + function asStringWithoutTime(date: Date): string { + return date.toISOString().split("T")[0]; + } + + function sameDate(a: Date, b: Date) { + return asStringWithoutTime(a) === asStringWithoutTime(b); + } + + function haveTwoDifferentCreationDates(issues: JiraIssue[]): boolean { + return ( + issues.length >= 2 && + issues + .slice(1) + .some( + (issue) => + !sameDate( + new Date(issue.fields.created), + new Date(issues[0].fields.created) + ) + ) + ); + } + + async function loadJiraDocsUntil(predicate: (docs: Document[]) => boolean) { + const load = (createdAfter: Date) => + new JiraProjectLoader({ + ...jiraConf, + createdAfter, + }).load(); + return loadUntil(load, predicate); + } + + async function loadJiraIssuesUntil( + predicate: (docs: JiraIssue[]) => boolean + ) { + const load = (createdAfter: Date) => + new JiraProjectLoader({ + ...jiraConf, + createdAfter, + }).loadAsIssues(); + return loadUntil(load, predicate); + } + + async function loadUntil( + loadCreatedAfter: (date: Date) => Promise, + predicate: (loaded: T[]) => boolean + ): Promise { + const now = new Date(); + let months = 1; + const maxMonths = 120; + + let loaded: T[] = []; + while (!predicate(loaded) && months < maxMonths) { + const createdAfter = new Date(now); + createdAfter.setDate(now.getDate() - months * 30); + loaded = 
await loadCreatedAfter(createdAfter); + months *= 1.2; + } + + if (months >= maxMonths) { + return []; + } + return loaded; + } +}); diff --git a/libs/langchain-community/src/document_loaders/tests/jira.test.ts b/libs/langchain-community/src/document_loaders/tests/jira.test.ts new file mode 100644 index 000000000000..92b1224446e0 --- /dev/null +++ b/libs/langchain-community/src/document_loaders/tests/jira.test.ts @@ -0,0 +1,267 @@ +import { faker } from "@faker-js/faker"; +import { + JiraDocumentConverter, + JiraIssue, + JiraUser, + JiraIssueType, + JiraPriority, + JiraProgress, + JiraProject, + JiraStatus, + JiraStatusCategory, +} from "../web/jira.js"; + +describe("JiraDocumentConverter Unit Tests", () => { + function getConverter() { + return new JiraDocumentConverter({ + projectKey: "PROJ", + host: "https://example.com", + }); + } + + it("should handle missing optional fields", () => { + const issue: JiraIssue = someJiraIssue(); + delete issue.fields.assignee; + delete issue.fields.duedate; + + const converter = getConverter(); + const document = converter.convertToDocuments([issue])[0]; + + expect(document).toBeDefined(); + expect(document.pageContent).toContain(issue.fields.summary); + expect(document.pageContent).toContain("Assignee: Unassigned"); + expect(document.pageContent).not.toMatch(/.*^Due Date: .*/m); + expect(document.metadata).toEqual({ + id: issue.id, + host: converter.host, + projectKey: converter.projectKey, + }); + }); + + it("should format the document content properly", () => { + const converter = getConverter(); + const issue = someJiraIssue(); + const document = converter.convertToDocuments([issue])[0]; + + expect(document.pageContent).toContain(issue.fields.summary); + expect(document.pageContent).toContain(issue.fields.description); + expect(document.pageContent).toContain( + issue.fields.labels?.join(", ") || "" + ); + expect(document.pageContent).toContain( + issue.fields.reporter?.displayName || "" + ); + 
expect(document.pageContent).toContain( + issue.fields.assignee?.displayName || "Unassigned" + ); + expect(document.pageContent).toContain(issue.fields.duedate || ""); + expect(document.pageContent).toContain( + issue.fields.timeestimate?.toString() || "" + ); + expect(document.pageContent).toContain( + issue.fields.timespent?.toString() || "" + ); + expect(document.pageContent).toContain(issue.fields.resolutiondate || ""); + expect(document.pageContent).toContain( + issue.fields.progress.percent?.toString() || "" + ); + }); +}); + +export function someJiraIssueType( + overrides: Partial = {} +): JiraIssueType { + const baseIssueType: JiraIssueType = { + avatarId: faker.number.int({ min: 1, max: 100 }), + description: faker.lorem.sentence(), + entityId: faker.string.uuid(), + hierarchyLevel: faker.number.int({ min: 1, max: 5 }), + iconUrl: faker.image.url(), + id: faker.string.numeric(5), + name: faker.helpers.arrayElement(["Bug", "Task", "Story", "Epic"]), + self: faker.internet.url(), + subtask: false, + }; + + return { + ...baseIssueType, + ...overrides, + }; +} + +export function someJiraUser(overrides: Partial = {}): JiraUser { + const baseUser = { + accountId: faker.string.uuid(), + accountType: "atlassian", + active: true, + avatarUrls: { + "16x16": faker.image.avatar(), + "24x24": faker.image.avatar(), + "32x32": faker.image.avatar(), + "48x48": faker.image.avatar(), + }, + displayName: faker.person.fullName(), + emailAddress: faker.internet.email(), + self: faker.internet.url(), + timeZone: faker.location.timeZone(), + }; + + return { + ...baseUser, + ...overrides, + }; +} + +export function someJiraPriority( + overrides: Partial = {} +): JiraPriority { + const basePriority: JiraPriority = { + iconUrl: faker.image.url(), + id: faker.string.numeric(2), + name: faker.helpers.arrayElement([ + "Highest", + "High", + "Medium", + "Low", + "Lowest", + ]), + self: faker.internet.url(), + }; + + return { + ...basePriority, + ...overrides, + }; +} + +export function 
someJiraProgress( + overrides: Partial = {} +): JiraProgress { + const baseProgress: JiraProgress = { + progress: faker.number.int({ min: 0, max: 100 }), + total: 100, + percent: faker.number.int({ min: 0, max: 100 }), + }; + + return { + ...baseProgress, + ...overrides, + }; +} + +export function someJiraProject( + overrides: Partial = {} +): JiraProject { + const baseProject: JiraProject = { + avatarUrls: { + "16x16": faker.image.avatar(), + "24x24": faker.image.avatar(), + "32x32": faker.image.avatar(), + "48x48": faker.image.avatar(), + }, + id: faker.string.numeric(5), + key: faker.string.alpha(4).toUpperCase(), + name: faker.company.name(), + projectTypeKey: "software", + self: faker.internet.url(), + simplified: false, + }; + + return { + ...baseProject, + ...overrides, + }; +} + +export function someJiraStatusCategory( + overrides: Partial = {} +): JiraStatusCategory { + const baseStatusCategory: JiraStatusCategory = { + self: faker.internet.url(), + id: faker.number.int({ min: 1, max: 5 }), + key: faker.helpers.arrayElement(["new", "indeterminate", "done"]), + colorName: faker.helpers.arrayElement(["blue-gray", "yellow", "green"]), + name: faker.helpers.arrayElement(["To Do", "In Progress", "Done"]), + }; + + return { + ...baseStatusCategory, + ...overrides, + }; +} + +export function someJiraStatus( + overrides: Partial = {} +): JiraStatus { + const baseStatus: JiraStatus = { + self: faker.internet.url(), + description: faker.lorem.sentence(), + iconUrl: faker.image.url(), + name: faker.helpers.arrayElement([ + "To Do", + "In Progress", + "Done", + "Blocked", + ]), + id: faker.string.numeric(2), + statusCategory: someJiraStatusCategory(), + }; + + return { + ...baseStatus, + ...overrides, + }; +} + +export function someJiraIssue(overrides: Partial = {}): JiraIssue { + const issueKey = `${faker.string.alpha(4).toUpperCase()}-${faker.number.int({ + min: 1, + max: 9999, + })}`; + + const baseIssue: JiraIssue = { + expand: "renderedFields", + id: 
faker.string.numeric(5), + self: `https://${faker.internet.domainName()}/rest/api/2/issue/${issueKey}`, + key: issueKey, + fields: { + assignee: faker.datatype.boolean() ? someJiraUser() : undefined, + created: faker.date.past().toISOString(), + description: faker.lorem.paragraph(), + issuelinks: [], + issuetype: someJiraIssueType(), + labels: faker.datatype.boolean() + ? Array.from({ length: faker.number.int({ min: 1, max: 5 }) }, () => + faker.word.noun() + ) + : undefined, + priority: someJiraPriority(), + progress: someJiraProgress(), + project: someJiraProject(), + reporter: faker.datatype.boolean() ? someJiraUser() : undefined, + creator: someJiraUser(), + resolutiondate: faker.datatype.boolean() + ? faker.date.recent().toISOString() + : undefined, + status: someJiraStatus(), + subtasks: [], + summary: faker.lorem.sentence(), + timeestimate: faker.datatype.boolean() + ? faker.number.int({ min: 1, max: 100 }) * 3600 + : undefined, + timespent: faker.datatype.boolean() + ? faker.number.int({ min: 1, max: 100 }) * 3600 + : undefined, + updated: faker.date.recent().toISOString(), + duedate: faker.datatype.boolean() + ? 
faker.date.future().toISOString() + : undefined, + }, + }; + console.log(baseIssue.fields.duedate); + + return { + ...baseIssue, + ...overrides, + }; +} diff --git a/libs/langchain-community/src/document_loaders/web/cheerio.ts b/libs/langchain-community/src/document_loaders/web/cheerio.ts index 106b1ffe9d33..72d06aed2d10 100644 --- a/libs/langchain-community/src/document_loaders/web/cheerio.ts +++ b/libs/langchain-community/src/document_loaders/web/cheerio.ts @@ -130,8 +130,9 @@ export class CheerioWebBaseLoader */ async load(): Promise { const $ = await this.scrape(); + const title = $("title").text(); const text = $(this.selector).text(); - const metadata = { source: this.webPath }; + const metadata = { source: this.webPath, title }; return [new Document({ pageContent: text, metadata })]; } diff --git a/libs/langchain-community/src/document_loaders/web/jira.ts b/libs/langchain-community/src/document_loaders/web/jira.ts new file mode 100644 index 000000000000..59e0879d2ab9 --- /dev/null +++ b/libs/langchain-community/src/document_loaders/web/jira.ts @@ -0,0 +1,441 @@ +import { Document } from "@langchain/core/documents"; +import { BaseDocumentLoader } from "@langchain/core/document_loaders/base"; + +export type JiraStatusCategory = { + self: string; + id: number; + key: string; + colorName: string; + name: string; +}; + +export type JiraStatus = { + self: string; + description: string; + iconUrl: string; + name: string; + id: string; + statusCategory: JiraStatusCategory; +}; + +export type JiraUser = { + accountId: string; + accountType: string; + active: boolean; + avatarUrls: { + "16x16": string; + "24x24": string; + "32x32": string; + "48x48": string; + }; + displayName: string; + emailAddress: string; + self: string; + timeZone: string; +}; + +export type JiraIssueType = { + avatarId: number; + description: string; + entityId: string; + hierarchyLevel: number; + iconUrl: string; + id: string; + name: string; + self: string; + subtask: boolean; +}; + +export 
type JiraPriority = { + iconUrl: string; + id: string; + name: string; + self: string; +}; + +export type JiraProgress = { + progress: number; + total: number; + percent?: number; +}; + +export type JiraProject = { + avatarUrls: { + "16x16": string; + "24x24": string; + "32x32": string; + "48x48": string; + }; + id: string; + key: string; + name: string; + projectTypeKey: string; + self: string; + simplified: boolean; +}; + +export type JiraSubTask = { + id: string; + key: string; + self: string; + fields: { + issuetype: JiraIssueType; + priority: JiraPriority; + status: JiraStatus; + summary: string; + }; +}; + +export type JiraIssueLinkType = { + id: string; + name: string; + inward: string; + outward: string; + self: string; +}; + +export type JiraBriefIssue = { + id: string; + key: string; + self: string; + fields: { + summary: string; + status: JiraStatus; + priority: JiraPriority; + issuetype: JiraIssueType; + }; +}; + +export type JiraIssueLink = { + id: string; + self: string; + type: JiraIssueLinkType; + inwardIssue?: JiraBriefIssue; + outwardIssue?: JiraBriefIssue; +}; + +export type JiraIssue = { + expand: string; + id: string; + self: string; + key: string; + fields: { + assignee?: JiraUser; + created: string; + description: string; + issuelinks: JiraIssueLink[]; + issuetype: JiraIssueType; + labels?: string[]; + priority: JiraPriority; + progress: JiraProgress; + project: JiraProject; + reporter?: JiraUser; + creator: JiraUser; + resolutiondate?: string; + status: JiraStatus; + subtasks: JiraSubTask[]; + summary: string; + timeestimate?: number; + timespent?: number; + updated: string; + duedate?: string; + parent?: JiraBriefIssue; + }; +}; + +export type JiraAPIResponse = { + expand: string; + startAt: number; + maxResults: number; + total: number; + issues: JiraIssue[]; +}; + +/** + * Interface representing the parameters for configuring the + * JiraDocumentConverter. 
+ */ +export interface JiraDocumentConverterParams { + host: string; + projectKey: string; +} + +/** + * Class responsible for converting Jira issues to Document objects + */ +export class JiraDocumentConverter { + public readonly host: string; + + public readonly projectKey: string; + + constructor({ host, projectKey }: JiraDocumentConverterParams) { + this.host = host; + this.projectKey = projectKey; + } + + public convertToDocuments(issues: JiraIssue[]): Document[] { + return issues.map((issue) => this.documentFromIssue(issue)); + } + + private documentFromIssue(issue: JiraIssue): Document { + return new Document({ + pageContent: this.formatIssueInfo({ + issue, + host: this.host, + }), + metadata: { + id: issue.id, + host: this.host, + projectKey: this.projectKey, + }, + }); + } + + private formatIssueInfo({ + issue, + host, + }: { + issue: JiraIssue; + host: string; + }): string { + let text = `Issue: ${this.formatMainIssueInfoText({ issue, host })}\n`; + text += `Project: ${issue.fields.project.name} (${issue.fields.project.key}, ID ${issue.fields.project.id})\n`; + text += `Status: ${issue.fields.status.name}\n`; + text += `Priority: ${issue.fields.priority.name}\n`; + text += `Type: ${issue.fields.issuetype.name}\n`; + text += `Creator: ${issue.fields.creator.displayName}\n`; + + if (issue.fields.labels && issue.fields.labels.length > 0) { + text += `Labels: ${issue.fields.labels.join(", ")}\n`; + } + + text += `Created: ${issue.fields.created}\n`; + text += `Updated: ${issue.fields.updated}\n`; + + if (issue.fields.reporter) { + text += `Reporter: ${issue.fields.reporter.displayName}\n`; + } + + text += `Assignee: ${issue.fields.assignee?.displayName ?? 
"Unassigned"}\n`; + + if (issue.fields.duedate) { + text += `Due Date: ${issue.fields.duedate}\n`; + } + + if (issue.fields.timeestimate) { + text += `Time Estimate: ${issue.fields.timeestimate}\n`; + } + + if (issue.fields.timespent) { + text += `Time Spent: ${issue.fields.timespent}\n`; + } + + if (issue.fields.resolutiondate) { + text += `Resolution Date: ${issue.fields.resolutiondate}\n`; + } + + if (issue.fields.description) { + text += `Description: ${issue.fields.description}\n`; + } + + if (issue.fields.progress.percent) { + text += `Progress: ${issue.fields.progress.percent}%\n`; + } + + if (issue.fields.parent) { + text += `Parent Issue: ${this.formatMainIssueInfoText({ + issue: issue.fields.parent, + host, + })}\n`; + } + + if (issue.fields.subtasks.length > 0) { + text += `Subtasks:\n`; + issue.fields.subtasks.forEach((subtask) => { + text += ` - ${this.formatMainIssueInfoText({ + issue: subtask, + host, + })}\n`; + }); + } + + if (issue.fields.issuelinks.length > 0) { + text += `Issue Links:\n`; + issue.fields.issuelinks.forEach((link) => { + text += ` - ${link.type.name}\n`; + if (link.inwardIssue) { + text += ` - ${this.formatMainIssueInfoText({ + issue: link.inwardIssue, + host, + })}\n`; + } + if (link.outwardIssue) { + text += ` - ${this.formatMainIssueInfoText({ + issue: link.outwardIssue, + host, + })}\n`; + } + }); + } + + return text; + } + + private getLinkToIssue({ + issueKey, + host, + }: { + issueKey: string; + host: string; + }): string { + return `${host}/browse/${issueKey}`; + } + + private formatMainIssueInfoText({ + issue, + host, + }: { + issue: JiraIssue | JiraBriefIssue; + host: string; + }): string { + const link = this.getLinkToIssue({ + issueKey: issue.key, + host, + }); + + const text = `${issue.key} (ID ${issue.id}) - ${issue.fields.summary} (${link})`; + + return text; + } +} + +/** + * Interface representing the parameters for configuring the + * JiraProjectLoader. 
+ */
+export interface JiraProjectLoaderParams {
+  host: string;
+  projectKey: string;
+  username: string;
+  accessToken: string;
+  limitPerRequest?: number;
+  createdAfter?: Date;
+}
+
+const API_ENDPOINTS = {
+  SEARCH: "/rest/api/2/search",
+};
+
+/**
+ * Class representing a document loader for loading issues from a Jira project.
+ */
+export class JiraProjectLoader extends BaseDocumentLoader {
+  private readonly accessToken: string;
+
+  public readonly host: string;
+
+  public readonly projectKey: string;
+
+  public readonly username: string;
+
+  public readonly limitPerRequest: number;
+
+  private readonly createdAfter?: Date;
+
+  private readonly documentConverter: JiraDocumentConverter;
+
+  constructor({
+    host,
+    projectKey,
+    username,
+    accessToken,
+    limitPerRequest = 100,
+    createdAfter,
+  }: JiraProjectLoaderParams) {
+    super();
+    this.host = host;
+    this.projectKey = projectKey;
+    this.username = username;
+    this.accessToken = accessToken;
+    this.limitPerRequest = limitPerRequest;
+    this.createdAfter = createdAfter;
+    this.documentConverter = new JiraDocumentConverter({ host, projectKey });
+  }
+
+  private buildAuthorizationHeader(): string {
+    return `Basic ${Buffer.from(
+      `${this.username}:${this.accessToken}`
+    ).toString("base64")}`;
+  }
+
+  public async load(): Promise<Document[]> {
+    try {
+      const allJiraIssues = await this.loadAsIssues();
+      return this.documentConverter.convertToDocuments(allJiraIssues);
+    } catch (error) {
+      console.error("Error:", error);
+      return [];
+    }
+  }
+
+  public async loadAsIssues(): Promise<JiraIssue[]> {
+    const allIssues: JiraIssue[] = [];
+
+    for await (const issues of this.fetchIssues()) {
+      allIssues.push(...issues);
+    }
+
+    return allIssues;
+  }
+
+  protected toJiraDateString(date: Date | undefined): string | undefined {
+    if (!date) {
+      return undefined;
+    }
+    const year = date.getFullYear();
+    const month = String(date.getMonth() + 1).padStart(2, "0");
+    const dayOfMonth = String(date.getDate()).padStart(2, "0");
+    return 
`${year}-${month}-${dayOfMonth}`; + } + + protected async *fetchIssues(): AsyncIterable { + const authorizationHeader = this.buildAuthorizationHeader(); + const url = `${this.host}${API_ENDPOINTS.SEARCH}`; + const createdAfterAsString = this.toJiraDateString(this.createdAfter); + let startAt = 0; + + while (true) { + try { + const jqlProps = [ + `project=${this.projectKey}`, + ...(createdAfterAsString ? [`created>=${createdAfterAsString}`] : []), + ]; + const params = new URLSearchParams({ + jql: jqlProps.join(" AND "), + startAt: `${startAt}`, + maxResults: `${this.limitPerRequest}`, + }); + const pageUrl = `${url}?${params}`; + + const options = { + method: "GET", + headers: { + Authorization: authorizationHeader, + Accept: "application/json", + }, + }; + + const response = await fetch(pageUrl, options); + const data: JiraAPIResponse = await response.json(); + + if (!data.issues || data.issues.length === 0) break; + + yield data.issues; + startAt += this.limitPerRequest; + } catch (error) { + console.error(error); + yield []; + } + } + } +} diff --git a/libs/langchain-community/src/embeddings/hf_transformers.ts b/libs/langchain-community/src/embeddings/hf_transformers.ts index 08175dccbb7a..dba24638cf28 100644 --- a/libs/langchain-community/src/embeddings/hf_transformers.ts +++ b/libs/langchain-community/src/embeddings/hf_transformers.ts @@ -1,3 +1,5 @@ +/* eslint-disable */ +// @ts-nocheck import type { PretrainedOptions, FeatureExtractionPipelineOptions, @@ -6,6 +8,11 @@ import type { import { Embeddings, type EmbeddingsParams } from "@langchain/core/embeddings"; import { chunkArray } from "@langchain/core/utils/chunk_array"; +/** + * @deprecated Import from + * "@langchain/community/embeddings/huggingface_transformers" + * instead and use the new "@huggingface/transformers" peer dependency. 
+ */ export interface HuggingFaceTransformersEmbeddingsParams extends EmbeddingsParams { /** @@ -13,6 +20,7 @@ export interface HuggingFaceTransformersEmbeddingsParams * Alias for `model` */ modelName: string; + /** Model name to use */ model: string; @@ -42,24 +50,10 @@ export interface HuggingFaceTransformersEmbeddingsParams */ pipelineOptions?: FeatureExtractionPipelineOptions; } - /** - * @example - * ```typescript - * const model = new HuggingFaceTransformersEmbeddings({ - * model: "Xenova/all-MiniLM-L6-v2", - * }); - * - * // Embed a single query - * const res = await model.embedQuery( - * "What would be a good company name for a company that makes colorful socks?" - * ); - * console.log({ res }); - * - * // Embed multiple documents - * const documentRes = await model.embedDocuments(["Hello world", "Bye bye"]); - * console.log({ documentRes }); - * ``` + * @deprecated Import from + * "@langchain/community/embeddings/huggingface_transformers" + * instead and use the new "@huggingface/transformers" peer dependency. */ export class HuggingFaceTransformersEmbeddings extends Embeddings @@ -83,7 +77,6 @@ export class HuggingFaceTransformersEmbeddings constructor(fields?: Partial) { super(fields ?? {}); - this.modelName = fields?.model ?? fields?.modelName ?? this.model; this.model = this.modelName; this.stripNewLines = fields?.stripNewLines ?? this.stripNewLines; @@ -95,27 +88,22 @@ export class HuggingFaceTransformersEmbeddings ...fields?.pipelineOptions, }; } - async embedDocuments(texts: string[]): Promise { const batches = chunkArray( this.stripNewLines ? 
texts.map((t) => t.replace(/\n/g, " ")) : texts, this.batchSize ); - const batchRequests = batches.map((batch) => this.runEmbedding(batch)); const batchResponses = await Promise.all(batchRequests); const embeddings: number[][] = []; - for (let i = 0; i < batchResponses.length; i += 1) { const batchResponse = batchResponses[i]; for (let j = 0; j < batchResponse.length; j += 1) { embeddings.push(batchResponse[j]); } } - return embeddings; } - async embedQuery(text: string): Promise { const data = await this.runEmbedding([ this.stripNewLines ? text.replace(/\n/g, " ") : text, diff --git a/libs/langchain-community/src/embeddings/huggingface_transformers.ts b/libs/langchain-community/src/embeddings/huggingface_transformers.ts new file mode 100644 index 000000000000..03f458c51f38 --- /dev/null +++ b/libs/langchain-community/src/embeddings/huggingface_transformers.ts @@ -0,0 +1,128 @@ +import type { + PretrainedOptions, + FeatureExtractionPipelineOptions, + FeatureExtractionPipeline, +} from "@huggingface/transformers"; +import { Embeddings, type EmbeddingsParams } from "@langchain/core/embeddings"; +import { chunkArray } from "@langchain/core/utils/chunk_array"; + +export interface HuggingFaceTransformersEmbeddingsParams + extends EmbeddingsParams { + /** Model name to use */ + model: string; + + /** + * Timeout to use when making requests to OpenAI. + */ + timeout?: number; + + /** + * The maximum number of documents to embed in a single request. + */ + batchSize?: number; + + /** + * Whether to strip new lines from the input text. This is recommended by + * OpenAI, but may not be suitable for all use cases. + */ + stripNewLines?: boolean; + + /** + * Optional parameters for the pretrained model. + */ + pretrainedOptions?: PretrainedOptions; + + /** + * Optional parameters for the pipeline. 
+ */ + pipelineOptions?: FeatureExtractionPipelineOptions; +} + +/** + * @example + * ```typescript + * const model = new HuggingFaceTransformersEmbeddings({ + * model: "Xenova/all-MiniLM-L6-v2", + * }); + * + * // Embed a single query + * const res = await model.embedQuery( + * "What would be a good company name for a company that makes colorful socks?" + * ); + * console.log({ res }); + * + * // Embed multiple documents + * const documentRes = await model.embedDocuments(["Hello world", "Bye bye"]); + * console.log({ documentRes }); + * ``` + */ +export class HuggingFaceTransformersEmbeddings + extends Embeddings + implements HuggingFaceTransformersEmbeddingsParams +{ + model = "Xenova/all-MiniLM-L6-v2"; + + batchSize = 512; + + stripNewLines = true; + + timeout?: number; + + pretrainedOptions?: PretrainedOptions; + + pipelineOptions?: FeatureExtractionPipelineOptions; + + private pipelinePromise: Promise; + + constructor(fields?: Partial) { + super(fields ?? {}); + + this.model = fields?.model ?? this.model; + this.stripNewLines = fields?.stripNewLines ?? this.stripNewLines; + this.timeout = fields?.timeout; + this.pretrainedOptions = fields?.pretrainedOptions ?? {}; + this.pipelineOptions = { + pooling: "mean", + normalize: true, + ...fields?.pipelineOptions, + }; + } + + async embedDocuments(texts: string[]): Promise { + const batches = chunkArray( + this.stripNewLines ? texts.map((t) => t.replace(/\n/g, " ")) : texts, + this.batchSize + ); + + const batchRequests = batches.map((batch) => this.runEmbedding(batch)); + const batchResponses = await Promise.all(batchRequests); + const embeddings: number[][] = []; + + for (let i = 0; i < batchResponses.length; i += 1) { + const batchResponse = batchResponses[i]; + for (let j = 0; j < batchResponse.length; j += 1) { + embeddings.push(batchResponse[j]); + } + } + + return embeddings; + } + + async embedQuery(text: string): Promise { + const data = await this.runEmbedding([ + this.stripNewLines ? 
text.replace(/\n/g, " ") : text, + ]); + return data[0]; + } + + private async runEmbedding(texts: string[]) { + const pipe = await (this.pipelinePromise ??= ( + await import("@huggingface/transformers") + ).pipeline("feature-extraction", this.model, this.pretrainedOptions)); + + return this.caller.call(async () => { + const output = await pipe(texts, this.pipelineOptions); + return output.tolist(); + }); + } +} diff --git a/libs/langchain-community/src/embeddings/tests/hf_transformers.int.test.ts b/libs/langchain-community/src/embeddings/tests/huggingface_transformers.int.test.ts similarity index 91% rename from libs/langchain-community/src/embeddings/tests/hf_transformers.int.test.ts rename to libs/langchain-community/src/embeddings/tests/huggingface_transformers.int.test.ts index de67cc9a63b0..e10caf1e7032 100644 --- a/libs/langchain-community/src/embeddings/tests/hf_transformers.int.test.ts +++ b/libs/langchain-community/src/embeddings/tests/huggingface_transformers.int.test.ts @@ -1,5 +1,5 @@ import { test, expect } from "@jest/globals"; -import { HuggingFaceTransformersEmbeddings } from "../hf_transformers.js"; +import { HuggingFaceTransformersEmbeddings } from "../huggingface_transformers.js"; import { HNSWLib } from "../../vectorstores/hnswlib.js"; test("HuggingFaceTransformersEmbeddings", async () => { diff --git a/libs/langchain-community/src/load/import_constants.ts b/libs/langchain-community/src/load/import_constants.ts index 014d418e872d..6ac412ca9543 100644 --- a/libs/langchain-community/src/load/import_constants.ts +++ b/libs/langchain-community/src/load/import_constants.ts @@ -15,6 +15,7 @@ export const optionalImportEntrypoints: string[] = [ "langchain_community/embeddings/gradient_ai", "langchain_community/embeddings/hf", "langchain_community/embeddings/hf_transformers", + "langchain_community/embeddings/huggingface_transformers", "langchain_community/embeddings/ibm", "langchain_community/embeddings/jina", 
"langchain_community/embeddings/llama_cpp", diff --git a/libs/langchain-community/src/load/import_map.ts b/libs/langchain-community/src/load/import_map.ts index 6d00b2f172f9..624b789eb34a 100644 --- a/libs/langchain-community/src/load/import_map.ts +++ b/libs/langchain-community/src/load/import_map.ts @@ -78,6 +78,7 @@ export * as indexes__base from "../indexes/base.js"; export * as indexes__memory from "../indexes/memory.js"; export * as document_loaders__web__airtable from "../document_loaders/web/airtable.js"; export * as document_loaders__web__html from "../document_loaders/web/html.js"; +export * as document_loaders__web__jira from "../document_loaders/web/jira.js"; export * as document_loaders__web__searchapi from "../document_loaders/web/searchapi.js"; export * as document_loaders__web__serpapi from "../document_loaders/web/serpapi.js"; export * as document_loaders__web__sort_xyz_blockchain from "../document_loaders/web/sort_xyz_blockchain.js"; diff --git a/libs/langchain-google-common/package.json b/libs/langchain-google-common/package.json index f5d32045f91c..5f32db49fc6a 100644 --- a/libs/langchain-google-common/package.json +++ b/libs/langchain-google-common/package.json @@ -1,6 +1,6 @@ { "name": "@langchain/google-common", - "version": "0.1.5", + "version": "0.1.6", "description": "Core types and classes for Google services.", "type": "module", "engines": { diff --git a/libs/langchain-google-common/src/index.ts b/libs/langchain-google-common/src/index.ts index 373617c72e55..d1a37b6f5c70 100644 --- a/libs/langchain-google-common/src/index.ts +++ b/libs/langchain-google-common/src/index.ts @@ -2,6 +2,8 @@ export * from "./chat_models.js"; export * from "./llms.js"; export * from "./embeddings.js"; +export * from "./output_parsers.js"; + export * from "./auth.js"; export * from "./connection.js"; export * from "./types.js"; diff --git a/libs/langchain-google-common/src/output_parsers.ts b/libs/langchain-google-common/src/output_parsers.ts new file mode 
100644 index 000000000000..8ca9377011ba --- /dev/null +++ b/libs/langchain-google-common/src/output_parsers.ts @@ -0,0 +1,271 @@ +import { BaseLLMOutputParser } from "@langchain/core/output_parsers"; +import { Callbacks } from "@langchain/core/callbacks/manager"; +import { ChatGeneration, Generation } from "@langchain/core/outputs"; +import { MessageContent } from "@langchain/core/messages"; +import { + GeminiGroundingChunk, + GeminiGroundingMetadata, + GeminiGroundingSupport, +} from "./types.js"; + +type Generations = Generation[] | ChatGeneration[]; + +type GroundingInfo = { + metadata: GeminiGroundingMetadata; + supports: GeminiGroundingSupport[]; +}; + +export abstract class BaseGoogleSearchOutputParser extends BaseLLMOutputParser { + lc_namespace: string[] = ["google_common", "output_parsers"]; + + protected generationToGroundingInfo( + generation: Generation | ChatGeneration + ): GroundingInfo | undefined { + if ("message" in generation) { + const responseMetadata = generation?.message?.response_metadata; + const metadata = responseMetadata.groundingMetadata; + const supports = + responseMetadata.groundingSupport ?? metadata.groundingSupports ?? []; + if (metadata) { + return { + metadata, + supports, + }; + } + } + return undefined; + } + + protected generationsToGroundingInfo( + generations: Generations + ): GroundingInfo | undefined { + for (const generation of generations) { + const info = this.generationToGroundingInfo(generation); + if (info !== undefined) { + return info; + } + } + return undefined; + } + + protected generationToString( + generation: Generation | ChatGeneration + ): string { + if ("message" in generation) { + const content: MessageContent = generation?.message?.content; + if (typeof content === "string") { + return content; + } else { + return content + .map((c) => { + if (c?.type === "text") { + return c?.text ?? 
""; + } else { + return ""; + } + }) + .reduce( + (previousValue, currentValue) => `${previousValue}${currentValue}` + ); + } + } + return generation.text; + } + + protected generationsToString(generations: Generations): string { + return generations + .map((generation) => this.generationToString(generation)) + .reduce( + (previousValue, currentValue) => `${previousValue}${currentValue}` + ); + } + + protected abstract segmentPrefix( + grounding: GroundingInfo, + support: GeminiGroundingSupport, + index: number + ): string | undefined; + + protected abstract segmentSuffix( + grounding: GroundingInfo, + support: GeminiGroundingSupport, + index: number + ): string | undefined; + + protected annotateSegment( + text: string, + grounding: GroundingInfo, + support: GeminiGroundingSupport, + index: number + ): string { + const start = support.segment.startIndex ?? 0; + const end = support.segment.endIndex; + + const textBefore = text.substring(0, start); + const textSegment = text.substring(start, end); + const textAfter = text.substring(end); + + const textPrefix = this.segmentPrefix(grounding, support, index) ?? ""; + const textSuffix = this.segmentSuffix(grounding, support, index) ?? ""; + + return `${textBefore}${textPrefix}${textSegment}${textSuffix}${textAfter}`; + } + + protected annotateTextSegments( + text: string, + grounding: GroundingInfo + ): string { + // Go through each support info in reverse, since the segment info + // is sorted, and we won't need to adjust string indexes this way. 
+ let ret = text; + for (let co = grounding.supports.length - 1; co >= 0; co -= 1) { + const support = grounding.supports[co]; + ret = this.annotateSegment(ret, grounding, support, co); + } + return ret; + } + + protected abstract textPrefix( + text: string, + grounding: GroundingInfo + ): string | undefined; + + protected abstract textSuffix( + text: string, + grounding: GroundingInfo + ): string | undefined; + + /** + * Google requires us to + * "Display the Search Suggestion exactly as provided without any modifications" + * So this will typically be called from the textSuffix() method to get + * a string that renders HTML. + * See https://ai.google.dev/gemini-api/docs/grounding/search-suggestions + * @param grounding + */ + protected searchSuggestion(grounding: GroundingInfo): string { + return grounding.metadata.searchEntryPoint?.renderedContent ?? ""; + } + + protected annotateText(text: string, grounding: GroundingInfo): string { + const prefix = this.textPrefix(text, grounding) ?? ""; + const suffix = this.textSuffix(text, grounding) ?? 
""; + const body = this.annotateTextSegments(text, grounding); + return `${prefix}${body}${suffix}`; + } + + async parseResult( + generations: Generations, + _callbacks?: Callbacks + ): Promise { + const text = this.generationsToString(generations); + + const grounding = this.generationsToGroundingInfo(generations); + if (!grounding) { + return text; + } + + return this.annotateText(text, grounding); + } +} + +export class SimpleGoogleSearchOutputParser extends BaseGoogleSearchOutputParser { + protected segmentPrefix( + _grounding: GroundingInfo, + _support: GeminiGroundingSupport, + _index: number + ): string | undefined { + return undefined; + } + + protected segmentSuffix( + _grounding: GroundingInfo, + support: GeminiGroundingSupport, + _index: number + ): string | undefined { + const indices: number[] = support.groundingChunkIndices.map((i) => i + 1); + return ` [${indices.join(", ")}]`; + } + + protected textPrefix(_text: string, _grounding: GroundingInfo): string { + return "Google Says:\n"; + } + + protected chunkToString(chunk: GeminiGroundingChunk, index: number): string { + const info = chunk.retrievedContext ?? chunk.web; + return `${index + 1}. ${info.title} - ${info.uri}`; + } + + protected textSuffix(_text: string, grounding: GroundingInfo): string { + let ret = "\n"; + const chunks: GeminiGroundingChunk[] = grounding.metadata.groundingChunks; + chunks.forEach((chunk, index) => { + ret = `${ret}${this.chunkToString(chunk, index)}\n`; + }); + return ret; + } +} + +export class MarkdownGoogleSearchOutputParser extends BaseGoogleSearchOutputParser { + protected segmentPrefix( + _grounding: GroundingInfo, + _support: GeminiGroundingSupport, + _index: number + ): string | undefined { + return undefined; + } + + protected chunkLink(grounding: GroundingInfo, index: number): string { + const chunk = grounding.metadata.groundingChunks[index]; + const url = chunk.retrievedContext?.uri ?? 
chunk.web?.uri; + const num = index + 1; + return `[[${num}](${url})]`; + } + + protected segmentSuffix( + grounding: GroundingInfo, + support: GeminiGroundingSupport, + _index: number + ): string | undefined { + let ret = ""; + support.groundingChunkIndices.forEach((chunkIndex) => { + const link = this.chunkLink(grounding, chunkIndex); + ret = `${ret}${link}`; + }); + return ret; + } + + protected textPrefix( + _text: string, + _grounding: GroundingInfo + ): string | undefined { + return undefined; + } + + protected chunkSuffixLink( + chunk: GeminiGroundingChunk, + index: number + ): string { + const num = index + 1; + const info = chunk.retrievedContext ?? chunk.web; + const url = info.uri; + const site = info.title; + return `${num}. [${site}](${url})`; + } + + protected textSuffix( + _text: string, + grounding: GroundingInfo + ): string | undefined { + let ret = "\n**Search Sources**\n"; + const chunks: GeminiGroundingChunk[] = grounding.metadata.groundingChunks; + chunks.forEach((chunk, index) => { + ret = `${ret}${this.chunkSuffixLink(chunk, index)}\n`; + }); + + const search = this.searchSuggestion(grounding); + ret = `${ret}\n${search}`; + + return ret; + } +} diff --git a/libs/langchain-google-common/src/tests/chat_models.test.ts b/libs/langchain-google-common/src/tests/chat_models.test.ts index 5726d9fd445e..0192c5b67710 100644 --- a/libs/langchain-google-common/src/tests/chat_models.test.ts +++ b/libs/langchain-google-common/src/tests/chat_models.test.ts @@ -35,7 +35,7 @@ import { import { removeAdditionalProperties } from "../utils/zod_to_gemini_parameters.js"; import { MessageGeminiSafetyHandler } from "../utils/index.js"; -class ChatGoogle extends ChatGoogleBase { +export class ChatGoogle extends ChatGoogleBase { constructor(fields?: ChatGoogleBaseInput) { super(fields); } diff --git a/libs/langchain-google-common/src/tests/data/chat-6-mock.json b/libs/langchain-google-common/src/tests/data/chat-6-mock.json index 796fdcf9bcee..f4966c2e2f94 100644 --- 
a/libs/langchain-google-common/src/tests/data/chat-6-mock.json +++ b/libs/langchain-google-common/src/tests/data/chat-6-mock.json @@ -67,7 +67,7 @@ "endIndex": 611, "text": "Shohei Ohtani, in his first year with the Dodgers, also experienced his first post-season appearance." }, - "groundingChunkIndices": [0], + "groundingChunkIndices": [0, 2], "confidenceScores": [0.95767003] } ], diff --git a/libs/langchain-google-common/src/tests/output_parsers.test.ts b/libs/langchain-google-common/src/tests/output_parsers.test.ts new file mode 100644 index 000000000000..ac3c135f5279 --- /dev/null +++ b/libs/langchain-google-common/src/tests/output_parsers.test.ts @@ -0,0 +1,217 @@ +/* eslint-disable @typescript-eslint/no-explicit-any */ +import { test } from "@jest/globals"; +import { MockClientAuthInfo, mockId } from "./mock.js"; +import { ChatGoogle } from "./chat_models.test.js"; +import { + MarkdownGoogleSearchOutputParser, + SimpleGoogleSearchOutputParser, +} from "../output_parsers.js"; + +describe("GoogleSearchOutputParsers", () => { + test("Simple", async () => { + const record: Record = {}; + const projectId = mockId(); + const authOptions: MockClientAuthInfo = { + record, + projectId, + resultFile: "chat-6-mock.json", + }; + + const searchRetrievalTool = { + googleSearchRetrieval: { + dynamicRetrievalConfig: { + mode: "MODE_DYNAMIC", + dynamicThreshold: 0.7, // default is 0.7 + }, + }, + }; + const model = new ChatGoogle({ + authOptions, + modelName: "gemini-1.5-pro-002", + temperature: 0, + maxRetries: 0, + }).bindTools([searchRetrievalTool]); + + const parser = new SimpleGoogleSearchOutputParser(); + + const chain = model.pipe(parser); + + const result = await chain.invoke("Who won the 2024 MLB World Series?"); + + const expectation = + "Google Says:\n" + + "The Los Angeles Dodgers won the 2024 World Series, defeating the New York Yankees 4-1 in the series. 
[1] The Dodgers clinched the title with a 7-6 comeback victory in Game 5 at Yankee Stadium on Wednesday, October 30th. This was their eighth World Series title overall and their second in the past five years. It was also their first World Series win in a full season since 1988. [2] Mookie Betts earned his third World Series ring (2018, 2020, and 2024), becoming the only active player with three championships. [3] Shohei Ohtani, in his first year with the Dodgers, also experienced his first post-season appearance. [1, 3]\n" + + "\n" + + "1. bbc.com - https://vertexaisearch.cloud.google.com/grounding-api-redirect/AYygrcTYmdnM71OvWYUTG4JggmRj8cIIgA2KtKas5RPj09CiALB4n8hl-SfCD6r8WnimL2psBoYmEN9ng9sENjpeP5VxgLMTlm0zgxhrWFfx3yA6B_n0N9j-BgHLISAUi-_Ql4_Buyw68Svq-3v6BgrXzn9hLOtK\n" + + "2. mlb.com - https://vertexaisearch.cloud.google.com/grounding-api-redirect/AYygrcQRhhvHTdpb8OMOEMVxv9fkevPoMWMnhrpuC7E0E0R94xmFxT9Vv5na1hMrfHGKxVZ9aE3PgCAs5nftC3iAkeD7B6ZTfKGH2Im1CqssMM7zorGx1Ds5_7QPPBDQps_JvpkOuvRluGCVg8KwNaIU-hm3Kg==\n" + + "3. 
youtube.com - https://vertexaisearch.cloud.google.com/grounding-api-redirect/AYygrcSwvb2t622A2ZpKxqOWKy16L1mEUvmsAJoHjaR7uffKO71SeZkpdRXRsST9HJzJkGSkMF9kOaXGoDtcvUrttqKYOQHvHSUBYO7LWMlU00KyNlSoQzrBsgN4KuJ4O4acnNyNCSVX3-E=\n"; + + expect(result).toEqual(expectation); + }); + + test("Markdown", async () => { + const record: Record = {}; + const projectId = mockId(); + const authOptions: MockClientAuthInfo = { + record, + projectId, + resultFile: "chat-6-mock.json", + }; + + const searchRetrievalTool = { + googleSearchRetrieval: { + dynamicRetrievalConfig: { + mode: "MODE_DYNAMIC", + dynamicThreshold: 0.7, // default is 0.7 + }, + }, + }; + const model = new ChatGoogle({ + authOptions, + modelName: "gemini-1.5-pro-002", + temperature: 0, + maxRetries: 0, + }).bindTools([searchRetrievalTool]); + + const parser = new MarkdownGoogleSearchOutputParser(); + + const chain = model.pipe(parser); + + const result = await chain.invoke("Who won the 2024 MLB World Series?"); + + const expectation = + "The Los Angeles Dodgers won the 2024 World Series, defeating the New York Yankees 4-1 in the series.[[1](https://vertexaisearch.cloud.google.com/grounding-api-redirect/AYygrcTYmdnM71OvWYUTG4JggmRj8cIIgA2KtKas5RPj09CiALB4n8hl-SfCD6r8WnimL2psBoYmEN9ng9sENjpeP5VxgLMTlm0zgxhrWFfx3yA6B_n0N9j-BgHLISAUi-_Ql4_Buyw68Svq-3v6BgrXzn9hLOtK)] The Dodgers clinched the title with a 7-6 comeback victory in Game 5 at Yankee Stadium on Wednesday, October 30th. This was their eighth World Series title overall and their second in the past five years. 
It was also their first World Series win in a full season since 1988.[[2](https://vertexaisearch.cloud.google.com/grounding-api-redirect/AYygrcQRhhvHTdpb8OMOEMVxv9fkevPoMWMnhrpuC7E0E0R94xmFxT9Vv5na1hMrfHGKxVZ9aE3PgCAs5nftC3iAkeD7B6ZTfKGH2Im1CqssMM7zorGx1Ds5_7QPPBDQps_JvpkOuvRluGCVg8KwNaIU-hm3Kg==)] Mookie Betts earned his third World Series ring (2018, 2020, and 2024), becoming the only active player with three championships.[[3](https://vertexaisearch.cloud.google.com/grounding-api-redirect/AYygrcSwvb2t622A2ZpKxqOWKy16L1mEUvmsAJoHjaR7uffKO71SeZkpdRXRsST9HJzJkGSkMF9kOaXGoDtcvUrttqKYOQHvHSUBYO7LWMlU00KyNlSoQzrBsgN4KuJ4O4acnNyNCSVX3-E=)] Shohei Ohtani, in his first year with the Dodgers, also experienced his first post-season appearance.[[1](https://vertexaisearch.cloud.google.com/grounding-api-redirect/AYygrcTYmdnM71OvWYUTG4JggmRj8cIIgA2KtKas5RPj09CiALB4n8hl-SfCD6r8WnimL2psBoYmEN9ng9sENjpeP5VxgLMTlm0zgxhrWFfx3yA6B_n0N9j-BgHLISAUi-_Ql4_Buyw68Svq-3v6BgrXzn9hLOtK)][[3](https://vertexaisearch.cloud.google.com/grounding-api-redirect/AYygrcSwvb2t622A2ZpKxqOWKy16L1mEUvmsAJoHjaR7uffKO71SeZkpdRXRsST9HJzJkGSkMF9kOaXGoDtcvUrttqKYOQHvHSUBYO7LWMlU00KyNlSoQzrBsgN4KuJ4O4acnNyNCSVX3-E=)]\n" + + "\n" + + "**Search Sources**\n" + + "1. [bbc.com](https://vertexaisearch.cloud.google.com/grounding-api-redirect/AYygrcTYmdnM71OvWYUTG4JggmRj8cIIgA2KtKas5RPj09CiALB4n8hl-SfCD6r8WnimL2psBoYmEN9ng9sENjpeP5VxgLMTlm0zgxhrWFfx3yA6B_n0N9j-BgHLISAUi-_Ql4_Buyw68Svq-3v6BgrXzn9hLOtK)\n" + + "2. [mlb.com](https://vertexaisearch.cloud.google.com/grounding-api-redirect/AYygrcQRhhvHTdpb8OMOEMVxv9fkevPoMWMnhrpuC7E0E0R94xmFxT9Vv5na1hMrfHGKxVZ9aE3PgCAs5nftC3iAkeD7B6ZTfKGH2Im1CqssMM7zorGx1Ds5_7QPPBDQps_JvpkOuvRluGCVg8KwNaIU-hm3Kg==)\n" + + "3. [youtube.com](https://vertexaisearch.cloud.google.com/grounding-api-redirect/AYygrcSwvb2t622A2ZpKxqOWKy16L1mEUvmsAJoHjaR7uffKO71SeZkpdRXRsST9HJzJkGSkMF9kOaXGoDtcvUrttqKYOQHvHSUBYO7LWMlU00KyNlSoQzrBsgN4KuJ4O4acnNyNCSVX3-E=)\n" + + "\n" + + "\n" + + '
\n' + + '
\n' + + ' \n' + + ' \n' + + ' \n' + + ' \n' + + ' \n' + + " \n" + + ' \n' + + ' \n' + + ' \n' + + ' \n' + + ' \n' + + ' \n' + + " \n" + + '
\n' + + "
\n" + + ' \n" + + "
\n"; + + expect(result).toEqual(expectation); + }); +}); diff --git a/libs/langchain-google-gauth/package.json b/libs/langchain-google-gauth/package.json index c8851626c9f8..405415bddab3 100644 --- a/libs/langchain-google-gauth/package.json +++ b/libs/langchain-google-gauth/package.json @@ -1,6 +1,6 @@ { "name": "@langchain/google-gauth", - "version": "0.1.5", + "version": "0.1.6", "description": "Google auth based authentication support for Google services", "type": "module", "engines": { @@ -35,7 +35,7 @@ "author": "LangChain", "license": "MIT", "dependencies": { - "@langchain/google-common": "~0.1.5", + "@langchain/google-common": "~0.1.6", "google-auth-library": "^8.9.0" }, "peerDependencies": { diff --git a/libs/langchain-google-vertexai-web/package.json b/libs/langchain-google-vertexai-web/package.json index 5d52d4680fce..80cbde8a1d85 100644 --- a/libs/langchain-google-vertexai-web/package.json +++ b/libs/langchain-google-vertexai-web/package.json @@ -1,6 +1,6 @@ { "name": "@langchain/google-vertexai-web", - "version": "0.1.5", + "version": "0.1.6", "description": "LangChain.js support for Google Vertex AI Web", "type": "module", "engines": { @@ -32,7 +32,7 @@ "author": "LangChain", "license": "MIT", "dependencies": { - "@langchain/google-webauth": "~0.1.5" + "@langchain/google-webauth": "~0.1.6" }, "peerDependencies": { "@langchain/core": ">=0.2.21 <0.4.0" diff --git a/libs/langchain-google-vertexai/package.json b/libs/langchain-google-vertexai/package.json index aa7f52592fec..ce1819caefe8 100644 --- a/libs/langchain-google-vertexai/package.json +++ b/libs/langchain-google-vertexai/package.json @@ -1,6 +1,6 @@ { "name": "@langchain/google-vertexai", - "version": "0.1.5", + "version": "0.1.6", "description": "LangChain.js support for Google Vertex AI", "type": "module", "engines": { @@ -32,7 +32,7 @@ "author": "LangChain", "license": "MIT", "dependencies": { - "@langchain/google-gauth": "~0.1.5" + "@langchain/google-gauth": "~0.1.6" }, "peerDependencies": { 
"@langchain/core": ">=0.2.21 <0.4.0" diff --git a/libs/langchain-google-webauth/package.json b/libs/langchain-google-webauth/package.json index 0a46b9221e31..c200748b3e81 100644 --- a/libs/langchain-google-webauth/package.json +++ b/libs/langchain-google-webauth/package.json @@ -1,6 +1,6 @@ { "name": "@langchain/google-webauth", - "version": "0.1.5", + "version": "0.1.6", "description": "Web-based authentication support for Google services", "type": "module", "engines": { @@ -32,7 +32,7 @@ "author": "LangChain", "license": "MIT", "dependencies": { - "@langchain/google-common": "~0.1.5", + "@langchain/google-common": "~0.1.6", "web-auth-library": "^1.0.3" }, "peerDependencies": { diff --git a/yarn.lock b/yarn.lock index abae3190907d..fa91e3d3f1ea 100644 --- a/yarn.lock +++ b/yarn.lock @@ -10560,13 +10560,6 @@ __metadata: languageName: node linkType: hard -"@huggingface/jinja@npm:^0.2.2": - version: 0.2.2 - resolution: "@huggingface/jinja@npm:0.2.2" - checksum: 8a6e3e287863d437920990afa2ca25d83c51997bd5ba0325ea90633e52469c2d901178cbd758cc362b45ad1c9521fccf372884fd59e58d2916d6b2e5bb15f776 - languageName: node - linkType: hard - "@huggingface/jinja@npm:^0.3.1": version: 0.3.1 resolution: "@huggingface/jinja@npm:0.3.1" @@ -10574,6 +10567,25 @@ __metadata: languageName: node linkType: hard +"@huggingface/jinja@npm:^0.3.2": + version: 0.3.2 + resolution: "@huggingface/jinja@npm:0.3.2" + checksum: 4bc7d00b6f8655a0032c2d89e38a095d0a87ef81a1c12fb6fd0404e1319e1ef6eef87734502689c1df39db4e77a7bb5996e7b6c1b4d6a768ecfa5a48f2a939a7 + languageName: node + linkType: hard + +"@huggingface/transformers@npm:^3.2.3": + version: 3.2.4 + resolution: "@huggingface/transformers@npm:3.2.4" + dependencies: + "@huggingface/jinja": ^0.3.2 + onnxruntime-node: 1.20.1 + onnxruntime-web: 1.21.0-dev.20241205-d27fecd3d3 + sharp: ^0.33.5 + checksum: fdff5cec1336fdb4ad923592d77348730f58263928a8c90d0f79aed7863e74a5521b9e99903c906a6e1c056fe0f81f811e4d403b62d3edb66da9389cff025acf + languageName: node + 
linkType: hard + "@humanwhocodes/config-array@npm:^0.11.11": version: 0.11.11 resolution: "@humanwhocodes/config-array@npm:0.11.11" @@ -10849,6 +10861,15 @@ __metadata: languageName: node linkType: hard +"@isaacs/fs-minipass@npm:^4.0.0": + version: 4.0.1 + resolution: "@isaacs/fs-minipass@npm:4.0.1" + dependencies: + minipass: ^7.0.4 + checksum: 5d36d289960e886484362d9eb6a51d1ea28baed5f5d0140bbe62b99bac52eaf06cc01c2bc0d3575977962f84f6b2c4387b043ee632216643d4787b0999465bf2 + languageName: node + linkType: hard + "@istanbuljs/load-nyc-config@npm:^1.0.0": version: 1.1.0 resolution: "@istanbuljs/load-nyc-config@npm:1.1.0" @@ -11817,6 +11838,7 @@ __metadata: "@google-cloud/storage": ^7.7.0 "@gradientai/nodejs-sdk": ^1.2.0 "@huggingface/inference": ^2.6.4 + "@huggingface/transformers": ^3.2.3 "@ibm-cloud/watsonx-ai": ^1.3.0 "@jest/globals": ^29.5.0 "@lancedb/lancedb": ^0.13.0 @@ -11864,6 +11886,7 @@ __metadata: "@types/pg": ^8.11.0 "@types/pg-copy-streams": ^1.2.2 "@types/uuid": ^9 + "@types/word-extractor": ^1 "@types/ws": ^8 "@typescript-eslint/eslint-plugin": ^5.58.0 "@typescript-eslint/parser": ^5.58.0 @@ -11874,7 +11897,6 @@ __metadata: "@vercel/postgres": ^0.5.0 "@writerai/writer-sdk": ^0.40.2 "@xata.io/client": ^0.28.0 - "@xenova/transformers": ^2.17.2 "@zilliz/milvus2-sdk-node": ">=2.3.5" apify-client: ^2.7.1 assemblyai: ^4.6.0 @@ -11964,6 +11986,7 @@ __metadata: voy-search: 0.6.2 weaviate-ts-client: ^1.4.0 web-auth-library: ^1.0.3 + word-extractor: ^1.0.4 youtube-transcript: ^1.0.6 youtubei.js: ^9.1.0 zod: ^3.22.3 @@ -11997,6 +12020,7 @@ __metadata: "@google-cloud/storage": ^6.10.1 || ^7.7.0 "@gradientai/nodejs-sdk": ^1.2.0 "@huggingface/inference": ^2.6.4 + "@huggingface/transformers": ^3.2.3 "@ibm-cloud/watsonx-ai": "*" "@lancedb/lancedb": ^0.12.0 "@langchain/core": ">=0.2.21 <0.4.0" @@ -12030,7 +12054,6 @@ __metadata: "@vercel/postgres": ^0.5.0 "@writerai/writer-sdk": ^0.40.2 "@xata.io/client": ^0.28.0 - "@xenova/transformers": ^2.17.2 
"@zilliz/milvus2-sdk-node": ">=2.3.5" apify-client: ^2.7.1 assemblyai: ^4.6.0 @@ -12092,6 +12115,7 @@ __metadata: voy-search: 0.6.2 weaviate-ts-client: "*" web-auth-library: ^1.0.3 + word-extractor: "*" ws: ^8.14.2 youtube-transcript: ^1.0.6 youtubei.js: ^9.1.0 @@ -12150,6 +12174,8 @@ __metadata: optional: true "@huggingface/inference": optional: true + "@huggingface/transformers": + optional: true "@lancedb/lancedb": optional: true "@layerup/layerup-security": @@ -12212,8 +12238,6 @@ __metadata: optional: true "@xata.io/client": optional: true - "@xenova/transformers": - optional: true "@zilliz/milvus2-sdk-node": optional: true apify-client: @@ -12332,6 +12356,8 @@ __metadata: optional: true web-auth-library: optional: true + word-extractor: + optional: true ws: optional: true youtube-transcript: @@ -12417,7 +12443,7 @@ __metadata: languageName: unknown linkType: soft -"@langchain/google-common@^0.1.0, @langchain/google-common@workspace:*, @langchain/google-common@workspace:libs/langchain-google-common, @langchain/google-common@~0.1.5": +"@langchain/google-common@^0.1.0, @langchain/google-common@workspace:*, @langchain/google-common@workspace:libs/langchain-google-common, @langchain/google-common@~0.1.6": version: 0.0.0-use.local resolution: "@langchain/google-common@workspace:libs/langchain-google-common" dependencies: @@ -12452,13 +12478,13 @@ __metadata: languageName: unknown linkType: soft -"@langchain/google-gauth@workspace:libs/langchain-google-gauth, @langchain/google-gauth@~0.1.5": +"@langchain/google-gauth@workspace:libs/langchain-google-gauth, @langchain/google-gauth@~0.1.6": version: 0.0.0-use.local resolution: "@langchain/google-gauth@workspace:libs/langchain-google-gauth" dependencies: "@jest/globals": ^29.5.0 "@langchain/core": "workspace:*" - "@langchain/google-common": ~0.1.5 + "@langchain/google-common": ~0.1.6 "@langchain/scripts": ">=0.1.0 <0.2.0" "@swc/core": ^1.3.90 "@swc/jest": ^0.2.29 @@ -12531,7 +12557,7 @@ __metadata: "@jest/globals": 
^29.5.0 "@langchain/core": "workspace:*" "@langchain/google-common": ^0.1.0 - "@langchain/google-webauth": ~0.1.5 + "@langchain/google-webauth": ~0.1.6 "@langchain/scripts": ">=0.1.0 <0.2.0" "@langchain/standard-tests": 0.0.0 "@swc/core": ^1.3.90 @@ -12567,7 +12593,7 @@ __metadata: "@jest/globals": ^29.5.0 "@langchain/core": "workspace:*" "@langchain/google-common": ^0.1.0 - "@langchain/google-gauth": ~0.1.5 + "@langchain/google-gauth": ~0.1.6 "@langchain/scripts": ">=0.1.0 <0.2.0" "@langchain/standard-tests": 0.0.0 "@swc/core": ^1.3.90 @@ -12596,13 +12622,13 @@ __metadata: languageName: unknown linkType: soft -"@langchain/google-webauth@workspace:libs/langchain-google-webauth, @langchain/google-webauth@~0.1.5": +"@langchain/google-webauth@workspace:libs/langchain-google-webauth, @langchain/google-webauth@~0.1.6": version: 0.0.0-use.local resolution: "@langchain/google-webauth@workspace:libs/langchain-google-webauth" dependencies: "@jest/globals": ^29.5.0 "@langchain/core": "workspace:*" - "@langchain/google-common": ~0.1.5 + "@langchain/google-common": ~0.1.6 "@langchain/scripts": ">=0.1.0 <0.2.0" "@swc/core": ^1.3.90 "@swc/jest": ^0.2.29 @@ -20147,6 +20173,15 @@ __metadata: languageName: node linkType: hard +"@types/word-extractor@npm:^1": + version: 1.0.6 + resolution: "@types/word-extractor@npm:1.0.6" + dependencies: + "@types/node": "*" + checksum: 3619602252493e1ad2671af6ce73a342cdc8452d5a3473123474ab6f3f5deb466b9c4cfdcbac75eacb430f545ae5708e3165b01e03b42053ca9fa87d2920fe3d + languageName: node + linkType: hard + "@types/ws@npm:8.5.9": version: 8.5.9 resolution: "@types/ws@npm:8.5.9" @@ -20994,21 +21029,6 @@ __metadata: languageName: node linkType: hard -"@xenova/transformers@npm:^2.17.2": - version: 2.17.2 - resolution: "@xenova/transformers@npm:2.17.2" - dependencies: - "@huggingface/jinja": ^0.2.2 - onnxruntime-node: 1.14.0 - onnxruntime-web: 1.14.0 - sharp: ^0.32.0 - dependenciesMeta: - onnxruntime-node: - optional: true - checksum: 
5d49219995f401eedab6e0dcde6ad15ce5df0466388448703ca191e083bb0dc95692c1b539827d47399410d089cb078c47c862b0c550e34b54670fc435e83941 - languageName: node - linkType: hard - "@xmldom/xmldom@npm:^0.8.10, @xmldom/xmldom@npm:^0.8.6": version: 0.8.10 resolution: "@xmldom/xmldom@npm:0.8.10" @@ -23189,6 +23209,13 @@ __metadata: languageName: node linkType: hard +"chownr@npm:^3.0.0": + version: 3.0.0 + resolution: "chownr@npm:3.0.0" + checksum: fd73a4bab48b79e66903fe1cafbdc208956f41ea4f856df883d0c7277b7ab29fd33ee65f93b2ec9192fc0169238f2f8307b7735d27c155821d886b84aa97aa8d + languageName: node + linkType: hard + "chromadb@npm:^1.5.3": version: 1.5.3 resolution: "chromadb@npm:1.5.3" @@ -25600,7 +25627,7 @@ __metadata: languageName: node linkType: hard -"detect-libc@npm:2.0.2, detect-libc@npm:^2.0.2": +"detect-libc@npm:2.0.2": version: 2.0.2 resolution: "detect-libc@npm:2.0.2" checksum: 2b2cd3649b83d576f4be7cc37eb3b1815c79969c8b1a03a40a4d55d83bc74d010753485753448eacb98784abf22f7dbd3911fd3b60e29fda28fed2d1a997944d @@ -29440,6 +29467,22 @@ __metadata: languageName: node linkType: hard +"glob@npm:^10.3.7": + version: 10.4.5 + resolution: "glob@npm:10.4.5" + dependencies: + foreground-child: ^3.1.0 + jackspeak: ^3.1.2 + minimatch: ^9.0.4 + minipass: ^7.1.2 + package-json-from-dist: ^1.0.0 + path-scurry: ^1.11.1 + bin: + glob: dist/esm/bin.mjs + checksum: 0bc725de5e4862f9f387fd0f2b274baf16850dcd2714502ccf471ee401803997983e2c05590cb65f9675a3c6f2a58e7a53f9e365704108c6ad3cbf1d60934c4a + languageName: node + linkType: hard + "glob@npm:^7.0.0, glob@npm:^7.1.3, glob@npm:^7.1.4, glob@npm:^7.1.6": version: 7.2.3 resolution: "glob@npm:7.2.3" @@ -32047,6 +32090,19 @@ __metadata: languageName: node linkType: hard +"jackspeak@npm:^3.1.2": + version: 3.4.3 + resolution: "jackspeak@npm:3.4.3" + dependencies: + "@isaacs/cliui": ^8.0.2 + "@pkgjs/parseargs": ^0.11.0 + dependenciesMeta: + "@pkgjs/parseargs": + optional: true + checksum: 
be31027fc72e7cc726206b9f560395604b82e0fddb46c4cbf9f97d049bcef607491a5afc0699612eaa4213ca5be8fd3e1e7cd187b3040988b65c9489838a7c00 + languageName: node + linkType: hard + "javascript-stringify@npm:^2.0.1": version: 2.1.0 resolution: "javascript-stringify@npm:2.1.0" @@ -34025,7 +34081,7 @@ __metadata: languageName: node linkType: hard -"long@npm:*, long@npm:^5.2.1, long@npm:~5.2.3": +"long@npm:*, long@npm:^5.2.1, long@npm:^5.2.3, long@npm:~5.2.3": version: 5.2.3 resolution: "long@npm:5.2.3" checksum: 885ede7c3de4facccbd2cacc6168bae3a02c3e836159ea4252c87b6e34d40af819824b2d4edce330bfb5c4d6e8ce3ec5864bdcf9473fa1f53a4f8225860e5897 @@ -34845,6 +34901,13 @@ __metadata: languageName: node linkType: hard +"minipass@npm:^7.1.2": + version: 7.1.2 + resolution: "minipass@npm:7.1.2" + checksum: 2bfd325b95c555f2b4d2814d49325691c7bee937d753814861b0b49d5edcda55cbbf22b6b6a60bb91eddac8668771f03c5ff647dcd9d0f798e9548b9cdc46ee3 + languageName: node + linkType: hard + "minizlib@npm:^2.0.0, minizlib@npm:^2.1.1, minizlib@npm:^2.1.2": version: 2.1.2 resolution: "minizlib@npm:2.1.2" @@ -34855,6 +34918,16 @@ __metadata: languageName: node linkType: hard +"minizlib@npm:^3.0.1": + version: 3.0.1 + resolution: "minizlib@npm:3.0.1" + dependencies: + minipass: ^7.0.4 + rimraf: ^5.0.5 + checksum: da0a53899252380475240c587e52c824f8998d9720982ba5c4693c68e89230718884a209858c156c6e08d51aad35700a3589987e540593c36f6713fe30cd7338 + languageName: node + linkType: hard + "mitt@npm:3.0.1": version: 3.0.1 resolution: "mitt@npm:3.0.1" @@ -35376,15 +35449,6 @@ __metadata: languageName: node linkType: hard -"node-addon-api@npm:^6.1.0": - version: 6.1.0 - resolution: "node-addon-api@npm:6.1.0" - dependencies: - node-gyp: latest - checksum: 3a539510e677cfa3a833aca5397300e36141aca064cdc487554f2017110709a03a95da937e98c2a14ec3c626af7b2d1b6dabe629a481f9883143d0d5bff07bf2 - languageName: node - linkType: hard - "node-addon-api@npm:^7.0.0": version: 7.0.0 resolution: "node-addon-api@npm:7.0.0" @@ -36099,42 +36163,42 @@ 
__metadata: languageName: node linkType: hard -"onnx-proto@npm:^4.0.4": - version: 4.0.4 - resolution: "onnx-proto@npm:4.0.4" - dependencies: - protobufjs: ^6.8.8 - checksum: 4122ea200bb4a7c93a464c5a49351025537f5b2c9a5848a9b090700437e6c458a44491096502324a3d7e6fb388be4967a824d12ac18d7be6721d0d5779400fd5 +"onnxruntime-common@npm:1.20.1": + version: 1.20.1 + resolution: "onnxruntime-common@npm:1.20.1" + checksum: 5cde8fae546c9a4a2d8f13e18cc4c346d77e733d08d1f6b95f4958fb09618592113d232db64049fafadbd18913ec8564e6c06c47dadc4c2aac8df4ed18b2956c languageName: node linkType: hard -"onnxruntime-common@npm:~1.14.0": - version: 1.14.0 - resolution: "onnxruntime-common@npm:1.14.0" - checksum: 6f0dda57440e94ad8c3df80c9812b38651daa4482af4159bada6cf19f8e09a5258994e57038acdfd54ecab7b9779e0e8ce37b3315ee6c48dd6c1c943fd15fa13 +"onnxruntime-common@npm:1.21.0-dev.20241205-6ed77cc374": + version: 1.21.0-dev.20241205-6ed77cc374 + resolution: "onnxruntime-common@npm:1.21.0-dev.20241205-6ed77cc374" + checksum: f490d6b1a8c059ce5665a468ac1c38de4c3729ead0bae173a0c9334c32a67fb2899972b6e185cc6c42f05e61f2c3da2738a814dbc89b5577206a7b17e29f4190 languageName: node linkType: hard -"onnxruntime-node@npm:1.14.0": - version: 1.14.0 - resolution: "onnxruntime-node@npm:1.14.0" +"onnxruntime-node@npm:1.20.1": + version: 1.20.1 + resolution: "onnxruntime-node@npm:1.20.1" dependencies: - onnxruntime-common: ~1.14.0 + onnxruntime-common: 1.20.1 + tar: ^7.0.1 + checksum: 6b5467eb1d08e1f5931ed1bff77e180f8600be917b690bad5edcfad61fcb797d29f74c5cff5eeb1f8bc95a36d261647d68ca88e149b0aa88412d8dea90901042 conditions: (os=win32 | os=darwin | os=linux) languageName: node linkType: hard -"onnxruntime-web@npm:1.14.0": - version: 1.14.0 - resolution: "onnxruntime-web@npm:1.14.0" +"onnxruntime-web@npm:1.21.0-dev.20241205-d27fecd3d3": + version: 1.21.0-dev.20241205-d27fecd3d3 + resolution: "onnxruntime-web@npm:1.21.0-dev.20241205-d27fecd3d3" dependencies: flatbuffers: ^1.12.0 guid-typescript: ^1.0.9 - long: ^4.0.0 - 
onnx-proto: ^4.0.4 - onnxruntime-common: ~1.14.0 + long: ^5.2.3 + onnxruntime-common: 1.21.0-dev.20241205-6ed77cc374 platform: ^1.3.6 - checksum: 6faa8886683c301e267dad336a8f819a33253f3b3e93c0fe7af7df2aa45e61f6737b43119d68a448d17d08cbcd83e17607f9242e2222d5b4f9552351ddaa3289 + protobufjs: ^7.2.4 + checksum: f668b638440dc8122209ce04c9e06b449bd2d7d0ce05be0d0618468d98746310e4a4d1a15afea30c86e98cea0053496d1c0fef5e6785153f16be8530f24018b8 languageName: node linkType: hard @@ -36603,6 +36667,13 @@ __metadata: languageName: node linkType: hard +"package-json-from-dist@npm:^1.0.0": + version: 1.0.1 + resolution: "package-json-from-dist@npm:1.0.1" + checksum: 58ee9538f2f762988433da00e26acc788036914d57c71c246bf0be1b60cdbd77dd60b6a3e1a30465f0b248aeb80079e0b34cb6050b1dfa18c06953bb1cbc7602 + languageName: node + linkType: hard + "package-json@npm:^10.0.0": version: 10.0.1 resolution: "package-json@npm:10.0.1" @@ -36898,6 +36969,16 @@ __metadata: languageName: node linkType: hard +"path-scurry@npm:^1.11.1": + version: 1.11.1 + resolution: "path-scurry@npm:1.11.1" + dependencies: + lru-cache: ^10.2.0 + minipass: ^5.0.0 || ^6.0.2 || ^7.0.0 + checksum: 890d5abcd593a7912dcce7cf7c6bf7a0b5648e3dee6caf0712c126ca0a65c7f3d7b9d769072a4d1baf370f61ce493ab5b038d59988688e0c5f3f646ee3c69023 + languageName: node + linkType: hard + "path-scurry@npm:^1.7.0": version: 1.9.2 resolution: "path-scurry@npm:1.9.2" @@ -39617,6 +39698,17 @@ __metadata: languageName: node linkType: hard +"rimraf@npm:^5.0.5": + version: 5.0.10 + resolution: "rimraf@npm:5.0.10" + dependencies: + glob: ^10.3.7 + bin: + rimraf: dist/esm/bin.mjs + checksum: 50e27388dd2b3fa6677385fc1e2966e9157c89c86853b96d02e6915663a96b7ff4d590e14f6f70e90f9b554093aa5dbc05ac3012876be558c06a65437337bc05 + languageName: node + linkType: hard + "robust-predicates@npm:^3.0.2": version: 3.0.2 resolution: "robust-predicates@npm:3.0.2" @@ -39887,6 +39979,15 @@ __metadata: languageName: node linkType: hard +"saxes@npm:^5.0.1": + version: 5.0.1 + 
resolution: "saxes@npm:5.0.1" + dependencies: + xmlchars: ^2.2.0 + checksum: 5636b55cf15f7cf0baa73f2797bf992bdcf75d1b39d82c0aa4608555c774368f6ac321cb641fd5f3d3ceb87805122cd47540da6a7b5960fe0dbdb8f8c263f000 + languageName: node + linkType: hard + "saxes@npm:^6.0.0": version: 6.0.0 resolution: "saxes@npm:6.0.0" @@ -40285,23 +40386,6 @@ __metadata: languageName: node linkType: hard -"sharp@npm:^0.32.0": - version: 0.32.4 - resolution: "sharp@npm:0.32.4" - dependencies: - color: ^4.2.3 - detect-libc: ^2.0.2 - node-addon-api: ^6.1.0 - node-gyp: latest - prebuild-install: ^7.1.1 - semver: ^7.5.4 - simple-get: ^4.0.1 - tar-fs: ^3.0.4 - tunnel-agent: ^0.6.0 - checksum: 52e3cfe8fbba2623a9b935be8a3d00d6993a2c56c775ac5cc89b273826db95f029f68a0029a37f96dcb6790aa2e3c05a02599035535b319f50ab31f5d86a13f0 - languageName: node - linkType: hard - "sharp@npm:^0.33.5": version: 0.33.5 resolution: "sharp@npm:0.33.5" @@ -40479,7 +40563,7 @@ __metadata: languageName: node linkType: hard -"simple-get@npm:^4.0.0, simple-get@npm:^4.0.1": +"simple-get@npm:^4.0.0": version: 4.0.1 resolution: "simple-get@npm:4.0.1" dependencies: @@ -41555,17 +41639,6 @@ __metadata: languageName: node linkType: hard -"tar-fs@npm:^3.0.4": - version: 3.0.4 - resolution: "tar-fs@npm:3.0.4" - dependencies: - mkdirp-classic: ^0.5.2 - pump: ^3.0.0 - tar-stream: ^3.1.5 - checksum: dcf4054f9e92ca0efe61c2b3f612914fb259a47900aa908a63106513a6d006c899b426ada53eb88d9dbbf089b5724c8e90b96a2c4ca6171845fa14203d734e30 - languageName: node - linkType: hard - "tar-fs@npm:^3.0.6": version: 3.0.6 resolution: "tar-fs@npm:3.0.6" @@ -41664,6 +41737,20 @@ __metadata: languageName: node linkType: hard +"tar@npm:^7.0.1": + version: 7.4.3 + resolution: "tar@npm:7.4.3" + dependencies: + "@isaacs/fs-minipass": ^4.0.0 + chownr: ^3.0.0 + minipass: ^7.1.2 + minizlib: ^3.0.1 + mkdirp: ^3.0.1 + yallist: ^5.0.0 + checksum: 8485350c0688331c94493031f417df069b778aadb25598abdad51862e007c39d1dd5310702c7be4a6784731a174799d8885d2fde0484269aea205b724d7b2ffa 
+ languageName: node + linkType: hard + "teeny-request@npm:^9.0.0": version: 9.0.0 resolution: "teeny-request@npm:9.0.0" @@ -44281,6 +44368,16 @@ __metadata: languageName: node linkType: hard +"word-extractor@npm:^1.0.4": + version: 1.0.4 + resolution: "word-extractor@npm:1.0.4" + dependencies: + saxes: ^5.0.1 + yauzl: ^2.10.0 + checksum: 04ed0ef1dfd6b26ab2094671e72f16e5a948f9978da3fd6b9d01ff475ecd048199f529d989f1d0dfe3da684a1aa8bb86e4388edabd706fd74a0be4eb030183cd + languageName: node + linkType: hard + "word-wrap@npm:^1.2.3, word-wrap@npm:~1.2.3": version: 1.2.3 resolution: "word-wrap@npm:1.2.3" @@ -44537,6 +44634,13 @@ __metadata: languageName: node linkType: hard +"yallist@npm:^5.0.0": + version: 5.0.0 + resolution: "yallist@npm:5.0.0" + checksum: eba51182400b9f35b017daa7f419f434424410691bbc5de4f4240cc830fdef906b504424992700dc047f16b4d99100a6f8b8b11175c193f38008e9c96322b6a5 + languageName: node + linkType: hard + "yaml-loader@npm:^0.8.0": version: 0.8.0 resolution: "yaml-loader@npm:0.8.0"