Skip to content

Commit

Permalink
fix(community): Migrate xenova transformers lib to huggingface (#7431)
Browse files Browse the repository at this point in the history
Co-authored-by: jacoblee93 <[email protected]>
  • Loading branch information
PylotLight and jacoblee93 authored Dec 31, 2024
1 parent ed63546 commit 7e7df50
Show file tree
Hide file tree
Showing 22 changed files with 344 additions and 132 deletions.
1 change: 1 addition & 0 deletions docs/api_refs/blacklisted-entrypoints.json
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
"../../langchain/src/embeddings/tensorflow.ts",
"../../langchain/src/embeddings/hf.ts",
"../../langchain/src/embeddings/hf_transformers.ts",
"../../langchain/src/embeddings/huggingface_transformers.ts",
"../../langchain/src/embeddings/googlevertexai.ts",
"../../langchain/src/embeddings/googlepalm.ts",
"../../langchain/src/embeddings/minimax.ts",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,10 @@ It runs locally and even works directly in the browser, allowing you to create w

## Setup

You'll need to install the [@xenova/transformers](https://www.npmjs.com/package/@xenova/transformers) package as a peer dependency:
You'll need to install the [@huggingface/transformers](https://www.npmjs.com/package/@huggingface/transformers) package as a peer dependency:

```bash npm2yarn
npm install @xenova/transformers
npm install @huggingface/transformers
```

import IntegrationInstallTooltip from "@mdx_components/integration_install_tooltip.mdx";
Expand Down
2 changes: 1 addition & 1 deletion environment_tests/test-exports-cjs/src/import.js
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ async function test() {
const { OpenAI } = await import("@langchain/openai");
const { LLMChain } = await import("langchain/chains");
const { ChatPromptTemplate } = await import("@langchain/core/prompts");
const { HuggingFaceTransformersEmbeddings } = await import("@langchain/community/embeddings/hf_transformers");
const { HuggingFaceTransformersEmbeddings } = await import("@langchain/community/embeddings/huggingface_transformers");
const { Document } = await import("@langchain/core/documents");
const { MemoryVectorStore } = await import("langchain/vectorstores/memory");

Expand Down
2 changes: 1 addition & 1 deletion environment_tests/test-exports-cjs/src/index.mjs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ import { OpenAI } from "@langchain/openai";
import { LLMChain } from "langchain/chains";
import { MemoryVectorStore } from "langchain/vectorstores/memory";
import { ChatPromptTemplate } from "@langchain/core/prompts";
import { HuggingFaceTransformersEmbeddings } from "@langchain/community/embeddings/hf_transformers";
import { HuggingFaceTransformersEmbeddings } from "@langchain/community/embeddings/huggingface_transformers";
import { Document } from "@langchain/core/documents";

// Test exports
Expand Down
6 changes: 4 additions & 2 deletions environment_tests/test-exports-cjs/src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ import { OpenAI } from "@langchain/openai";
import { LLMChain } from "langchain/chains";
import { ChatPromptTemplate } from "@langchain/core/prompts";
import { MemoryVectorStore } from "langchain/vectorstores/memory";
import { HuggingFaceTransformersEmbeddings } from "@langchain/community/embeddings/hf_transformers";
import { HuggingFaceTransformersEmbeddings } from "@langchain/community/embeddings/huggingface_transformers";
import { Document } from "@langchain/core/documents";

async function test(useAzure: boolean = false) {
Expand All @@ -25,7 +25,9 @@ async function test(useAzure: boolean = false) {
openAIApiKey: "sk-XXXX",
};

const vs = new MemoryVectorStore(new HuggingFaceTransformersEmbeddings({ model: "Xenova/all-MiniLM-L6-v2" }));
const vs = new MemoryVectorStore(
new HuggingFaceTransformersEmbeddings({ model: "Xenova/all-MiniLM-L6-v2" })
);

await vs.addVectors(
[
Expand Down
2 changes: 1 addition & 1 deletion environment_tests/test-exports-cjs/src/require.js
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ const { OpenAI } = require("@langchain/openai");
const { LLMChain } = require("langchain/chains");
const { ChatPromptTemplate } = require("@langchain/core/prompts");
const { MemoryVectorStore } = require("langchain/vectorstores/memory");
const { HuggingFaceTransformersEmbeddings } = require("@langchain/community/embeddings/hf_transformers");
const { HuggingFaceTransformersEmbeddings } = require("@langchain/community/embeddings/huggingface_transformers");
const { Document } = require("@langchain/core/documents");

async function test() {
Expand Down
2 changes: 1 addition & 1 deletion environment_tests/test-exports-esm/src/import.cjs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ async function test() {
const { LLMChain } = await import("langchain/chains");
const { ChatPromptTemplate } = await import("@langchain/core/prompts");
const { MemoryVectorStore } = await import("langchain/vectorstores/memory");
const { HuggingFaceTransformersEmbeddings } = await import("@langchain/community/embeddings/hf_transformers");
const { HuggingFaceTransformersEmbeddings } = await import("@langchain/community/embeddings/huggingface_transformers");
const { Document } = await import("@langchain/core/documents");

// Test exports
Expand Down
2 changes: 1 addition & 1 deletion environment_tests/test-exports-esm/src/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ import { OpenAI } from "@langchain/openai";
import { LLMChain } from "langchain/chains";
import { ChatPromptTemplate } from "@langchain/core/prompts";
import { MemoryVectorStore } from "langchain/vectorstores/memory";
import { HuggingFaceTransformersEmbeddings } from "@langchain/community/embeddings/hf_transformers";
import { HuggingFaceTransformersEmbeddings } from "@langchain/community/embeddings/huggingface_transformers";
import { Document } from "@langchain/core/documents";
import { CallbackManager } from "@langchain/core/callbacks/manager";

Expand Down
6 changes: 4 additions & 2 deletions environment_tests/test-exports-esm/src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ import { OpenAI } from "@langchain/openai";
import { LLMChain } from "langchain/chains";
import { ChatPromptTemplate } from "@langchain/core/prompts";
import { MemoryVectorStore } from "langchain/vectorstores/memory";
import { HuggingFaceTransformersEmbeddings } from "@langchain/community/embeddings/hf_transformers";
import { HuggingFaceTransformersEmbeddings } from "@langchain/community/embeddings/huggingface_transformers";
import { Document } from "@langchain/core/documents";

async function test(useAzure: boolean = false) {
Expand All @@ -24,7 +24,9 @@ async function test(useAzure: boolean = false) {
openAIApiKey: "sk-XXXX",
};

const vs = new MemoryVectorStore(new HuggingFaceTransformersEmbeddings({ model: "Xenova/all-MiniLM-L6-v2", }));
const vs = new MemoryVectorStore(
new HuggingFaceTransformersEmbeddings({ model: "Xenova/all-MiniLM-L6-v2" })
);

await vs.addVectors(
[
Expand Down
2 changes: 1 addition & 1 deletion environment_tests/test-exports-esm/src/require.cjs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ const { OpenAI } = require("@langchain/openai");
const { LLMChain } = require("langchain/chains");
const { ChatPromptTemplate } = require("@langchain/core/prompts");
const { MemoryVectorStore } = require("langchain/vectorstores/memory");
const { HuggingFaceTransformersEmbeddings } = require("@langchain/community/embeddings/hf_transformers");
const { HuggingFaceTransformersEmbeddings } = require("@langchain/community/embeddings/huggingface_transformers");
const { Document } = require("@langchain/core/documents");

async function test() {
Expand Down
2 changes: 1 addition & 1 deletion examples/src/models/embeddings/hf_transformers.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import { HuggingFaceTransformersEmbeddings } from "@langchain/community/embeddings/hf_transformers";
import { HuggingFaceTransformersEmbeddings } from "@langchain/community/embeddings/huggingface_transformers";

const model = new HuggingFaceTransformersEmbeddings({
model: "Xenova/all-MiniLM-L6-v2",
Expand Down
2 changes: 1 addition & 1 deletion examples/src/use_cases/local_retrieval_qa/chain.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ import { CheerioWebBaseLoader } from "@langchain/community/document_loaders/web/
import { RecursiveCharacterTextSplitter } from "@langchain/textsplitters";
import { HNSWLib } from "@langchain/community/vectorstores/hnswlib";
import { Ollama } from "@langchain/community/llms/ollama";
import { HuggingFaceTransformersEmbeddings } from "@langchain/community/embeddings/hf_transformers";
import { HuggingFaceTransformersEmbeddings } from "@langchain/community/embeddings/huggingface_transformers";
import { formatDocumentsAsString } from "langchain/util/document";
import { PromptTemplate } from "@langchain/core/prompts";
import {
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import { CheerioWebBaseLoader } from "@langchain/community/document_loaders/web/cheerio";
import { RecursiveCharacterTextSplitter } from "@langchain/textsplitters";
import { HNSWLib } from "@langchain/community/vectorstores/hnswlib";
import { HuggingFaceTransformersEmbeddings } from "@langchain/community/embeddings/hf_transformers";
import { HuggingFaceTransformersEmbeddings } from "@langchain/community/embeddings/huggingface_transformers";

const loader = new CheerioWebBaseLoader(
"https://lilianweng.github.io/posts/2023-06-23-agent/"
Expand Down
2 changes: 1 addition & 1 deletion examples/src/use_cases/local_retrieval_qa/qa_chain.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ import { CheerioWebBaseLoader } from "@langchain/community/document_loaders/web/
import { RecursiveCharacterTextSplitter } from "@langchain/textsplitters";
import { HNSWLib } from "@langchain/community/vectorstores/hnswlib";
import { Ollama } from "@langchain/community/llms/ollama";
import { HuggingFaceTransformersEmbeddings } from "@langchain/community/embeddings/hf_transformers";
import { HuggingFaceTransformersEmbeddings } from "@langchain/community/embeddings/huggingface_transformers";
import { PromptTemplate } from "@langchain/core/prompts";

const loader = new CheerioWebBaseLoader(
Expand Down
4 changes: 4 additions & 0 deletions libs/langchain-community/.gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -174,6 +174,10 @@ embeddings/hf_transformers.cjs
embeddings/hf_transformers.js
embeddings/hf_transformers.d.ts
embeddings/hf_transformers.d.cts
embeddings/huggingface_transformers.cjs
embeddings/huggingface_transformers.js
embeddings/huggingface_transformers.d.ts
embeddings/huggingface_transformers.d.cts
embeddings/ibm.cjs
embeddings/ibm.js
embeddings/ibm.d.ts
Expand Down
2 changes: 2 additions & 0 deletions libs/langchain-community/langchain.config.js
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,7 @@ export const config = {
"embeddings/gradient_ai": "embeddings/gradient_ai",
"embeddings/hf": "embeddings/hf",
"embeddings/hf_transformers": "embeddings/hf_transformers",
"embeddings/huggingface_transformers": "embeddings/huggingface_transformers",
"embeddings/ibm": "embeddings/ibm",
"embeddings/jina": "embeddings/jina",
"embeddings/llama_cpp": "embeddings/llama_cpp",
Expand Down Expand Up @@ -355,6 +356,7 @@ export const config = {
"embeddings/tensorflow",
"embeddings/hf",
"embeddings/hf_transformers",
"embeddings/huggingface_transformers",
"embeddings/ibm",
"embeddings/jina",
"embeddings/llama_cpp",
Expand Down
23 changes: 18 additions & 5 deletions libs/langchain-community/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,7 @@
"@google-cloud/storage": "^7.7.0",
"@gradientai/nodejs-sdk": "^1.2.0",
"@huggingface/inference": "^2.6.4",
"@huggingface/transformers": "^3.2.3",
"@ibm-cloud/watsonx-ai": "^1.3.0",
"@jest/globals": "^29.5.0",
"@lancedb/lancedb": "^0.13.0",
Expand Down Expand Up @@ -134,7 +135,6 @@
"@vercel/postgres": "^0.5.0",
"@writerai/writer-sdk": "^0.40.2",
"@xata.io/client": "^0.28.0",
"@xenova/transformers": "^2.17.2",
"@zilliz/milvus2-sdk-node": ">=2.3.5",
"apify-client": "^2.7.1",
"assemblyai": "^4.6.0",
Expand Down Expand Up @@ -249,6 +249,7 @@
"@google-cloud/storage": "^6.10.1 || ^7.7.0",
"@gradientai/nodejs-sdk": "^1.2.0",
"@huggingface/inference": "^2.6.4",
"@huggingface/transformers": "^3.2.3",
"@ibm-cloud/watsonx-ai": "*",
"@lancedb/lancedb": "^0.12.0",
"@langchain/core": ">=0.2.21 <0.4.0",
Expand Down Expand Up @@ -282,7 +283,6 @@
"@vercel/postgres": "^0.5.0",
"@writerai/writer-sdk": "^0.40.2",
"@xata.io/client": "^0.28.0",
"@xenova/transformers": "^2.17.2",
"@zilliz/milvus2-sdk-node": ">=2.3.5",
"apify-client": "^2.7.1",
"assemblyai": "^4.6.0",
Expand Down Expand Up @@ -430,6 +430,9 @@
"@huggingface/inference": {
"optional": true
},
"@huggingface/transformers": {
"optional": true
},
"@lancedb/lancedb": {
"optional": true
},
Expand Down Expand Up @@ -523,9 +526,6 @@
"@xata.io/client": {
"optional": true
},
"@xenova/transformers": {
"optional": true
},
"@zilliz/milvus2-sdk-node": {
"optional": true
},
Expand Down Expand Up @@ -1113,6 +1113,15 @@
"import": "./embeddings/hf_transformers.js",
"require": "./embeddings/hf_transformers.cjs"
},
"./embeddings/huggingface_transformers": {
"types": {
"import": "./embeddings/huggingface_transformers.d.ts",
"require": "./embeddings/huggingface_transformers.d.cts",
"default": "./embeddings/huggingface_transformers.d.ts"
},
"import": "./embeddings/huggingface_transformers.js",
"require": "./embeddings/huggingface_transformers.cjs"
},
"./embeddings/ibm": {
"types": {
"import": "./embeddings/ibm.d.ts",
Expand Down Expand Up @@ -3336,6 +3345,10 @@
"embeddings/hf_transformers.js",
"embeddings/hf_transformers.d.ts",
"embeddings/hf_transformers.d.cts",
"embeddings/huggingface_transformers.cjs",
"embeddings/huggingface_transformers.js",
"embeddings/huggingface_transformers.d.ts",
"embeddings/huggingface_transformers.d.cts",
"embeddings/ibm.cjs",
"embeddings/ibm.js",
"embeddings/ibm.d.ts",
Expand Down
34 changes: 11 additions & 23 deletions libs/langchain-community/src/embeddings/hf_transformers.ts
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
/* eslint-disable */
// @ts-nocheck
import type {
PretrainedOptions,
FeatureExtractionPipelineOptions,
Expand All @@ -6,13 +8,19 @@ import type {
import { Embeddings, type EmbeddingsParams } from "@langchain/core/embeddings";
import { chunkArray } from "@langchain/core/utils/chunk_array";

/**
* @deprecated Import from
* "@langchain/community/embeddings/huggingface_transformers"
* instead and use the new "@huggingface/transformers" peer dependency.
*/
export interface HuggingFaceTransformersEmbeddingsParams
extends EmbeddingsParams {
/**
* Model name to use
* Alias for `model`
*/
modelName: string;

/** Model name to use */
model: string;

Expand Down Expand Up @@ -42,24 +50,10 @@ export interface HuggingFaceTransformersEmbeddingsParams
*/
pipelineOptions?: FeatureExtractionPipelineOptions;
}

/**
* @example
* ```typescript
* const model = new HuggingFaceTransformersEmbeddings({
* model: "Xenova/all-MiniLM-L6-v2",
* });
*
* // Embed a single query
* const res = await model.embedQuery(
* "What would be a good company name for a company that makes colorful socks?"
* );
* console.log({ res });
*
* // Embed multiple documents
* const documentRes = await model.embedDocuments(["Hello world", "Bye bye"]);
* console.log({ documentRes });
* ```
* @deprecated Import from
* "@langchain/community/embeddings/huggingface_transformers"
* instead and use the new "@huggingface/transformers" peer dependency.
*/
export class HuggingFaceTransformersEmbeddings
extends Embeddings
Expand All @@ -83,7 +77,6 @@ export class HuggingFaceTransformersEmbeddings

constructor(fields?: Partial<HuggingFaceTransformersEmbeddingsParams>) {
super(fields ?? {});

this.modelName = fields?.model ?? fields?.modelName ?? this.model;
this.model = this.modelName;
this.stripNewLines = fields?.stripNewLines ?? this.stripNewLines;
Expand All @@ -95,27 +88,22 @@ export class HuggingFaceTransformersEmbeddings
...fields?.pipelineOptions,
};
}

async embedDocuments(texts: string[]): Promise<number[][]> {
const batches = chunkArray(
this.stripNewLines ? texts.map((t) => t.replace(/\n/g, " ")) : texts,
this.batchSize
);

const batchRequests = batches.map((batch) => this.runEmbedding(batch));
const batchResponses = await Promise.all(batchRequests);
const embeddings: number[][] = [];

for (let i = 0; i < batchResponses.length; i += 1) {
const batchResponse = batchResponses[i];
for (let j = 0; j < batchResponse.length; j += 1) {
embeddings.push(batchResponse[j]);
}
}

return embeddings;
}

async embedQuery(text: string): Promise<number[]> {
const data = await this.runEmbedding([
this.stripNewLines ? text.replace(/\n/g, " ") : text,
Expand Down
Loading

0 comments on commit 7e7df50

Please sign in to comment.