diff --git a/.github/workflows/test-extra.yml b/.github/workflows/test-extra.yml
index eacd3ab0f568..e99ea516d754 100644
--- a/.github/workflows/test-extra.yml
+++ b/.github/workflows/test-extra.yml
@@ -35,30 +35,6 @@ jobs:
         run: |
           make --jobs=5 --output-sync=target -C backend/python/transformers
           make --jobs=5 --output-sync=target -C backend/python/transformers test
-
-  tests-sentencetransformers:
-    runs-on: ubuntu-latest
-    steps:
-      - name: Clone
-        uses: actions/checkout@v4
-        with:
-          submodules: true
-      - name: Dependencies
-        run: |
-          sudo apt-get update
-          sudo apt-get install build-essential ffmpeg
-          # Install UV
-          curl -LsSf https://astral.sh/uv/install.sh | sh
-          sudo apt-get install -y ca-certificates cmake curl patch python3-pip
-          sudo apt-get install -y libopencv-dev
-          pip install --user --no-cache-dir grpcio-tools==1.64.1
-
-      - name: Test sentencetransformers
-        run: |
-          make --jobs=5 --output-sync=target -C backend/python/sentencetransformers
-          make --jobs=5 --output-sync=target -C backend/python/sentencetransformers test
-
   tests-rerankers:
     runs-on: ubuntu-latest
     steps:
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index ecef0569d3b0..0ee93afad4d8 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -100,8 +100,7 @@ jobs:
           # The python3-grpc-tools package in 22.04 is too old
           pip install --user grpcio-tools
 
-          sudo rm -rfv /usr/bin/conda || true
-          PATH=$PATH:/opt/conda/bin make -C backend/python/sentencetransformers
+          make -C backend/python/transformers
 
           # Pre-build piper before we start tests in order to have shared libraries in place
           make sources/go-piper && \
diff --git a/Dockerfile b/Dockerfile
index 9fb07516ea13..4ddc921d897a 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -15,7 +15,7 @@ ARG TARGETARCH
 ARG TARGETVARIANT
 
 ENV DEBIAN_FRONTEND=noninteractive
-ENV EXTERNAL_GRPC_BACKENDS="coqui:/build/backend/python/coqui/run.sh,huggingface-embeddings:/build/backend/python/sentencetransformers/run.sh,transformers:/build/backend/python/transformers/run.sh,sentencetransformers:/build/backend/python/sentencetransformers/run.sh,rerankers:/build/backend/python/rerankers/run.sh,autogptq:/build/backend/python/autogptq/run.sh,bark:/build/backend/python/bark/run.sh,diffusers:/build/backend/python/diffusers/run.sh,openvoice:/build/backend/python/openvoice/run.sh,kokoro:/build/backend/python/kokoro/run.sh,vllm:/build/backend/python/vllm/run.sh,mamba:/build/backend/python/mamba/run.sh,exllama2:/build/backend/python/exllama2/run.sh,parler-tts:/build/backend/python/parler-tts/run.sh"
+ENV EXTERNAL_GRPC_BACKENDS="coqui:/build/backend/python/coqui/run.sh,transformers:/build/backend/python/transformers/run.sh,rerankers:/build/backend/python/rerankers/run.sh,autogptq:/build/backend/python/autogptq/run.sh,bark:/build/backend/python/bark/run.sh,diffusers:/build/backend/python/diffusers/run.sh,openvoice:/build/backend/python/openvoice/run.sh,kokoro:/build/backend/python/kokoro/run.sh,vllm:/build/backend/python/vllm/run.sh,mamba:/build/backend/python/mamba/run.sh,exllama2:/build/backend/python/exllama2/run.sh,parler-tts:/build/backend/python/parler-tts/run.sh"
 
 
 RUN apt-get update && \
@@ -456,9 +456,6 @@ RUN if [[ ( "${EXTRA_BACKENDS}" =~ "kokoro" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
     if [[ ( "${EXTRA_BACKENDS}" =~ "openvoice" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
         make -C backend/python/openvoice \
     ; fi && \
-    if [[ ( "${EXTRA_BACKENDS}" =~ "sentencetransformers" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
-        make -C backend/python/sentencetransformers \
-    ; fi && \
     if [[ ( "${EXTRA_BACKENDS}" =~ "exllama2" || -z "${EXTRA_BACKENDS}" ) && "$IMAGE_TYPE" == "extras" ]]; then \
         make -C backend/python/exllama2 \
     ; fi && \
diff --git a/Makefile b/Makefile
index 03468ffb0a8a..c6f541e7ebdf 100644
--- a/Makefile
+++ b/Makefile
@@ -497,7 +497,7 @@ test: prepare test-models/testmodel.ggml grpcs
	@echo 'Running tests'
	export GO_TAGS="tts stablediffusion debug"
	$(MAKE) prepare-test
-	HUGGINGFACE_GRPC=$(abspath ./)/backend/python/sentencetransformers/run.sh TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models \
+	HUGGINGFACE_GRPC=$(abspath ./)/backend/python/transformers/run.sh TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models \
	$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="!llama && !llama-gguf" --flake-attempts $(TEST_FLAKES) --fail-fast -v -r $(TEST_PATHS)
	$(MAKE) test-llama
	$(MAKE) test-llama-gguf
@@ -583,10 +583,10 @@ protogen-go-clean:
	$(RM) bin/*
 
 .PHONY: protogen-python
-protogen-python: autogptq-protogen bark-protogen coqui-protogen diffusers-protogen exllama2-protogen mamba-protogen rerankers-protogen sentencetransformers-protogen transformers-protogen parler-tts-protogen kokoro-protogen vllm-protogen openvoice-protogen
+protogen-python: autogptq-protogen bark-protogen coqui-protogen diffusers-protogen exllama2-protogen mamba-protogen rerankers-protogen transformers-protogen parler-tts-protogen kokoro-protogen vllm-protogen openvoice-protogen
 
 .PHONY: protogen-python-clean
-protogen-python-clean: autogptq-protogen-clean bark-protogen-clean coqui-protogen-clean diffusers-protogen-clean exllama2-protogen-clean mamba-protogen-clean sentencetransformers-protogen-clean rerankers-protogen-clean transformers-protogen-clean parler-tts-protogen-clean kokoro-protogen-clean vllm-protogen-clean openvoice-protogen-clean
+protogen-python-clean: autogptq-protogen-clean bark-protogen-clean coqui-protogen-clean diffusers-protogen-clean exllama2-protogen-clean mamba-protogen-clean rerankers-protogen-clean transformers-protogen-clean parler-tts-protogen-clean kokoro-protogen-clean vllm-protogen-clean openvoice-protogen-clean
 
 .PHONY: autogptq-protogen
 autogptq-protogen:
@@ -644,14 +644,6 @@ rerankers-protogen:
 rerankers-protogen-clean:
	$(MAKE) -C backend/python/rerankers protogen-clean
 
-.PHONY: sentencetransformers-protogen
-sentencetransformers-protogen:
-	$(MAKE) -C backend/python/sentencetransformers protogen
-
-.PHONY: sentencetransformers-protogen-clean
-sentencetransformers-protogen-clean:
-	$(MAKE) -C backend/python/sentencetransformers protogen-clean
-
 .PHONY: transformers-protogen
 transformers-protogen:
	$(MAKE) -C backend/python/transformers protogen
@@ -701,7 +693,6 @@ prepare-extra-conda-environments: protogen-python
	$(MAKE) -C backend/python/diffusers
	$(MAKE) -C backend/python/vllm
	$(MAKE) -C backend/python/mamba
-	$(MAKE) -C backend/python/sentencetransformers
	$(MAKE) -C backend/python/rerankers
	$(MAKE) -C backend/python/transformers
	$(MAKE) -C backend/python/parler-tts
diff --git a/backend/python/sentencetransformers/Makefile b/backend/python/sentencetransformers/Makefile
deleted file mode 100644
index 8b18e94338b8..000000000000
--- a/backend/python/sentencetransformers/Makefile
+++ /dev/null
@@ -1,31 +0,0 @@
-.PHONY: sentencetransformers
-sentencetransformers: protogen
-	bash ./install.sh
-
-
-.PHONY: run
-run: protogen
-	@echo "Running sentencetransformers..."
-	bash run.sh
-	@echo "sentencetransformers run."
-
-# It is not working well by using command line. It only6 works with IDE like VSCode.
-.PHONY: test
-test: protogen
-	@echo "Testing sentencetransformers..."
-	bash test.sh
-	@echo "sentencetransformers tested."
-
-.PHONY: protogen
-protogen: backend_pb2_grpc.py backend_pb2.py
-
-.PHONY: protogen-clean
-protogen-clean:
-	$(RM) backend_pb2_grpc.py backend_pb2.py
-
-backend_pb2_grpc.py backend_pb2.py:
-	python3 -m grpc_tools.protoc -I../.. --python_out=. --grpc_python_out=. backend.proto
-
-.PHONY: clean
-clean: protogen-clean
-	rm -rf venv __pycache__
\ No newline at end of file
diff --git a/backend/python/sentencetransformers/README.md b/backend/python/sentencetransformers/README.md
deleted file mode 100644
index 829cf0d1e843..000000000000
--- a/backend/python/sentencetransformers/README.md
+++ /dev/null
@@ -1,5 +0,0 @@
-# Creating a separate environment for the sentencetransformers project
-
-```
-make sentencetransformers
-```
\ No newline at end of file
diff --git a/backend/python/sentencetransformers/backend.py b/backend/python/sentencetransformers/backend.py
deleted file mode 100755
index 2a20bf609145..000000000000
--- a/backend/python/sentencetransformers/backend.py
+++ /dev/null
@@ -1,114 +0,0 @@
-#!/usr/bin/env python3
-"""
-Extra gRPC server for HuggingFace SentenceTransformer models.
-"""
-from concurrent import futures
-
-import argparse
-import signal
-import sys
-import os
-
-import time
-import backend_pb2
-import backend_pb2_grpc
-
-import grpc
-
-from sentence_transformers import SentenceTransformer
-
-_ONE_DAY_IN_SECONDS = 60 * 60 * 24
-
-# If MAX_WORKERS are specified in the environment use it, otherwise default to 1
-MAX_WORKERS = int(os.environ.get('PYTHON_GRPC_MAX_WORKERS', '1'))
-
-# Implement the BackendServicer class with the service methods
-class BackendServicer(backend_pb2_grpc.BackendServicer):
-    """
-    A gRPC servicer for the backend service.
-
-    This class implements the gRPC methods for the backend service, including Health, LoadModel, and Embedding.
-    """
-    def Health(self, request, context):
-        """
-        A gRPC method that returns the health status of the backend service.
-
-        Args:
-            request: A HealthRequest object that contains the request parameters.
-            context: A grpc.ServicerContext object that provides information about the RPC.
-
-        Returns:
-            A Reply object that contains the health status of the backend service.
-        """
-        return backend_pb2.Reply(message=bytes("OK", 'utf-8'))
-
-    def LoadModel(self, request, context):
-        """
-        A gRPC method that loads a model into memory.
-
-        Args:
-            request: A LoadModelRequest object that contains the request parameters.
-            context: A grpc.ServicerContext object that provides information about the RPC.
-
-        Returns:
-            A Result object that contains the result of the LoadModel operation.
-        """
-        model_name = request.Model
-        try:
-            self.model = SentenceTransformer(model_name, trust_remote_code=request.TrustRemoteCode)
-        except Exception as err:
-            return backend_pb2.Result(success=False, message=f"Unexpected {err=}, {type(err)=}")
-
-        # Implement your logic here for the LoadModel service
-        # Replace this with your desired response
-        return backend_pb2.Result(message="Model loaded successfully", success=True)
-
-    def Embedding(self, request, context):
-        """
-        A gRPC method that calculates embeddings for a given sentence.
-
-        Args:
-            request: An EmbeddingRequest object that contains the request parameters.
-            context: A grpc.ServicerContext object that provides information about the RPC.
-
-        Returns:
-            An EmbeddingResult object that contains the calculated embeddings.
-        """
-        # Implement your logic here for the Embedding service
-        # Replace this with your desired response
-        print("Calculated embeddings for: " + request.Embeddings, file=sys.stderr)
-        sentence_embeddings = self.model.encode(request.Embeddings)
-        return backend_pb2.EmbeddingResult(embeddings=sentence_embeddings)
-
-
-def serve(address):
-    server = grpc.server(futures.ThreadPoolExecutor(max_workers=MAX_WORKERS))
-    backend_pb2_grpc.add_BackendServicer_to_server(BackendServicer(), server)
-    server.add_insecure_port(address)
-    server.start()
-    print("Server started. Listening on: " + address, file=sys.stderr)
-
-    # Define the signal handler function
-    def signal_handler(sig, frame):
-        print("Received termination signal. Shutting down...")
-        server.stop(0)
-        sys.exit(0)
-
-    # Set the signal handlers for SIGINT and SIGTERM
-    signal.signal(signal.SIGINT, signal_handler)
-    signal.signal(signal.SIGTERM, signal_handler)
-
-    try:
-        while True:
-            time.sleep(_ONE_DAY_IN_SECONDS)
-    except KeyboardInterrupt:
-        server.stop(0)
-
-if __name__ == "__main__":
-    parser = argparse.ArgumentParser(description="Run the gRPC server.")
-    parser.add_argument(
-        "--addr", default="localhost:50051", help="The address to bind the server to."
-    )
-    args = parser.parse_args()
-
-    serve(args.addr)
diff --git a/backend/python/sentencetransformers/install.sh b/backend/python/sentencetransformers/install.sh
deleted file mode 100755
index 36443ef1c559..000000000000
--- a/backend/python/sentencetransformers/install.sh
+++ /dev/null
@@ -1,14 +0,0 @@
-#!/bin/bash
-set -e
-
-source $(dirname $0)/../common/libbackend.sh
-
-# This is here because the Intel pip index is broken and returns 200 status codes for every package name, it just doesn't return any package links.
-# This makes uv think that the package exists in the Intel pip index, and by default it stops looking at other pip indexes once it finds a match.
-# We need uv to continue falling through to the pypi default index to find optimum[openvino] in the pypi index
-# the --upgrade actually allows us to *downgrade* torch to the version provided in the Intel pip index
-if [ "x${BUILD_PROFILE}" == "xintel" ]; then
-    EXTRA_PIP_INSTALL_FLAGS+=" --upgrade --index-strategy=unsafe-first-match"
-fi
-
-installRequirements
diff --git a/backend/python/sentencetransformers/requirements-cpu.txt b/backend/python/sentencetransformers/requirements-cpu.txt
deleted file mode 100644
index 1e23f68cf5f6..000000000000
--- a/backend/python/sentencetransformers/requirements-cpu.txt
+++ /dev/null
@@ -1,6 +0,0 @@
-torch==2.4.1
-accelerate
-transformers
-bitsandbytes
-sentence-transformers==3.3.1
-transformers
\ No newline at end of file
diff --git a/backend/python/sentencetransformers/requirements-cublas11.txt b/backend/python/sentencetransformers/requirements-cublas11.txt
deleted file mode 100644
index 3900aba90f55..000000000000
--- a/backend/python/sentencetransformers/requirements-cublas11.txt
+++ /dev/null
@@ -1,5 +0,0 @@
---extra-index-url https://download.pytorch.org/whl/cu118
-torch==2.4.1+cu118
-accelerate
-sentence-transformers==3.3.1
-transformers
\ No newline at end of file
diff --git a/backend/python/sentencetransformers/requirements-cublas12.txt b/backend/python/sentencetransformers/requirements-cublas12.txt
deleted file mode 100644
index 2afd052036a1..000000000000
--- a/backend/python/sentencetransformers/requirements-cublas12.txt
+++ /dev/null
@@ -1,4 +0,0 @@
-torch==2.4.1
-accelerate
-sentence-transformers==3.3.1
-transformers
\ No newline at end of file
diff --git a/backend/python/sentencetransformers/requirements-hipblas.txt b/backend/python/sentencetransformers/requirements-hipblas.txt
deleted file mode 100644
index b472d3719407..000000000000
--- a/backend/python/sentencetransformers/requirements-hipblas.txt
+++ /dev/null
@@ -1,5 +0,0 @@
---extra-index-url https://download.pytorch.org/whl/rocm6.0
-torch==2.4.1+rocm6.0
-accelerate
-sentence-transformers==3.3.1
-transformers
\ No newline at end of file
diff --git a/backend/python/sentencetransformers/requirements-intel.txt b/backend/python/sentencetransformers/requirements-intel.txt
deleted file mode 100644
index e9b72aab5ddd..000000000000
--- a/backend/python/sentencetransformers/requirements-intel.txt
+++ /dev/null
@@ -1,9 +0,0 @@
---extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
-intel-extension-for-pytorch==2.3.110+xpu
-torch==2.3.1+cxx11.abi
-oneccl_bind_pt==2.3.100+xpu
-optimum[openvino]
-setuptools
-accelerate
-sentence-transformers==3.3.1
-transformers
\ No newline at end of file
diff --git a/backend/python/sentencetransformers/requirements.txt b/backend/python/sentencetransformers/requirements.txt
deleted file mode 100644
index 6e03c63f458b..000000000000
--- a/backend/python/sentencetransformers/requirements.txt
+++ /dev/null
@@ -1,5 +0,0 @@
-grpcio==1.69.0
-protobuf
-certifi
-datasets
-einops
\ No newline at end of file
diff --git a/backend/python/sentencetransformers/run.sh b/backend/python/sentencetransformers/run.sh
deleted file mode 100755
index 375c07e5f426..000000000000
--- a/backend/python/sentencetransformers/run.sh
+++ /dev/null
@@ -1,4 +0,0 @@
-#!/bin/bash
-source $(dirname $0)/../common/libbackend.sh
-
-startBackend $@
\ No newline at end of file
diff --git a/backend/python/sentencetransformers/test.py b/backend/python/sentencetransformers/test.py
deleted file mode 100644
index 9df52b141dbb..000000000000
--- a/backend/python/sentencetransformers/test.py
+++ /dev/null
@@ -1,81 +0,0 @@
-"""
-A test script to test the gRPC service
-"""
-import unittest
-import subprocess
-import time
-import backend_pb2
-import backend_pb2_grpc
-
-import grpc
-
-
-class TestBackendServicer(unittest.TestCase):
-    """
-    TestBackendServicer is the class that tests the gRPC service
-    """
-    def setUp(self):
-        """
-        This method sets up the gRPC service by starting the server
-        """
-        self.service = subprocess.Popen(["python3", "backend.py", "--addr", "localhost:50051"])
-        time.sleep(10)
-
-    def tearDown(self) -> None:
-        """
-        This method tears down the gRPC service by terminating the server
-        """
-        self.service.kill()
-        self.service.wait()
-
-    def test_server_startup(self):
-        """
-        This method tests if the server starts up successfully
-        """
-        try:
-            self.setUp()
-            with grpc.insecure_channel("localhost:50051") as channel:
-                stub = backend_pb2_grpc.BackendStub(channel)
-                response = stub.Health(backend_pb2.HealthMessage())
-                self.assertEqual(response.message, b'OK')
-        except Exception as err:
-            print(err)
-            self.fail("Server failed to start")
-        finally:
-            self.tearDown()
-
-    def test_load_model(self):
-        """
-        This method tests if the model is loaded successfully
-        """
-        try:
-            self.setUp()
-            with grpc.insecure_channel("localhost:50051") as channel:
-                stub = backend_pb2_grpc.BackendStub(channel)
-                response = stub.LoadModel(backend_pb2.ModelOptions(Model="bert-base-nli-mean-tokens"))
-                self.assertTrue(response.success)
-                self.assertEqual(response.message, "Model loaded successfully")
-        except Exception as err:
-            print(err)
-            self.fail("LoadModel service failed")
-        finally:
-            self.tearDown()
-
-    def test_embedding(self):
-        """
-        This method tests if the embeddings are generated successfully
-        """
-        try:
-            self.setUp()
-            with grpc.insecure_channel("localhost:50051") as channel:
-                stub = backend_pb2_grpc.BackendStub(channel)
-                response = stub.LoadModel(backend_pb2.ModelOptions(Model="bert-base-nli-mean-tokens"))
-                self.assertTrue(response.success)
-                embedding_request = backend_pb2.PredictOptions(Embeddings="This is a test sentence.")
-                embedding_response = stub.Embedding(embedding_request)
-                self.assertIsNotNone(embedding_response.embeddings)
-        except Exception as err:
-            print(err)
-            self.fail("Embedding service failed")
-        finally:
-            self.tearDown()
\ No newline at end of file
diff --git a/backend/python/sentencetransformers/test.sh b/backend/python/sentencetransformers/test.sh
deleted file mode 100755
index 6940b0661df2..000000000000
--- a/backend/python/sentencetransformers/test.sh
+++ /dev/null
@@ -1,6 +0,0 @@
-#!/bin/bash
-set -e
-
-source $(dirname $0)/../common/libbackend.sh
-
-runUnittests
diff --git a/backend/python/transformers/backend.py b/backend/python/transformers/backend.py
index 27257934f582..9b65c6db2ea3 100644
--- a/backend/python/transformers/backend.py
+++ b/backend/python/transformers/backend.py
@@ -25,6 +25,8 @@
 from transformers import AutoProcessor, MusicgenForConditionalGeneration
 from scipy.io import wavfile
 import outetts
+from sentence_transformers import SentenceTransformer
+
 
 _ONE_DAY_IN_SECONDS = 60 * 60 * 24
 
@@ -88,10 +90,12 @@ def LoadModel(self, request, context):
         self.CUDA = torch.cuda.is_available()
         self.OV=False
         self.OuteTTS=False
+        self.SentenceTransformer = False
 
         device_map="cpu"
 
         quantization = None
+        autoTokenizer = True
 
         if self.CUDA:
             from transformers import BitsAndBytesConfig, AutoModelForCausalLM
@@ -195,9 +199,11 @@ def LoadModel(self, request, context):
                                                  device=device_map)
             self.OV = True
         elif request.Type == "MusicgenForConditionalGeneration":
+            autoTokenizer = False
             self.processor = AutoProcessor.from_pretrained(model_name)
             self.model = MusicgenForConditionalGeneration.from_pretrained(model_name)
         elif request.Type == "OuteTTS":
+            autoTokenizer = False
             options = request.Options
             MODELNAME = "OuteAI/OuteTTS-0.3-1B"
             TOKENIZER = "OuteAI/OuteTTS-0.3-1B"
@@ -235,6 +241,10 @@ def LoadModel(self, request, context):
                 self.speaker = self.interface.create_speaker(audio_path=self.AudioPath)
             else:
                 self.speaker = self.interface.load_default_speaker(name=SPEAKER)
+        elif request.Type == "SentenceTransformer":
+            autoTokenizer = False
+            self.model = SentenceTransformer(model_name, trust_remote_code=request.TrustRemoteCode)
+            self.SentenceTransformer = True
 
         else:
             print("Automodel", file=sys.stderr)
             self.model = AutoModel.from_pretrained(model_name,
@@ -250,7 +260,7 @@ def LoadModel(self, request, context):
         else:
             self.max_tokens = 512
 
-        if request.Type != "MusicgenForConditionalGeneration":
+        if autoTokenizer:
             self.tokenizer = AutoTokenizer.from_pretrained(model_name, use_safetensors=True)
             self.XPU = False
 
@@ -286,18 +296,26 @@ def Embedding(self, request, context):
         max_length = 512
         if request.Tokens != 0:
             max_length = request.Tokens
-        encoded_input = self.tokenizer(request.Embeddings, padding=True, truncation=True, max_length=max_length, return_tensors="pt")
 
-        # Create word embeddings
-        if self.CUDA:
-            encoded_input = encoded_input.to("cuda")
+        embeds = None
+
+        if self.SentenceTransformer:
+            print("Calculated embeddings for: " + request.Embeddings, file=sys.stderr)
+            embeds = self.model.encode(request.Embeddings)
+        else:
+            encoded_input = self.tokenizer(request.Embeddings, padding=True, truncation=True, max_length=max_length, return_tensors="pt")
+
+            # Create word embeddings
+            if self.CUDA:
+                encoded_input = encoded_input.to("cuda")
 
-        with torch.no_grad():
-            model_output = self.model(**encoded_input)
+            with torch.no_grad():
+                model_output = self.model(**encoded_input)
 
-        # Pool to get sentence embeddings; i.e. generate one 1024 vector for the entire sentence
-        sentence_embeddings = mean_pooling(model_output, encoded_input['attention_mask'])
-        return backend_pb2.EmbeddingResult(embeddings=sentence_embeddings[0])
+            # Pool to get sentence embeddings; i.e. generate one 1024 vector for the entire sentence
+            sentence_embeddings = mean_pooling(model_output, encoded_input['attention_mask'])
+            embeds = sentence_embeddings[0]
+        return backend_pb2.EmbeddingResult(embeddings=embeds)
 
     async def _predict(self, request, context, streaming=False):
         set_seed(request.Seed)
diff --git a/backend/python/transformers/requirements-cpu.txt b/backend/python/transformers/requirements-cpu.txt
index 56b773256c43..421c4b809c4e 100644
--- a/backend/python/transformers/requirements-cpu.txt
+++ b/backend/python/transformers/requirements-cpu.txt
@@ -3,4 +3,5 @@ llvmlite==0.43.0
 accelerate
 transformers
 bitsandbytes
-outetts
\ No newline at end of file
+outetts
+sentence-transformers==3.3.1
diff --git a/backend/python/transformers/requirements-cublas11.txt b/backend/python/transformers/requirements-cublas11.txt
index 924b0086cc9f..c5d18d0948e6 100644
--- a/backend/python/transformers/requirements-cublas11.txt
+++ b/backend/python/transformers/requirements-cublas11.txt
@@ -4,4 +4,5 @@ llvmlite==0.43.0
 accelerate
 transformers
 bitsandbytes
-outetts
\ No newline at end of file
+outetts
+sentence-transformers==3.3.1
diff --git a/backend/python/transformers/requirements-cublas12.txt b/backend/python/transformers/requirements-cublas12.txt
index 0feb3d81bd98..c0bcfc87e6b8 100644
--- a/backend/python/transformers/requirements-cublas12.txt
+++ b/backend/python/transformers/requirements-cublas12.txt
@@ -3,4 +3,5 @@ accelerate
 llvmlite==0.43.0
 transformers
 bitsandbytes
-outetts
\ No newline at end of file
+outetts
+sentence-transformers==3.3.1
diff --git a/backend/python/transformers/requirements-hipblas.txt b/backend/python/transformers/requirements-hipblas.txt
index fa65fb8e8d00..e7f538601772 100644
--- a/backend/python/transformers/requirements-hipblas.txt
+++ b/backend/python/transformers/requirements-hipblas.txt
@@ -4,4 +4,5 @@ accelerate
 transformers
 llvmlite==0.43.0
 bitsandbytes
-outetts
\ No newline at end of file
+outetts
+sentence-transformers==3.3.1
diff --git a/backend/python/transformers/requirements-intel.txt b/backend/python/transformers/requirements-intel.txt
index 4a29559941e6..aada6e00fe80 100644
--- a/backend/python/transformers/requirements-intel.txt
+++ b/backend/python/transformers/requirements-intel.txt
@@ -6,4 +6,5 @@ optimum[openvino]
 llvmlite==0.43.0
 intel-extension-for-transformers
 bitsandbytes
-outetts
\ No newline at end of file
+outetts
+sentence-transformers==3.3.1
diff --git a/backend/python/transformers/test.py b/backend/python/transformers/test.py
index 305b0a938c52..14efa6a7d8ab 100644
--- a/backend/python/transformers/test.py
+++ b/backend/python/transformers/test.py
@@ -133,5 +133,41 @@ def test_sound_generation(self):
         except Exception as err:
             print(err)
             self.fail("SoundGeneration service failed")
+        finally:
+            self.tearDown()
+
+    def test_embed_load_model(self):
+        """
+        This method tests if the model is loaded successfully
+        """
+        try:
+            self.setUp()
+            with grpc.insecure_channel("localhost:50051") as channel:
+                stub = backend_pb2_grpc.BackendStub(channel)
+                response = stub.LoadModel(backend_pb2.ModelOptions(Model="bert-base-nli-mean-tokens",Type="SentenceTransformer"))
+                self.assertTrue(response.success)
+                self.assertEqual(response.message, "Model loaded successfully")
+        except Exception as err:
+            print(err)
+            self.fail("LoadModel service failed")
+        finally:
+            self.tearDown()
+
+    def test_sentencetransformers_embedding(self):
+        """
+        This method tests if the embeddings are generated successfully
+        """
+        try:
+            self.setUp()
+            with grpc.insecure_channel("localhost:50051") as channel:
+                stub = backend_pb2_grpc.BackendStub(channel)
+                response = stub.LoadModel(backend_pb2.ModelOptions(Model="bert-base-nli-mean-tokens",Type="SentenceTransformer"))
+                self.assertTrue(response.success)
+                embedding_request = backend_pb2.PredictOptions(Embeddings="This is a test sentence.")
+                embedding_response = stub.Embedding(embedding_request)
+                self.assertIsNotNone(embedding_response.embeddings)
+        except Exception as err:
+            print(err)
+            self.fail("Embedding service failed")
         finally:
             self.tearDown()
\ No newline at end of file
diff --git a/core/http/app_test.go b/core/http/app_test.go
index 6bf1806b4f14..a2e2f7585100 100644
--- a/core/http/app_test.go
+++ b/core/http/app_test.go
@@ -822,7 +822,7 @@ var _ = Describe("API test", func() {
 
			application, err := application.New(
				append(commonOpts,
-					config.WithExternalBackend("huggingface", os.Getenv("HUGGINGFACE_GRPC")),
+					config.WithExternalBackend("transformers", os.Getenv("HUGGINGFACE_GRPC")),
					config.WithContext(c),
					config.WithModelPath(modelPath),
				)...)
diff --git a/pkg/model/initializers.go b/pkg/model/initializers.go
index f4675050ab04..eb3e4fdf75c1 100644
--- a/pkg/model/initializers.go
+++ b/pkg/model/initializers.go
@@ -22,11 +22,19 @@ import (
 )
 
 var Aliases map[string]string = map[string]string{
-	"go-llama":              LLamaCPP,
-	"llama":                 LLamaCPP,
-	"embedded-store":        LocalStoreBackend,
-	"langchain-huggingface": LCHuggingFaceBackend,
-	"transformers-musicgen": TransformersBackend,
+	"go-llama":               LLamaCPP,
+	"llama":                  LLamaCPP,
+	"embedded-store":         LocalStoreBackend,
+	"huggingface-embeddings": TransformersBackend,
+	"langchain-huggingface":  LCHuggingFaceBackend,
+	"transformers-musicgen":  TransformersBackend,
+	"sentencetransformers":   TransformersBackend,
+}
+
+var TypeAlias map[string]string = map[string]string{
+	"sentencetransformers":   "SentenceTransformer",
+	"huggingface-embeddings": "SentenceTransformer",
+	"transformers-musicgen":  "MusicgenForConditionalGeneration",
 }
 
 var AutoDetect = os.Getenv("DISABLE_AUTODETECT") != "true"
@@ -396,6 +404,7 @@ func (ml *ModelLoader) grpcModel(backend string, autodetect bool, o *Options) fu
		}
 
		log.Debug().Msgf("Wait for the service to start up")
+		log.Debug().Msgf("Options: %+v", o.gRPCOptions)
 
		// Wait for the service to start up
		ready := false
@@ -460,8 +469,15 @@ func (ml *ModelLoader) backendLoader(opts ...Option) (client grpc.Backend, err e
 
	backend := strings.ToLower(o.backendString)
	if realBackend, exists := Aliases[backend]; exists {
+		typeAlias, exists := TypeAlias[backend]
+		if exists {
+			log.Debug().Msgf("'%s' is a type alias of '%s' (%s)", backend, realBackend, typeAlias)
+			o.gRPCOptions.Type = typeAlias
+		} else {
+			log.Debug().Msgf("'%s' is an alias of '%s'", backend, realBackend)
+		}
+
		backend = realBackend
-		log.Debug().Msgf("%s is an alias of %s", backend, realBackend)
	}
 
	ml.stopActiveBackends(o.modelID, o.singleActiveBackend)
diff --git a/tests/models_fixtures/grpc.yaml b/tests/models_fixtures/grpc.yaml
index 31c406ab8484..8c51992056c5 100644
--- a/tests/models_fixtures/grpc.yaml
+++ b/tests/models_fixtures/grpc.yaml
@@ -1,5 +1,5 @@
 name: code-search-ada-code-001
-backend: huggingface
+backend: sentencetransformers
 embeddings: true
 parameters:
   model: all-MiniLM-L6-v2
\ No newline at end of file
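For reviewers, here is a minimal smoke-test sketch of the merged code path (not part of the patch). It mirrors the new `test_sentencetransformers_embedding` test above and assumes the `backend_pb2`/`backend_pb2_grpc` stubs have been generated (`make -C backend/python/transformers protogen`) and that `backend/python/transformers/run.sh` is already serving on `localhost:50051`; the model name is illustrative.

```python
# Sketch: exercise the SentenceTransformer path now built into the
# transformers backend. Assumes generated stubs are importable and a
# backend instance is listening on localhost:50051.
import grpc

import backend_pb2
import backend_pb2_grpc

with grpc.insecure_channel("localhost:50051") as channel:
    stub = backend_pb2_grpc.BackendStub(channel)

    # Type="SentenceTransformer" routes LoadModel to the sentence-transformers
    # branch (autoTokenizer stays off; the model loads via SentenceTransformer()).
    result = stub.LoadModel(
        backend_pb2.ModelOptions(Model="all-MiniLM-L6-v2", Type="SentenceTransformer")
    )
    assert result.success, result.message

    # Embedding requests are now answered by SentenceTransformer.encode()
    # instead of the AutoModel + mean_pooling path.
    reply = stub.Embedding(
        backend_pb2.PredictOptions(Embeddings="This is a test sentence.")
    )
    print(f"embedding has {len(reply.embeddings)} dimensions")
```

Existing configs that set `backend: sentencetransformers` or `backend: huggingface-embeddings` keep working: the new `Aliases` and `TypeAlias` maps route them to the transformers backend and set the gRPC `Type` to `SentenceTransformer` automatically, as the updated `grpc.yaml` fixture shows.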