diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 924207527..cf1908e99 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -113,6 +113,7 @@ jobs:
           pip install --no-cache-dir "server/[onnx]"
           pip install --no-cache-dir "server/[transformers]"
           pip install --no-cache-dir "server/[search]"
+          pip install open-clip-torch==2.7.0
       - name: Test
         id: test
         run: |
@@ -159,11 +160,13 @@ jobs:
           pip install -e "server/[tensorrt]"
           pip install -e "server/[onnx]"
           pip install -e "server/[transformers]"
+          pip install nvidia-tensorrt==8.4.1.5
           {
             pip install -e "server/[flash-attn]"
           } || {
             echo "flash attention was not installed."
           }
+          pip install open-clip-torch==2.7.0
       - name: Test
         id: test
         run: |
diff --git a/server/clip_server/executors/clip_tensorrt.py b/server/clip_server/executors/clip_tensorrt.py
index 62176fe4c..6f9067ab4 100644
--- a/server/clip_server/executors/clip_tensorrt.py
+++ b/server/clip_server/executors/clip_tensorrt.py
@@ -25,6 +25,7 @@ def __init__(
         num_worker_preprocess: int = 4,
         minibatch_size: int = 32,
         access_paths: str = '@r',
+        dtype: Optional[str] = 'fp32',
         **kwargs,
     ):
         """
@@ -36,6 +37,7 @@ def __init__(
             number if you encounter OOM errors.
         :param access_paths: The access paths to traverse on the input documents to get the images and texts to be
             processed. Visit https://docarray.jina.ai/fundamentals/documentarray/access-elements for more details.
+        :param dtype: inference data type, defaults to 'fp32'.
         """
         super().__init__(**kwargs)
@@ -51,6 +53,7 @@ def __init__(
             self._access_paths = kwargs['traversal_paths']
 
         self._device = device
+        self._dtype = dtype
 
         import torch
@@ -63,7 +66,7 @@ def __init__(
             torch.cuda.is_available()
         ), "CUDA/GPU is not available on Pytorch. Please check your CUDA installation"
 
-        self._model = CLIPTensorRTModel(name)
+        self._model = CLIPTensorRTModel(name=name, dtype=dtype)
 
         self._model.start_engines()
@@ -85,6 +88,7 @@ def _preproc_images(self, docs: 'DocumentArray', drop_image_content: bool):
             device=self._device,
             return_np=False,
             drop_image_content=drop_image_content,
+            dtype=self._dtype,
         )
 
     def _preproc_texts(self, docs: 'DocumentArray'):
diff --git a/server/clip_server/model/clip_trt.py b/server/clip_server/model/clip_trt.py
index 1510003c5..6866c01f9 100644
--- a/server/clip_server/model/clip_trt.py
+++ b/server/clip_server/model/clip_trt.py
@@ -1,5 +1,5 @@
 import os
-from typing import Dict
+from typing import Dict, Optional
 
 try:
     import tensorrt as trt
@@ -51,6 +51,7 @@ class CLIPTensorRTModel(BaseCLIPModel):
     def __init__(
         self,
         name: str,
+        dtype: Optional[str] = 'fp32',
     ):
         super().__init__(name)
@@ -59,23 +60,35 @@ def __init__(
             f'~/.cache/clip/{name.replace("/", "-").replace("::", "-")}'
         )
 
-        self._textual_path = os.path.join(
-            cache_dir,
-            f'textual.{ONNX_MODELS[name][0][1]}.trt',
-        )
-        self._visual_path = os.path.join(
-            cache_dir,
-            f'visual.{ONNX_MODELS[name][1][1]}.trt',
-        )
+        if dtype == 'fp16':
+            self._textual_path = os.path.join(
+                cache_dir,
+                f'textual.{ONNX_MODELS[name][0][1]}.fp16.trt',
+            )
+            self._visual_path = os.path.join(
+                cache_dir,
+                f'visual.{ONNX_MODELS[name][1][1]}.fp16.trt',
+            )
+        else:
+            self._textual_path = os.path.join(
+                cache_dir,
+                f'textual.{ONNX_MODELS[name][0][1]}.trt',
+            )
+            self._visual_path = os.path.join(
+                cache_dir,
+                f'visual.{ONNX_MODELS[name][1][1]}.trt',
+            )
 
         if not os.path.exists(self._textual_path) or not os.path.exists(
             self._visual_path
         ):
             from clip_server.model.clip_onnx import CLIPOnnxModel
 
+            fp16 = dtype == 'fp16'
+
             trt_logger: Logger = trt.Logger(trt.Logger.ERROR)
             runtime: Runtime = trt.Runtime(trt_logger)
-            onnx_model = CLIPOnnxModel(name)
+            onnx_model = CLIPOnnxModel(name=name, dtype=dtype)
 
             visual_engine = build_engine(
                 runtime=runtime,
@@ -95,7 +108,7 @@ def __init__(
                     onnx_model.image_size,
                 ),
                 workspace_size=10000 * 1024 * 1024,
-                fp16=False,
+                fp16=fp16,
                 int8=False,
             )
             save_engine(visual_engine, self._visual_path)
@@ -108,7 +121,7 @@ def __init__(
                 optimal_shape=(768, 77),
                 max_shape=(1024, 77),
                 workspace_size=10000 * 1024 * 1024,
-                fp16=False,
+                fp16=fp16,
                 int8=False,
             )
             save_engine(text_engine, self._textual_path)
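The clip_trt.py change above keys the cached TensorRT engine filename on the requested precision, so fp16 and fp32 engines built from the same ONNX export can coexist under ~/.cache/clip/. A minimal sketch of that path selection, assuming the ONNX_MODELS naming scheme used above; the helper name _engine_path is hypothetical and not part of the codebase:

import os

def _engine_path(cache_dir: str, stem: str, onnx_tag: str, dtype: str = 'fp32') -> str:
    # fp16 engines carry an extra '.fp16' marker so they never overwrite
    # the default fp32 engine file built for the same model.
    suffix = '.fp16.trt' if dtype == 'fp16' else '.trt'
    return os.path.join(cache_dir, f'{stem}.{onnx_tag}{suffix}')

# e.g. textual_path = _engine_path(cache_dir, 'textual', ONNX_MODELS[name][0][1], dtype)
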
"darwin" else []), + "tensorrt": ["nvidia-tensorrt==8.4.1.5"], + "transformers": ["transformers==4.25.1"], + "search": ["annlite>=0.3.10"], + "flash-attn": ["flash-attn==0.2.4"], }, classifiers=[ - 'Development Status :: 5 - Production/Stable', - 'Intended Audience :: Developers', - 'Intended Audience :: Education', - 'Intended Audience :: Science/Research', - 'Programming Language :: Python :: 3.7', - 'Programming Language :: Python :: 3.8', - 'Programming Language :: Python :: 3.9', - 'Programming Language :: Python :: 3.10', - 'Programming Language :: Unix Shell', - 'Environment :: Console', - 'License :: OSI Approved :: Apache Software License', - 'Operating System :: OS Independent', - 'Topic :: Database :: Database Engines/Servers', - 'Topic :: Scientific/Engineering :: Artificial Intelligence', - 'Topic :: Internet :: WWW/HTTP :: Indexing/Search', - 'Topic :: Scientific/Engineering :: Image Recognition', - 'Topic :: Multimedia :: Video', - 'Topic :: Scientific/Engineering', - 'Topic :: Scientific/Engineering :: Mathematics', - 'Topic :: Software Development', - 'Topic :: Software Development :: Libraries', - 'Topic :: Software Development :: Libraries :: Python Modules', + "Development Status :: 5 - Production/Stable", + "Intended Audience :: Developers", + "Intended Audience :: Education", + "Intended Audience :: Science/Research", + "Programming Language :: Python :: 3.7", + "Programming Language :: Python :: 3.8", + "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", + "Programming Language :: Unix Shell", + "Environment :: Console", + "License :: OSI Approved :: Apache Software License", + "Operating System :: OS Independent", + "Topic :: Database :: Database Engines/Servers", + "Topic :: Scientific/Engineering :: Artificial Intelligence", + "Topic :: Internet :: WWW/HTTP :: Indexing/Search", + "Topic :: Scientific/Engineering :: Image Recognition", + "Topic :: Multimedia :: Video", + "Topic :: Scientific/Engineering", + "Topic :: Scientific/Engineering :: Mathematics", + "Topic :: Software Development", + "Topic :: Software Development :: Libraries", + "Topic :: Software Development :: Libraries :: Python Modules", ], project_urls={ - 'Documentation': 'https://clip-as-service.jina.ai', - 'Source': 'https://github.com/jina-ai/clip-as-service/', - 'Tracker': 'https://github.com/jina-ai/clip-as-service/issues', + "Documentation": "https://clip-as-service.jina.ai", + "Source": "https://github.com/jina-ai/clip-as-service/", + "Tracker": "https://github.com/jina-ai/clip-as-service/issues", }, - keywords='jina openai clip deep-learning cross-modal multi-modal neural-search', + keywords="jina openai clip deep-learning cross-modal multi-modal neural-search", ) diff --git a/tests/conftest.py b/tests/conftest.py index 0726beec3..87057df4b 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -58,6 +58,17 @@ def make_trt_flow(port_generator, request): yield f +@pytest.fixture(scope='session', params=['tensorrt']) +def make_trt_flow_fp16(port_generator, request): + from clip_server.executors.clip_tensorrt import CLIPEncoder + + f = Flow(port=port_generator()).add( + name=request.param, uses=CLIPEncoder, uses_with={'dtype': 'fp16'} + ) + with f: + yield f + + @pytest.fixture(params=['torch']) def make_search_flow(tmpdir, port_generator, request): from clip_server.executors.clip_torch import CLIPEncoder diff --git a/tests/test_tensorrt.py b/tests/test_tensorrt.py index 7752073bb..596f65847 100644 --- a/tests/test_tensorrt.py +++ b/tests/test_tensorrt.py @@ 
-41,6 +41,39 @@ def test_docarray_inputs(make_trt_flow, inputs): assert inputs[0] is r[0] +@pytest.mark.gpu +@pytest.mark.parametrize( + 'inputs', + [ + [Document(text='hello, world'), Document(text='goodbye, world')], + DocumentArray([Document(text='hello, world'), Document(text='goodbye, world')]), + lambda: (Document(text='hello, world') for _ in range(10)), + DocumentArray( + [ + Document(uri='https://docarray.jina.ai/_static/favicon.png'), + Document( + uri=f'{os.path.dirname(os.path.abspath(__file__))}/img/00000.jpg' + ), + Document(text='hello, world'), + Document( + uri=f'{os.path.dirname(os.path.abspath(__file__))}/img/00000.jpg' + ).load_uri_to_image_tensor(), + ] + ), + DocumentArray.from_files( + f'{os.path.dirname(os.path.abspath(__file__))}/**/*.jpg' + ), + ], +) +def test_docarray_inputs_fp16(make_trt_flow_fp16, inputs): + c = Client(server=f'grpc://0.0.0.0:{make_trt_flow_fp16.port}') + r = c.encode(inputs if not callable(inputs) else inputs()) + assert isinstance(r, DocumentArray) + assert r.embeddings.shape + if hasattr(inputs, '__len__'): + assert inputs[0] is r[0] + + @pytest.mark.gpu @pytest.mark.asyncio @pytest.mark.parametrize(
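Taken together, these changes let a user request half-precision TensorRT engines by passing dtype='fp16' to the executor. A minimal end-to-end sketch mirroring the new make_trt_flow_fp16 fixture and test above; it assumes a CUDA GPU with the pinned nvidia-tensorrt wheel installed, and the port number is arbitrary:

from docarray import Document, DocumentArray
from jina import Flow
from clip_client import Client

from clip_server.executors.clip_tensorrt import CLIPEncoder

# Serve the TensorRT executor with fp16 engines (built and cached on first start-up).
f = Flow(port=51000).add(uses=CLIPEncoder, uses_with={'dtype': 'fp16'})

with f:
    c = Client(server='grpc://0.0.0.0:51000')
    r = c.encode(DocumentArray([Document(text='hello, world')]))
    print(r.embeddings.shape)  # one embedding row per input document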