Skip to content

Commit

Permalink
Merge pull request #136 from deepghs/dev/transformers
Browse files Browse the repository at this point in the history
dev(narugo): prepare for the clip preprocessor
  • Loading branch information
narugo1992 authored Jan 27, 2025
2 parents 1fa81a1 + 92633f4 commit 2e8a796
Show file tree
Hide file tree
Showing 24 changed files with 1,343 additions and 12 deletions.
8 changes: 8 additions & 0 deletions .github/workflows/doc.yml
Original file line number Diff line number Diff line change
Expand Up @@ -47,11 +47,13 @@ jobs:
python -m pip install -r requirements-model.txt
python -m pip install -r requirements-doc.txt
python -m pip install -r requirements-torchvision.txt
python -m pip install -r requirements-transformers.txt
- name: Prepare dataset
uses: nick-fields/retry@v2
if: ${{ github.event_name == 'push' }}
env:
CI: 'true'
HF_TOKEN: ${{ secrets.HF_TOKEN }}
with:
shell: bash
timeout_minutes: 20
Expand All @@ -64,6 +66,7 @@ jobs:
env:
ENV_PROD: 'true'
PLANTUML_HOST: http://localhost:18080
HF_TOKEN: ${{ secrets.HF_TOKEN }}
run: |
plantumlcli -c
make docs
Expand Down Expand Up @@ -117,11 +120,15 @@ jobs:
sudo apt-get install -y make wget curl cloc graphviz pandoc
dot -V
python -m pip install -r requirements.txt
python -m pip install -r requirements-model.txt
python -m pip install -r requirements-doc.txt
python -m pip install -r requirements-torchvision.txt
python -m pip install -r requirements-transformers.txt
- name: Prepare dataset
uses: nick-fields/retry@v2
env:
CI: 'true'
HF_TOKEN: ${{ secrets.HF_TOKEN }}
with:
shell: bash
timeout_minutes: 20
Expand All @@ -137,6 +144,7 @@ jobs:
env:
ENV_PROD: 'true'
PLANTUML_HOST: http://localhost:18080
HF_TOKEN: ${{ secrets.HF_TOKEN }}
run: |
git fetch --all --tags
git branch -av
Expand Down
1 change: 1 addition & 0 deletions .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,7 @@ jobs:
run: |
pip install -r requirements-model.txt
pip install -r requirements-torchvision.txt
pip install -r requirements-transformers.txt
- name: Test the basic environment
shell: bash
run: |
Expand Down
1 change: 1 addition & 0 deletions docs/source/api_doc/preprocess/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -12,4 +12,5 @@ imgutils.preprocess
base
pillow
torchvision
transformers

88 changes: 88 additions & 0 deletions docs/source/api_doc/preprocess/transformers.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
imgutils.preprocess.transformers
===========================================

.. currentmodule:: imgutils.preprocess.transformers

.. automodule:: imgutils.preprocess.transformers


register_creators_for_transformers
--------------------------------------------------------------------

.. autofunction:: register_creators_for_transformers



NotProcessorTypeError
--------------------------------------------------------------------

.. autoclass:: NotProcessorTypeError



create_transforms_from_transformers
--------------------------------------------------------------------

.. autofunction:: create_transforms_from_transformers




create_clip_transforms
--------------------------------------------------------------------

.. autofunction:: create_clip_transforms



create_transforms_from_clip_processor
--------------------------------------------------------------------

.. autofunction:: create_transforms_from_clip_processor




create_convnext_transforms
--------------------------------------------------------------------

.. autofunction:: create_convnext_transforms



create_transforms_from_convnext_processor
--------------------------------------------------------------------

.. autofunction:: create_transforms_from_convnext_processor



create_vit_transforms
--------------------------------------------------------------------

.. autofunction:: create_vit_transforms



create_transforms_from_vit_processor
--------------------------------------------------------------------

.. autofunction:: create_transforms_from_vit_processor



create_siglip_transforms
--------------------------------------------------------------------

.. autofunction:: create_siglip_transforms



create_transforms_from_siglip_processor
--------------------------------------------------------------------

.. autofunction:: create_transforms_from_siglip_processor




56 changes: 56 additions & 0 deletions docs/source/api_doc/preprocess/transformers_supported.demo.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
import re
import warnings

import pandas as pd
import transformers
from hfutils.operate import get_hf_client

from imgutils.preprocess.transformers.base import _FN_CREATORS

# Build an RST table listing every ``*ImageProcessor`` class exported by ``transformers``,
# how many indexed Hugging Face repositories use it, and whether this package ships a
# matching ``create_transforms_from_*_processor`` function.

hf_client = get_hf_client()
# Repository index: one row per repository, with the image processor type it declares.
df = pd.read_parquet(hf_client.hf_hub_download(
    repo_id='deepghs/hf_models_preprocessors',
    repo_type='dataset',
    filename='repos.parquet'
))
df = df[~df['image_processor_type'].isnull()]
df = df.sort_values(by=['likes'], ascending=[False])

# Processor class name -> number of repositories declaring it.
# ``Series.to_dict()`` is used instead of ``reset_index().to_dict('records')`` because the
# column names produced by ``value_counts().reset_index()`` differ between pandas 1.x
# and 2.x, which made the ``item['count']`` lookup version-dependent.
d_repo_count = df['image_processor_type'].value_counts().to_dict()

# Normalized processor name (lower-case, underscores removed) -> creator function name.
d_create_functions = {}
for xfn in _FN_CREATORS:
    xname = xfn.__name__
    # Raw string: ``\s`` and ``\S`` are invalid escape sequences in a plain literal.
    matching = re.fullmatch(r'^create_transforms_from_(?P<name>[\s\S]+)_processor$', xname)
    if not matching:
        warnings.warn(f'Cannot determine transformer type of {xfn!r}.')
        continue
    raw_name = matching.group('name').replace('_', '').lower()
    d_create_functions[raw_name] = xname

suffix = 'ImageProcessor'

rows = []
for name in dir(transformers):
    if not name.endswith(suffix):
        continue

    # Resolve the attribute once; only concrete subclasses of BaseImageProcessor are listed.
    cls = getattr(transformers, name)
    if not isinstance(cls, type) or not issubclass(cls, transformers.BaseImageProcessor) \
            or cls is transformers.BaseImageProcessor:
        continue

    pname = name[:-len(suffix)].lower()
    create_fn_name = d_create_functions.get(pname)
    rows.append({
        'Name': name,
        'Supported': '✅' if create_fn_name is not None else '❌',
        'Repos': d_repo_count.get(name, 0),
        'Function': f':func:`{create_fn_name}`' if create_fn_name is not None else 'N/A'
    })

df = pd.DataFrame(rows)
# The percentage base is taken before filtering, so it covers every listed processor.
total = df['Repos'].sum()
# Hide rarely used processors to keep the table short.
df = df[df['Repos'] >= 5]
df = df.sort_values(by=['Repos', 'Supported', 'Name'], ascending=[False, True, True])
df['Repos'] = df['Repos'].map(lambda x: f'{x} ({x / total * 100.0:.2f}%)')
print(df.to_markdown(headers='keys', tablefmt='rst', index=False))
1 change: 1 addition & 0 deletions imgutils/preprocess/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,3 +3,4 @@
register_pillow_parse, parse_pillow_transforms
from .torchvision import register_torchvision_transform, create_torchvision_transforms, \
register_torchvision_parse, parse_torchvision_transforms
from .transformers import *
149 changes: 149 additions & 0 deletions imgutils/preprocess/pillow.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
from PIL import Image

from .base import NotParseTarget
from ..data import load_image

# noinspection PyUnresolvedReferences
_INT_TO_PILLOW = {
Expand Down Expand Up @@ -651,6 +652,154 @@ def _parse_normalize(obj: PillowNormalize):
}


class PillowConvertRGB:
    """
    Transform that converts a PIL image to RGB mode.

    The conversion itself is delegated to :func:`load_image`, called with ``mode='RGB'``
    and the configured ``force_background``.

    :param force_background: Background color to use when converting images with alpha channel.
        Default is ``'white'``.
    :type force_background: Optional[str]
    """

    def __init__(self, force_background: Optional[str] = 'white'):
        self.force_background = force_background

    def __call__(self, pic):
        """
        Convert ``pic`` to RGB.

        :param pic: Image to convert.
        :type pic: PIL.Image.Image

        :return: The image returned by :func:`load_image` for ``mode='RGB'``.
        :rtype: PIL.Image.Image
        :raises TypeError: If ``pic`` is not a PIL image.
        """
        if isinstance(pic, Image.Image):
            return load_image(pic, mode='RGB', force_background=self.force_background)
        raise TypeError('pic should be PIL Image. Got {}'.format(type(pic)))

    def __repr__(self):
        """
        Build a constructor-like representation of this transform.

        :return: Class name followed by the ``force_background`` setting.
        :rtype: str
        """
        cls_name = self.__class__.__name__
        return f'{cls_name}(force_background={self.force_background!r})'


@register_pillow_transform('convert_rgb')
def _create_convert_rgb(force_background: Optional[str] = 'white'):
    """
    Build the transform registered under the name ``'convert_rgb'``.

    :param force_background: Background color passed through to :class:`PillowConvertRGB`.
    :type force_background: Optional[str]

    :return: A new :class:`PillowConvertRGB` instance.
    :rtype: PillowConvertRGB
    """
    transform = PillowConvertRGB(force_background=force_background)
    return transform


@register_pillow_parse('convert_rgb')
def _parse_convert_rgb(obj):
    """
    Turn a :class:`PillowConvertRGB` instance back into its configuration dictionary.

    :param obj: Candidate object to parse.
    :type obj: Any

    :return: Dictionary holding the ``force_background`` setting.
    :rtype: dict
    :raises NotParseTarget: If ``obj`` is not a :class:`PillowConvertRGB` instance.
    """
    if isinstance(obj, PillowConvertRGB):
        config = {'force_background': obj.force_background}
        return config

    # Any other object is not handled by this parser.
    raise NotParseTarget


class PillowRescale:
    """
    Transform that multiplies a numpy array by a constant factor.

    Commonly used to normalize image pixel values (e.g., from [0-255] to [0-1]).

    :param rescale_factor: Factor to multiply pixel values by. Default is 1/255.
        It is stored as ``numpy.float32``.
    :type rescale_factor: float
    """

    def __init__(self, rescale_factor: float = 1 / 255):
        self.rescale_factor = np.float32(rescale_factor)

    def __call__(self, array):
        """
        Multiply ``array`` by the stored factor.

        :param array: Array to rescale.
        :type array: numpy.ndarray

        :return: Product of ``array`` and ``rescale_factor``.
        :rtype: numpy.ndarray
        :raises TypeError: If ``array`` is not a numpy array.
        """
        if isinstance(array, np.ndarray):
            return array * self.rescale_factor
        raise TypeError('Input should be a numpy.ndarray')

    def __repr__(self):
        """
        Build a constructor-like representation of this transform.

        :return: Class name followed by the ``rescale_factor`` setting.
        :rtype: str
        """
        cls_name = self.__class__.__name__
        return f'{cls_name}(rescale_factor={self.rescale_factor!r})'


@register_pillow_transform('rescale')
def _create_rescale(rescale_factor: float = 1 / 255):
    """
    Build the transform registered under the name ``'rescale'``.

    :param rescale_factor: Multiplier passed through to :class:`PillowRescale`.
    :type rescale_factor: float

    :return: A new :class:`PillowRescale` instance.
    :rtype: PillowRescale
    """
    transform = PillowRescale(rescale_factor=rescale_factor)
    return transform


@register_pillow_parse('rescale')
def _parse_rescale(obj):
    """
    Turn a :class:`PillowRescale` instance back into its configuration dictionary.

    :param obj: Candidate object to parse.
    :type obj: Any

    :return: Dictionary holding ``rescale_factor`` as a plain Python scalar
        (obtained through ``.item()``).
    :rtype: dict
    :raises NotParseTarget: If ``obj`` is not a :class:`PillowRescale` instance.
    """
    if isinstance(obj, PillowRescale):
        factor = obj.rescale_factor.item()
        return {'rescale_factor': factor}

    # Any other object is not handled by this parser.
    raise NotParseTarget


class PillowCompose:
"""
Composes several transforms together into a single transform.
Expand Down
Loading

0 comments on commit 2e8a796

Please sign in to comment.