Merge pull request #136 from deepghs/dev/transformers

dev(narugo): prepare for the clip preprocessor
deepghs · Jan 27, 2025 · 2e8a796 · 2e8a796
2 parents 1fa81a1 + 92633f4
commit 2e8a796
Show file tree

Hide file tree

Showing 24 changed files with 1,343 additions and 12 deletions.
diff --git a/.github/workflows/doc.yml b/.github/workflows/doc.yml
@@ -47,11 +47,13 @@ jobs:
           python -m pip install -r requirements-model.txt
           python -m pip install -r requirements-doc.txt
           python -m pip install -r requirements-torchvision.txt
+          python -m pip install -r requirements-transformers.txt
       - name: Prepare dataset
         uses: nick-fields/retry@v2
         if: ${{ github.event_name == 'push' }}
         env:
           CI: 'true'
+          HF_TOKEN: ${{ secrets.HF_TOKEN }}
         with:
           shell: bash
           timeout_minutes: 20
@@ -64,6 +66,7 @@ jobs:
         env:
           ENV_PROD: 'true'
           PLANTUML_HOST: http://localhost:18080
+          HF_TOKEN: ${{ secrets.HF_TOKEN }}
         run: |
           plantumlcli -c
           make docs
@@ -117,11 +120,15 @@ jobs:
           sudo apt-get install -y make wget curl cloc graphviz pandoc
           dot -V
           python -m pip install -r requirements.txt
+          python -m pip install -r requirements-model.txt
           python -m pip install -r requirements-doc.txt
+          python -m pip install -r requirements-torchvision.txt
+          python -m pip install -r requirements-transformers.txt
       - name: Prepare dataset
         uses: nick-fields/retry@v2
         env:
           CI: 'true'
+          HF_TOKEN: ${{ secrets.HF_TOKEN }}
         with:
           shell: bash
           timeout_minutes: 20
@@ -137,6 +144,7 @@ jobs:
         env:
           ENV_PROD: 'true'
           PLANTUML_HOST: http://localhost:18080
+          HF_TOKEN: ${{ secrets.HF_TOKEN }}
         run: |
           git fetch --all --tags
           git branch -av

diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
@@ -101,6 +101,7 @@ jobs:
         run: |
           pip install -r requirements-model.txt
           pip install -r requirements-torchvision.txt
+          pip install -r requirements-transformers.txt
       - name: Test the basic environment
         shell: bash
         run: |

diff --git a/docs/source/api_doc/preprocess/index.rst b/docs/source/api_doc/preprocess/index.rst
@@ -12,4 +12,5 @@ imgutils.preprocess
     base
     pillow
     torchvision
+    transformers
 
diff --git a/docs/source/api_doc/preprocess/transformers.rst b/docs/source/api_doc/preprocess/transformers.rst
@@ -0,0 +1,88 @@
+imgutils.preprocess.transformers
+===========================================
+
+.. currentmodule:: imgutils.preprocess.transformers
+
+.. automodule:: imgutils.preprocess.transformers
+
+
+register_creators_for_transformers
+--------------------------------------------------------------------
+
+.. autofunction:: register_creators_for_transformers
+
+
+
+NotProcessorTypeError
+--------------------------------------------------------------------
+
+.. autoclass:: NotProcessorTypeError
+
+
+
+create_transforms_from_transformers
+--------------------------------------------------------------------
+
+.. autofunction:: create_transforms_from_transformers
+
+
+
+
+create_clip_transforms
+--------------------------------------------------------------------
+
+.. autofunction:: create_clip_transforms
+
+
+
+create_transforms_from_clip_processor
+--------------------------------------------------------------------
+
+.. autofunction:: create_transforms_from_clip_processor
+
+
+
+
+create_convnext_transforms
+--------------------------------------------------------------------
+
+.. autofunction:: create_convnext_transforms
+
+
+
+create_transforms_from_convnext_processor
+--------------------------------------------------------------------
+
+.. autofunction:: create_transforms_from_convnext_processor
+
+
+
+create_vit_transforms
+--------------------------------------------------------------------
+
+.. autofunction:: create_vit_transforms
+
+
+
+create_transforms_from_vit_processor
+--------------------------------------------------------------------
+
+.. autofunction:: create_transforms_from_vit_processor
+
+
+
+create_siglip_transforms
+--------------------------------------------------------------------
+
+.. autofunction:: create_siglip_transforms
+
+
+
+create_transforms_from_siglip_processor
+--------------------------------------------------------------------
+
+.. autofunction:: create_transforms_from_siglip_processor
+
+
+
+
diff --git a/docs/source/api_doc/preprocess/transformers_supported.demo.py b/docs/source/api_doc/preprocess/transformers_supported.demo.py
@@ -0,0 +1,56 @@
+import re
+import warnings
+
+import pandas as pd
+import transformers
+from hfutils.operate import get_hf_client
+
+from imgutils.preprocess.transformers.base import _FN_CREATORS
+
+# Build an RST table listing every ``*ImageProcessor`` class exported by
+# ``transformers``, whether imgutils has a creator function for it, and how many
+# repositories in the ``deepghs/hf_models_preprocessors`` index use it.
+hf_client = get_hf_client()
+df = pd.read_parquet(hf_client.hf_hub_download(
+    repo_id='deepghs/hf_models_preprocessors',
+    repo_type='dataset',
+    filename='repos.parquet'
+))
+# Only repositories that declare an image processor type are counted.
+df = df[~df['image_processor_type'].isnull()]
+df = df.sort_values(by=['likes'], ascending=[False])
+
+# Image processor class name -> number of repositories using it.
+# ``Series.to_dict`` is used instead of ``reset_index().to_dict('records')`` so the
+# script does not depend on the column names ``reset_index`` generates, which
+# differ between pandas 1.x and 2.x.
+d_repo_count = df['image_processor_type'].value_counts().to_dict()
+
+# Raw string: ``\s`` / ``\S`` must reach the regex engine instead of being treated as
+# (invalid) string escapes. ``fullmatch`` already anchors both ends of the name.
+_creator_name_pattern = re.compile(r'create_transforms_from_(?P<name>[\s\S]+)_processor')
+
+# Normalized processor name (lowercase, underscores removed) -> creator function name.
+d_create_functions = {}
+for xfn in _FN_CREATORS:
+    xname = xfn.__name__
+    matching = _creator_name_pattern.fullmatch(xname)
+    if not matching:
+        warnings.warn(f'Cannot determine transformer type of {xfn!r}.')
+        continue
+    raw_name = matching.group('name').replace('_', '').lower()
+    d_create_functions[raw_name] = xname
+
+suffix = 'ImageProcessor'
+columns = ['Name', 'Supported', 'Repos', 'Function']
+
+rows = []
+for name in dir(transformers):
+    # Check the name first so attributes that cannot match are never resolved.
+    if not name.endswith(suffix):
+        continue
+    cls = getattr(transformers, name)
+    if not (isinstance(cls, type) and issubclass(cls, transformers.BaseImageProcessor)):
+        continue
+    if cls is transformers.BaseImageProcessor:
+        continue
+
+    # 'CLIPImageProcessor' -> 'clip', which is how creator names are normalized above.
+    pname = name[:-len(suffix)].lower()
+    creator_name = d_create_functions.get(pname)
+    rows.append({
+        'Name': name,
+        'Supported': '✅' if creator_name is not None else '❌',
+        'Repos': d_repo_count.get(name, 0),
+        'Function': f':func:`{creator_name}`' if creator_name is not None else 'N/A',
+    })
+
+# Explicit columns keep the frame well-formed even when no processor class was found.
+df = pd.DataFrame(rows, columns=columns)
+# The percentage base is taken before rarely-used processors are filtered out.
+total = df['Repos'].sum()
+df = df[df['Repos'] >= 5]
+df = df.sort_values(by=['Repos', 'Supported', 'Name'], ascending=[False, True, True])
+df['Repos'] = df['Repos'].map(lambda x: f'{x} ({x / total * 100.0:.2f}%)')
+print(df.to_markdown(headers='keys', tablefmt='rst', index=False))
diff --git a/imgutils/preprocess/__init__.py b/imgutils/preprocess/__init__.py
@@ -3,3 +3,4 @@
     register_pillow_parse, parse_pillow_transforms
 from .torchvision import register_torchvision_transform, create_torchvision_transforms, \
     register_torchvision_parse, parse_torchvision_transforms
+from .transformers import *
diff --git a/imgutils/preprocess/pillow.py b/imgutils/preprocess/pillow.py
@@ -19,6 +19,7 @@
 from PIL import Image
 
 from .base import NotParseTarget
+from ..data import load_image
 
 # noinspection PyUnresolvedReferences
 _INT_TO_PILLOW = {
@@ -651,6 +652,154 @@ def _parse_normalize(obj: PillowNormalize):
     }
 
 
+class PillowConvertRGB:
+    """
+    Transform that converts a PIL image to RGB.
+
+    The conversion itself is delegated to :func:`load_image` with ``mode='RGB'``;
+    the configured ``force_background`` is forwarded to it unchanged.
+
+    :param force_background: Background color forwarded to :func:`load_image`.
+                           Default is 'white'.
+    :type force_background: Optional[str]
+    """
+
+    def __init__(self, force_background: Optional[str] = 'white'):
+        self.force_background = force_background
+
+    def __call__(self, pic):
+        """
+        Apply the RGB conversion to ``pic``.
+
+        :param pic: Image to convert
+        :type pic: PIL.Image.Image
+
+        :return: RGB converted image
+        :rtype: PIL.Image.Image
+        :raises TypeError: If ``pic`` is not a PIL Image
+        """
+        if isinstance(pic, Image.Image):
+            return load_image(pic, mode='RGB', force_background=self.force_background)
+        raise TypeError(f'pic should be PIL Image. Got {type(pic)}')
+
+    def __repr__(self):
+        """
+        Describe the transform together with its configured background.
+
+        :return: String representation
+        :rtype: str
+        """
+        cls_name = type(self).__name__
+        return f'{cls_name}(force_background={self.force_background!r})'
+
+
+@register_pillow_transform('convert_rgb')
+def _create_convert_rgb(force_background: Optional[str] = 'white'):
+    """
+    Build the transform registered under the name ``'convert_rgb'``.
+
+    :param force_background: Background color passed on to :class:`PillowConvertRGB`
+    :type force_background: Optional[str]
+
+    :return: Newly created transform
+    :rtype: PillowConvertRGB
+    """
+    transform = PillowConvertRGB(force_background=force_background)
+    return transform
+
+
+@register_pillow_parse('convert_rgb')
+def _parse_convert_rgb(obj):
+    """
+    Turn a :class:`PillowConvertRGB` instance back into its configuration.
+
+    :param obj: Candidate object to parse
+    :type obj: Any
+
+    :return: Dictionary holding the ``force_background`` setting
+    :rtype: dict
+    :raises NotParseTarget: If ``obj`` is not a :class:`PillowConvertRGB` instance
+    """
+    if isinstance(obj, PillowConvertRGB):
+        # From here on ``obj`` is known to be a PillowConvertRGB.
+        return {'force_background': obj.force_background}
+    raise NotParseTarget
+
+
+class PillowRescale:
+    """
+    Transform that multiplies array values by a constant factor.
+
+    Typically used to bring pixel values from the [0-255] range into [0-1].
+    The factor is stored as a ``numpy.float32`` scalar.
+
+    :param rescale_factor: Multiplier applied to every value. Default is 1/255.
+    :type rescale_factor: float
+    """
+
+    def __init__(self, rescale_factor: float = 1 / 255):
+        self.rescale_factor = np.float32(rescale_factor)
+
+    def __call__(self, array):
+        """
+        Multiply ``array`` by the configured factor.
+
+        :param array: Array to rescale
+        :type array: numpy.ndarray
+
+        :return: Rescaled array
+        :rtype: numpy.ndarray
+        :raises TypeError: If ``array`` is not a numpy array
+        """
+        if isinstance(array, np.ndarray):
+            return array * self.rescale_factor
+        raise TypeError('Input should be a numpy.ndarray')
+
+    def __repr__(self):
+        """
+        Describe the transform together with its rescale factor.
+
+        :return: String representation
+        :rtype: str
+        """
+        cls_name = type(self).__name__
+        return f'{cls_name}(rescale_factor={self.rescale_factor!r})'
+
+
+@register_pillow_transform('rescale')
+def _create_rescale(rescale_factor: float = 1 / 255):
+    """
+    Build the transform registered under the name ``'rescale'``.
+
+    :param rescale_factor: Multiplier passed on to :class:`PillowRescale`
+    :type rescale_factor: float
+
+    :return: Newly created transform
+    :rtype: PillowRescale
+    """
+    transform = PillowRescale(rescale_factor=rescale_factor)
+    return transform
+
+
+@register_pillow_parse('rescale')
+def _parse_rescale(obj):
+    """
+    Turn a :class:`PillowRescale` instance back into its configuration.
+
+    :param obj: Candidate object to parse
+    :type obj: Any
+
+    :return: Dictionary holding the ``rescale_factor`` setting
+    :rtype: dict
+    :raises NotParseTarget: If ``obj`` is not a :class:`PillowRescale` instance
+    """
+    if isinstance(obj, PillowRescale):
+        # ``.item()`` unwraps the stored numpy scalar into a plain Python number.
+        factor = obj.rescale_factor.item()
+        return {'rescale_factor': factor}
+    raise NotParseTarget
+
+
 class PillowCompose:
     """
     Composes several transforms together into a single transform.
diff --git a/docs/source/api_doc/preprocess/index.rst b/docs/source/api_doc/preprocess/index.rst
@@ -12,4 +12,5 @@ imgutils.preprocess
         base
         pillow
         torchvision
+        transformers