Merge pull request #78 from NOAA-GSL/IDSSE-936-2

Idsse 936 2
NOAA-GSL · Oct 16, 2024 · be04323 · be04323
2 parents d62cb11 + 1555ce9
commit be04323
Show file tree

Hide file tree

Showing 4 changed files with 457 additions and 0 deletions.
diff --git a/.github/workflows/linter.yml b/.github/workflows/linter.yml
@@ -26,6 +26,16 @@ jobs:
           python -m pip install --upgrade pip
           pip install pytest pylint==2.17.5 python-dateutil==2.8.2 pint==0.21 importlib-metadata==6.7.0 jsonschema==4.19.0 pika==1.3.1 pyproj numpy==1.26.2 shapely==2.0.2 netcdf4==1.6.3 h5netcdf==1.1.0 pillow==10.2.0 python-logging-rabbitmq==2.3.0
 
+      - name: Checkout idss-engine-commons
+        uses: actions/checkout@v2
+        with:
+          repository: NOAA-GSL/idss-engine-commons
+          path: commons/
+
+      - name: Install IDSSE python commons
+        working-directory: commons/python/idsse_common
+        run: pip install .
+
       - name: Set PYTHONPATH for pylint
         run: |
           echo "PYTHONPATH=python/idsse_common/idsse/common" >> $GITHUB_ENV

diff --git a/.github/workflows/run-tests.yml b/.github/workflows/run-tests.yml
@@ -28,6 +28,16 @@ jobs:
       - name: Set PYTHONPATH for pytest
         run: |
           echo "PYTHONPATH=python/idsse_common/idsse/common" >> $GITHUB_ENV
+      
+      - name: Checkout idss-engine-commons
+        uses: actions/checkout@v2
+        with:
+          repository: NOAA-GSL/idss-engine-commons
+          path: commons/
+
+      - name: Install IDSSE python commons
+        working-directory: commons/python/idsse_common
+        run: pip install .
 
       - name: Test with pytest
         working-directory: python/idsse_common

diff --git a/python/idsse_common/idsse/common/sci/bit_pack.py b/python/idsse_common/idsse/common/sci/bit_pack.py
@@ -0,0 +1,326 @@
+"""Utility module for packing data"""
+# ----------------------------------------------------------------------------------
+# Created on Thu Dec 14 2023
+#
+# Copyright (c) 2023 Regents of the University of Colorado. All rights reserved. (1)
+# Copyright (c) 2023 Colorado State University. All rights reserved. (2)
+#
+# Contributors:
+#     Geary Layne (1)
+#     Paul Hamer (2)
+#
+# ----------------------------------------------------------------------------------
+
+from enum import IntEnum
+from typing import NamedTuple
+import numpy
+
+class PackType(IntEnum):
+    """Enumerated type used to indicate if data is packed into a byte or short (16 bit)"""
+    BYTE = 8
+    SHORT = 16
+
+
+class PackInfo(NamedTuple):
+    """Data class used to hold packing info"""
+    type: PackType
+    scale: float
+    offset: float
+
+
+class PackData(NamedTuple):
+    """Data class used to hold packed data and the corresponding packing info"""
+    type: PackType
+    scale: float
+    offset: float
+    data: list | numpy.ndarray
+
+def get_min_max(data: list | numpy.ndarray) -> (float, float):
+    """Get the minimum and maximum from the numpy array or list. Testing showed (on a mac)
+    that numpy was faster...
+
+    Args:
+        data (list | numpy.ndarray): Input data, either in list form
+                                        (e.g. list[list[float]] for 2D) or as a numpy array
+    Raises:
+        TypeError: If the arguments for the call to the function are invalid
+    Returns:
+        (float, float): The minimum, maximum from the supplied argument
+    """
+    if isinstance(data, list):
+        arr = numpy.array(data).ravel(order='K')
+        return numpy.min(arr), numpy.max(arr)
+    data.ravel(order='K')
+    return numpy.min(data), numpy.max(data)
+
+def get_pack_info(
+        min_value: float,
+        max_value: float,
+        decimals: int = None,
+        pack_type: PackType = None
+) -> PackInfo:
+    """Retrieve appropriate packing info based on min/max values and
+    either decimals of precision or pack type.
+
+    Args:
+        min_value (float): The minimum value of the data to be packed
+        max_value (float): The maximum value of the data to be packed.
+        decimals (int, optional): The number of decimal places of precision being requested.
+                                  If the decimals is None, it indicates the PackType should
+                                  be used. Defaults to None.
+        pack_type (PackType, optional): The pack type (byte or short) being requested.
+                                        If the pack_type is None, it indicate that decimals
+                                        of precision should be used. Defaults to None.
+
+    Raises:
+        ValueError: If one and only one of decimals aor pack_type aren't provided, or
+                    if not appropriate packing type can be found.
+
+    Returns:
+        PackInfo: The determined packing info, including packing type (byte or short), and
+                  scale and offset.
+    """
+    if decimals is None and pack_type is None:
+        raise ValueError('Either decimals or pack_type must be non None')
+
+    if decimals is not None and pack_type is not None:
+        raise ValueError('Either decimals or pack_type must be non None, but not both')
+
+    if pack_type:
+        scale = (max_value - min_value) / _max_values[pack_type]
+        return PackInfo(pack_type, float(scale), float(min_value))
+
+    scale = _get_scale(decimals)
+    unique_values = int((max_value-min_value) / scale)
+    for p_type, max_val in _max_values.items():
+        if unique_values <= max_val:
+            return PackInfo(p_type, float(scale), float(min_value))
+
+    raise ValueError('Unable to find appropriate packing')
+
+
+def pack_to_list(
+        data: list | numpy.ndarray,
+        pack_info: PackInfo | None = None,
+        decimals: int | None = None,
+        in_place: bool = True
+):
+    """Preform bit packing of input data, utilizing the the pack_info if provided or
+    a derived pack_info if not.
+
+    Args:
+        data (list | numpy.ndarray): Input data, either in list form
+                                        (e.g. list[list[float]] for 2D) or as a numpy array
+        pack_info (PackInfo | None, optional): Pre-determined packing info. Defaults to None.
+        decimals (int | None, optional): If pack_info is not provided, decimals is used
+                                            to derive packing info. Defaults to None.
+        in_place (bool, optional): A flag to indicate to preform bit packing in place to the
+                                   extent possible. Defaults to True.
+
+    Raises:
+        KeyError: If the input data is not of support type.
+
+    Returns:
+        PackData: Returns the packed data and meta data (i.e. PackInfo)
+    """
+    if isinstance(data, numpy.ndarray):
+        return pack_numpy_to_list(data, pack_info, decimals)
+
+    if isinstance(data, list):
+        return pack_list_to_list(data, pack_info, decimals, in_place)
+
+    raise KeyError(f'Data type ({type(data)}), is not supported')
+
+
+def pack_numpy_to_numpy(
+        data: numpy.ndarray,
+        pack_info: PackInfo | None = None,
+        decimals: int | None = None,
+        in_place: bool = True
+) -> PackData:
+    """Preform bit packing of input data, utilizing the the pack_info if provided or
+    a derived pack_info if not. Input and output are numpy arrays.
+
+    Args:
+        data (numpy.ndarray): Input data.
+        pack_info (PackInfo | None, optional): Pre-determined packing info. Defaults to None.
+        decimals (int | None, optional): If pack_info is not provided, decimals is used
+                                            to derive packing info. Defaults to None.
+        in_place (bool, optional): A flag to indicate to preform bit packing in place to the
+                                   extent possible. Defaults to True.
+
+    Raises:
+        ValueError: If input data is not numpy array
+
+    Returns:
+        PackData: Returns the packed data and meta data (i.e. PackInfo), where data will be
+                  numpy array.
+    """
+    if not isinstance(data, numpy.ndarray):
+        raise ValueError(f'Data must be a numpy.ndarray but is of type {type(data)}')
+
+    pack_type, scale, offset = _resolve_pack_info(data, pack_info, decimals)
+
+    if in_place:
+        if not numpy.issubdtype(data.dtype, numpy.floating):
+            raise ValueError('Can not complete "in_place" bit packing with '
+                             'non floating point array')
+        data -= offset
+    else:
+        data = data-offset
+
+    data /= scale
+    return PackData(pack_type, scale, offset, numpy.trunc(data, out=data))
+
+
+def pack_list_to_list(
+        data: list,
+        pack_info: PackInfo | None = None,
+        decimals: int | None = None,
+        in_place: bool = True
+) -> PackData:
+    """Preform bit-packing of input data, utilizing the pack_info if provided or
+    a derived pack_info if not. Input and output are python list (can be nested lists).
+
+    Args:
+        data (list): Input data.
+        pack_info (PackInfo | None, optional): Pre-determined packing info. Defaults to None.
+        decimals (int | None, optional): If pack_info is not provided, decimals is used
+                                            to derive packing info. Defaults to None.
+        in_place (bool, optional): A flag to indicate to preform bit packing in place to the
+                                   extent possible. Defaults to True.
+
+    Raises:
+        ValueError: If input data is not python list
+
+    Returns:
+        PackData: Returns the packed data and meta data (i.e. PackInfo), where data will be
+                  python list (possibly nested).
+    """
+    if not isinstance(data, list):
+        raise ValueError(f'Data must be a python list but is of type {type(data)}')
+
+    pack_type, scale, offset = _resolve_pack_info(data, pack_info, decimals)
+    if in_place:
+        return PackData(pack_type, scale, offset, _pack_list_to_list_in_place(data, scale, offset))
+    return PackData(pack_type, scale, offset, _pack_list_to_list_copy(data, scale, offset))
+
+
+def pack_numpy_to_list(
+        data: numpy.array,
+        pack_info: PackInfo | None = None,
+        decimals: int | None = None,
+) -> PackData:
+    """Preform bit packing of input data, utilizing the the pack_info if provided or
+    a derived pack_info if not. Input is numpy array and output is python list.
+
+    Args:
+        data (numpy.ndarray): Input data.
+        pack_info (PackInfo | None, optional): Pre-determined packing info. Defaults to None.
+        decimals (int | None, optional): If pack_info is not provided, decimals is used
+                                            to derive packing info. Defaults to None.
+
+    Raises:
+        ValueError: If input data is not numpy array
+
+    Returns:
+        PackData: Returns the packed data and meta data (i.e. PackInfo), where data will be
+                  python list.
+    """
+    if not isinstance(data, numpy.ndarray):
+        raise ValueError(f'Data must be a numpy.ndarray but is of type {type(data)}')
+
+    pack_type, scale, offset = _resolve_pack_info(data, pack_info, decimals)
+    return PackData(pack_type, scale, offset, _pack_np_array_to_list(data, scale, offset))
+
+
+# determine the appropriate pack info, basically if pack info if provided return that,
+# else compute it based on decimal, and if nothing else use short packing. This is needed
+# because of the overloaded nature of the public pack functions. A caller can provide a
+# specific pack_info to use or the decimal precision (which with the actual data can be used to
+# create a pack_info), or lastly if not providing a pack_info or decimal then a pack_info using
+# short packing would be used.
+def _resolve_pack_info(
+        data,
+        pack_info: PackInfo | None = None,
+        decimals: int | None = None
+) -> PackInfo:
+    return (PackInfo(pack_info.type,
+                     float(pack_info.scale),
+                     float(pack_info.offset))
+            if pack_info is not None
+            else get_pack_info(numpy.min(data),
+                               numpy.max(data),
+                               decimals=decimals)
+            if decimals is not None
+            else get_pack_info(numpy.min(data),
+                               numpy.max(data),
+                               pack_type=PackType.SHORT)
+            )
+
+
+# core packing code specific to in place packing of a list(s)
+def _pack_list_to_list_in_place(
+        data: list,
+        scale: float,
+        offset: float
+) -> list:
+    # Convert list into numpy array (it creates a copy)
+    np_data = numpy.array(data, dtype=float)
+    data = _pack_np_array_to_list(np_data, scale, offset)
+    return data
+
+# core packing code specific to packing a list(s) with forced copying
+def _pack_list_to_list_copy(
+        data: list,
+        scale: float,
+        offset: float
+) -> list:
+    np_data = numpy.array(data, dtype=float)
+    return _pack_np_array_to_list(np_data, scale, offset)
+
+
+# core packing code specific to packing numpy array to a list, in place not possible
+def _pack_np_array_to_list(
+        data: numpy.array,
+        scale: float,
+        offset: float
+) -> list:
+    np_data = numpy.copy(data)
+    np_data -= offset
+    np_data /= scale
+    # Return the truncated array and a int list.
+    return (numpy.trunc(np_data).astype(int)).tolist()
+
+# core packing code using diplib package (sometimes slower than the original so not used but here
+# for an option.
+#def _diplib_pack(data: numpy.array,
+#                 scale: float,
+#                 offset: float) -> numpy.array:
+#    dip_data = dip.Image(data)
+#    dip_data -= offset
+#    dip_data /= scale
+#    return numpy.trunc(dip_data)
+
+
+# private lookup for the max value possible for bit packing type
+_max_values = {
+    PackType.BYTE: 255,
+    PackType.SHORT: 65535
+}
+
+_scale_lookup = {
+    0: 1.,
+    1: 0.1,
+    2: 0.01,
+    3: 0.001,
+    4: 0.0001,
+    5: 0.00001,
+    6: 0.000001
+}
+
+
+# private lookup function of num decimal to actual decimals. Using the lookup remove rounding
+# issues with math.pow for the most common decimal values
+def _get_scale(decimals):
+    return _scale_lookup.get(decimals, .1**decimals)