From f520d18f703c6afe12759519213acb5513618d5d Mon Sep 17 00:00:00 2001 From: Hoppe Date: Wed, 28 Jun 2023 15:30:27 +0200 Subject: [PATCH 01/51] started with implementation of xarray: created file... --- heat/core/dxarray.py | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) create mode 100644 heat/core/dxarray.py diff --git a/heat/core/dxarray.py b/heat/core/dxarray.py new file mode 100644 index 0000000000..f707a30c00 --- /dev/null +++ b/heat/core/dxarray.py @@ -0,0 +1,25 @@ +""" +Implements a distributed counterpart of xarray built on top of Heats DNDarray class +""" + +import heat as ht + + +class Dxarray: + """ + Distributed counterpart of xarray. + + Parameters + -------------- + dataarray: DNDarray + entries of the xarray + coords: dictionary + coordinates + entries of the dictionary have the form `dim`:`coords_of_dim` for each `dim` in `dims`, where `coords_of_dim` can either be a list of coordinate labels ("logical coordinates") or an Dxarray of same shape as the original one, also split along the same split axis ("physical coordinates") + dims: List + names of the dimensions + split: Union[int,None] + split dimension of the Dxarray (analogous to split dimension of DNDarray) + """ + + # TODO: @properties ... From c95fee25e299066d9f0ea1547c253fed511682ab Mon Sep 17 00:00:00 2001 From: Hoppe Date: Tue, 4 Jul 2023 14:57:20 +0200 Subject: [PATCH 02/51] merged main... --- heat/core/dxarray.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/heat/core/dxarray.py b/heat/core/dxarray.py index f707a30c00..59d6e1fdb3 100644 --- a/heat/core/dxarray.py +++ b/heat/core/dxarray.py @@ -5,21 +5,21 @@ import heat as ht -class Dxarray: +class DXarray: """ Distributed counterpart of xarray. 
Parameters -------------- dataarray: DNDarray - entries of the xarray + data entries of the DXarray coords: dictionary coordinates - entries of the dictionary have the form `dim`:`coords_of_dim` for each `dim` in `dims`, where `coords_of_dim` can either be a list of coordinate labels ("logical coordinates") or an Dxarray of same shape as the original one, also split along the same split axis ("physical coordinates") + entries of the dictionary have the form `dim`:`coords_of_dim` for each `dim` in `dims`, where `coords_of_dim` can either be a list of coordinate labels ("logical coordinates") or an DXarray of same shape as the original one, also split along the same split axis ("physical coordinates") dims: List - names of the dimensions + names of the dimensions of the DXarray split: Union[int,None] - split dimension of the Dxarray (analogous to split dimension of DNDarray) + split dimension of the DXarray (analogous to split dimension of DNDarray) """ # TODO: @properties ... From 5f6ebb4e7275e3098adbb769705a453664869ace Mon Sep 17 00:00:00 2001 From: Hoppe Date: Tue, 4 Jul 2023 16:53:08 +0200 Subject: [PATCH 03/51] very basic setup of class DXarray with some getters and setters --- heat/__init__.py | 1 + heat/core/dxarray.py | 25 ------- heat/dxarray/__init__.py | 5 ++ heat/dxarray/dxarray.py | 140 +++++++++++++++++++++++++++++++++++++++ 4 files changed, 146 insertions(+), 25 deletions(-) delete mode 100644 heat/core/dxarray.py create mode 100644 heat/dxarray/__init__.py create mode 100644 heat/dxarray/dxarray.py diff --git a/heat/__init__.py b/heat/__init__.py index 0cac56b609..41856dcb7f 100644 --- a/heat/__init__.py +++ b/heat/__init__.py @@ -17,3 +17,4 @@ from . import sparse from . import spatial from . import utils +from . 
import dxarray diff --git a/heat/core/dxarray.py b/heat/core/dxarray.py deleted file mode 100644 index 59d6e1fdb3..0000000000 --- a/heat/core/dxarray.py +++ /dev/null @@ -1,25 +0,0 @@ -""" -Implements a distributed counterpart of xarray built on top of Heats DNDarray class -""" - -import heat as ht - - -class DXarray: - """ - Distributed counterpart of xarray. - - Parameters - -------------- - dataarray: DNDarray - data entries of the DXarray - coords: dictionary - coordinates - entries of the dictionary have the form `dim`:`coords_of_dim` for each `dim` in `dims`, where `coords_of_dim` can either be a list of coordinate labels ("logical coordinates") or an DXarray of same shape as the original one, also split along the same split axis ("physical coordinates") - dims: List - names of the dimensions of the DXarray - split: Union[int,None] - split dimension of the DXarray (analogous to split dimension of DNDarray) - """ - - # TODO: @properties ... diff --git a/heat/dxarray/__init__.py b/heat/dxarray/__init__.py new file mode 100644 index 0000000000..fbc04925fd --- /dev/null +++ b/heat/dxarray/__init__.py @@ -0,0 +1,5 @@ +""" +import into heat.dxarray namespace +""" + +from .dxarray import * diff --git a/heat/dxarray/dxarray.py b/heat/dxarray/dxarray.py new file mode 100644 index 0000000000..1e549e600b --- /dev/null +++ b/heat/dxarray/dxarray.py @@ -0,0 +1,140 @@ +""" +Implements a distributed counterpart of xarray built on top of Heats DNDarray class +""" + +import heat as ht + +# import xarray as xa +from typing import Union + +__all__ = ["DXarray"] + + +class DXarray: + """ + Distributed counterpart of xarray. 
+ + Parameters + -------------- + values: DNDarray + data entries of the DXarray + dims: list + names of the dimensions of the DXarray + coords: dictionary + coordinates + entries of the dictionary have the form `dim`:`coords_of_dim` for each `dim` in `dims`, + where `coords_of_dim` can either be a list of coordinate labels ("logical coordinates") or an + DXarray of same shape as the original one, also split along the same split axis ("physical coordinates"). + split: Union[int,None] + dimension along which the DXarray is split (analogous to split dimension of DNDarray) + """ + + def __init__( + self, + values: ht.DNDarray, + dims: list, + coords: dict, + name: Union[str, None] = None, + attrs: dict = {}, + ): + self.__values = values + self.__dims = dims + self.__coords = coords + self.__split = values.split + self.__device = values.device + self.__comm = values.comm + self.__name = name + self.__attrs = attrs + + assert len(self.__dims) == self.__values.ndim + assert len(self.__dims) == len(self.__coords) + for coord_item in coords.items(): + if coord_item[1] is not None: + assert ( + isinstance(coord_item[1], ht.DNDarray) + and coord_item[1].device == self.__values.device + and coord_item[1].comm == self.__values.comm + ) + if coord_item[1].split is not None: + 0 == 1 + # ensure correct split dim... + # TODO: we need to introduce some additional consistency checks: compare communicator, devices, and split-dimension of DNDarrays contained in coords and of values... + # physical coordinate arrays must be split along same dim as values... 
+ + @property + def values(self) -> ht.DNDarray: + """ + Get values from DXarray + """ + return self.__values + + @property + def dims(self) -> list: + """ + Get dims from DXarray + """ + return self.__dims + + @property + def coords(self) -> dict: + """ + Get coords from DXarray + """ + return self.__coords + + @property + def split(self) -> Union[int, None]: + """ + Get split dimension from DXarray + """ + return self.__split + + @property + def device(self) -> ht.Device: + """ + Get device from DXarray + """ + return self.__device + + @property + def comm(self) -> ht.Communication: + """ + Get communicator from DXarray + """ + return self.__comm + + @property + def name(self) -> str: + """ + Get name from DXarray + """ + return self.__name + + @property + def attrs(self) -> dict: + """ + Get attributes from DXarray + """ + return self.__attrs + + @values.setter + def values(self, arr: ht.DNDarray): + """ + Set value array of DXarray + """ + # TODO: perform some consistency checks... + self.__values = arr + + @name.setter + def name(self, name: str): + """ + Set name of DXarray + """ + self.__name = name + + @attrs.setter + def attrs(self, attributes: dict): + """ + Set attributes of DXarray + """ + self.__attrs = attributes From cda9d9b61dcabb3f4b0e6736780ce77c69c9cb95 Mon Sep 17 00:00:00 2001 From: Hoppe Date: Wed, 5 Jul 2023 10:56:03 +0200 Subject: [PATCH 04/51] smaller changes --- heat/dxarray/dxarray.py | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/heat/dxarray/dxarray.py b/heat/dxarray/dxarray.py index 1e549e600b..2db5c89ed0 100644 --- a/heat/dxarray/dxarray.py +++ b/heat/dxarray/dxarray.py @@ -32,22 +32,27 @@ class DXarray: def __init__( self, values: ht.DNDarray, - dims: list, + dims: Union[list, None], coords: dict, name: Union[str, None] = None, attrs: dict = {}, ): self.__values = values + self.__name = name + self.__attrs = attrs + + if dims is not None: + assert len(self.__dims) == self.__values.ndim + self.__dims 
= dims + else: + self.__dims = ["dim_%d" % k for k in range(self.__values.ndim)] self.__dims = dims + self.__coords = coords self.__split = values.split self.__device = values.device self.__comm = values.comm - self.__name = name - self.__attrs = attrs - assert len(self.__dims) == self.__values.ndim - assert len(self.__dims) == len(self.__coords) for coord_item in coords.items(): if coord_item[1] is not None: assert ( From afe9005d0fa3bafe64d469fee68f9f84153d8bb3 Mon Sep 17 00:00:00 2001 From: Hoppe Date: Thu, 6 Jul 2023 12:17:15 +0200 Subject: [PATCH 05/51] some little progress in implementation of DXarray --- heat/dxarray/dxarray.py | 101 ++++++++++++++++++++++++++++++++++------ 1 file changed, 86 insertions(+), 15 deletions(-) diff --git a/heat/dxarray/dxarray.py b/heat/dxarray/dxarray.py index 2db5c89ed0..d1bf64432f 100644 --- a/heat/dxarray/dxarray.py +++ b/heat/dxarray/dxarray.py @@ -33,7 +33,7 @@ def __init__( self, values: ht.DNDarray, dims: Union[list, None], - coords: dict, + coords: Union[dict, None], name: Union[str, None] = None, attrs: dict = {}, ): @@ -41,6 +41,8 @@ def __init__( self.__name = name self.__attrs = attrs + # check if names of dims are given (and whether their number fits the number of dims of the values array) + # if no names are provided, introduce generic names "dim_N", N = 0,1,... 
if dims is not None: assert len(self.__dims) == self.__values.ndim self.__dims = dims @@ -48,24 +50,59 @@ def __init__( self.__dims = ["dim_%d" % k for k in range(self.__values.ndim)] self.__dims = dims + # set attribute split: use dimension name instead of idx since we are in class DXarray instead of DNDarray + self.__split = self.__dim_idx_to_name(values.split) + + # check consistency of the coordinates provided + if coords is not None: + # go through all entries in the dictionary coords + for coord_item in coords.items(): + coord_item_dims = coord_item[0] + coord_item_coords = coord_item[1] + # first case: "classical" coordinates for a single dimension, sometimes referred to "logical coordinates" + if isinstance(coord_item_dims, str): + # here, the coordinates must be given by a one-dimensional DNDarray... + assert isinstance(coord_item_coords, ht.DNDarray) + assert coord_item_coords.ndim == 1 + # ... with matching device and communicator, ... + assert coord_item_coords.device == self.__values.device + assert coord_item_coords.comm == self.__values.comm + # ... correct shape, and ... + assert ( + coord_item_coords.gshape[0] + == self.__values.gshape[self.__dim_name_to_idx(coord_item_dims)] + ) + # ... that is split if and only if the coordinates refer to the split dimension of the DXarray + if coord_item_dims == self.__split: + assert coord_item_coords.split == 0 + else: + assert coord_item_coords.split is None + # second case: "physical coordinates" - two or more dimensions are "merged" together and equipped with a coordinate array + # that cannot be expressed as meshgrid of 1d coordinate arrays + elif isinstance(coord_item_dims, tuple(int)): + # now, the coordinates must be given as a DXarray... + assert isinstance(coord_item_coords, ht.DXarray) + # ... with matching dimension names, ... + assert coord_item_coords.dims == list(coord_item_dims) + # ... shape, ... 
+ assert ( + coord_item_coords.values.gshape + == self.__values.gshape[self.__dim_name_to_idx(list(coord_item_dims))] + ) + # ... device and communicator, ... + assert coord_item_coords.device == self.__values.device + assert coord_item_coords.comm == self.__values.comm + # ... and split dimension. + if self.__split in coord_item_dims: + assert coord_item_coords.split == self.__split + else: + assert coord_item_coords.split is None + + # after the consistency checks, set the remaining attributes of the DXarray self.__coords = coords - self.__split = values.split self.__device = values.device self.__comm = values.comm - for coord_item in coords.items(): - if coord_item[1] is not None: - assert ( - isinstance(coord_item[1], ht.DNDarray) - and coord_item[1].device == self.__values.device - and coord_item[1].comm == self.__values.comm - ) - if coord_item[1].split is not None: - 0 == 1 - # ensure correct split dim... - # TODO: we need to introduce some additional consistency checks: compare communicator, devices, and split-dimension of DNDarrays contained in coords and of values... - # physical coordinate arrays must be split along same dim as values... - @property def values(self) -> ht.DNDarray: """ @@ -143,3 +180,37 @@ def attrs(self, attributes: dict): Set attributes of DXarray """ self.__attrs = attributes + + def __dim_name_to_idx(self, names: Union[str, tuple, list, None]): + """ + Converts a string (or tuple of strings) referring to dimensions of the DXarray to the corresponding numeric index (tuple of indices) of these dimensions. + Inverse of :meth:`__dim_idx_to_name`. 
+ """ + if names is None: + return None + elif isinstance(names, str): + return self.__dims.index(names) + elif isinstance(names, tuple): + names_list = list(names) + return tuple([self.__dims.index(names) for name in names_list]) + elif isinstance(names, list): + return tuple([self.__dims.index(names) for name in names]) + else: + raise TypeError("Input must be None, string, list of strings, or tuple of strings.") + + def __dim_idx_to_name(self, idxs: Union[int, tuple, list, None]): + """ + Converts an numeric index (or tuple of such indices) referring to the dimensions of the DXarray to the corresponding name string (or tuple of name strings). + Inverse of :meth:`__dim_name_to_idx`. + """ + if idxs is None: + return None + elif isinstance(self, idxs): + return self.__dims[idxs] + elif isinstance(idxs, tuple): + idxs_list = list(idxs) + return tuple([self.__dims[idx] for idx in idxs_list]) + elif isinstance(idxs, list): + return tuple([self.__dims[idx] for idx in idxs]) + else: + raise TypeError("Input must be None, int, list of ints, or tuple of ints.") From ee4e751b8c20d8bf76dcbb18d23a47125a9716ec Mon Sep 17 00:00:00 2001 From: Hoppe Date: Thu, 6 Jul 2023 14:20:04 +0200 Subject: [PATCH 06/51] ... --- heat/dxarray/dxarray.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/heat/dxarray/dxarray.py b/heat/dxarray/dxarray.py index d1bf64432f..cc9014c04d 100644 --- a/heat/dxarray/dxarray.py +++ b/heat/dxarray/dxarray.py @@ -27,6 +27,13 @@ class DXarray: DXarray of same shape as the original one, also split along the same split axis ("physical coordinates"). split: Union[int,None] dimension along which the DXarray is split (analogous to split dimension of DNDarray) + + Notes + --------------- + Some attributes of DNDarray are not included in DXarray, e.g., gshape, lshape, larray etc., and need to be accessed by + DXarray.values.gshape etc. 
+ This is in order to avoid confusion, because a DXarray is built of possibly several DNDarrays which could cause confusion + to which gshape etc. a global attribute DXarray.gshape could refer to. """ def __init__( From 9c8d044c9f645e5482f4876ed5427983c15951e5 Mon Sep 17 00:00:00 2001 From: Hoppe Date: Thu, 6 Jul 2023 18:00:09 +0200 Subject: [PATCH 07/51] corrected some bugs, added printing of DXarray --- heat/__init__.py | 1 - heat/dxarray/dxarray.py | 131 +++++++++++++++++++++++++++++++++++----- 2 files changed, 116 insertions(+), 16 deletions(-) diff --git a/heat/__init__.py b/heat/__init__.py index 41856dcb7f..0cac56b609 100644 --- a/heat/__init__.py +++ b/heat/__init__.py @@ -17,4 +17,3 @@ from . import sparse from . import spatial from . import utils -from . import dxarray diff --git a/heat/dxarray/dxarray.py b/heat/dxarray/dxarray.py index cc9014c04d..9e0f9e5156 100644 --- a/heat/dxarray/dxarray.py +++ b/heat/dxarray/dxarray.py @@ -2,9 +2,9 @@ Implements a distributed counterpart of xarray built on top of Heats DNDarray class """ +import torch import heat as ht - -# import xarray as xa +import xarray as xr from typing import Union __all__ = ["DXarray"] @@ -39,11 +39,14 @@ class DXarray: def __init__( self, values: ht.DNDarray, - dims: Union[list, None], - coords: Union[dict, None], + dims: Union[list, None] = None, + coords: Union[dict, None] = None, name: Union[str, None] = None, attrs: dict = {}, ): + """ + Constructor for DXarray class + """ self.__values = values self.__name = name self.__attrs = attrs @@ -51,11 +54,10 @@ def __init__( # check if names of dims are given (and whether their number fits the number of dims of the values array) # if no names are provided, introduce generic names "dim_N", N = 0,1,... 
if dims is not None: - assert len(self.__dims) == self.__values.ndim + assert len(dims) == self.__values.ndim self.__dims = dims else: self.__dims = ["dim_%d" % k for k in range(self.__values.ndim)] - self.__dims = dims # set attribute split: use dimension name instead of idx since we are in class DXarray instead of DNDarray self.__split = self.__dim_idx_to_name(values.split) @@ -86,16 +88,18 @@ def __init__( assert coord_item_coords.split is None # second case: "physical coordinates" - two or more dimensions are "merged" together and equipped with a coordinate array # that cannot be expressed as meshgrid of 1d coordinate arrays - elif isinstance(coord_item_dims, tuple(int)): + elif isinstance(coord_item_dims, tuple): # now, the coordinates must be given as a DXarray... - assert isinstance(coord_item_coords, ht.DXarray) + assert isinstance(coord_item_coords, DXarray) # ... with matching dimension names, ... assert coord_item_coords.dims == list(coord_item_dims) # ... shape, ... assert ( - coord_item_coords.values.gshape - == self.__values.gshape[self.__dim_name_to_idx(list(coord_item_dims))] - ) + torch.tensor(coord_item_coords.values.gshape) + == torch.tensor(self.__values.gshape)[ + self.__dim_name_to_idx(list(coord_item_dims)) + ] + ).all() # ... device and communicator, ... 
assert coord_item_coords.device == self.__values.device assert coord_item_coords.comm == self.__values.comm @@ -110,6 +114,18 @@ def __init__( self.__device = values.device self.__comm = values.comm + if self.__coords is not None: + self.__dims_with_coords = sum([list(it[0]) for it in coords.items()], []) + else: + self.__dims_with_coords = [] + self.__dims_without_coords = [ + dim for dim in self.__dims if dim not in self.__dims_with_coords + ] + + """ + Attribute getters and setters for the DXarray class + """ + @property def values(self) -> ht.DNDarray: """ @@ -166,6 +182,20 @@ def attrs(self) -> dict: """ return self.__attrs + @property + def dims_with_coordinates(self) -> list: + """ + Get list of dims with coordinates from DXarray + """ + return self.__dims_with_coordinates + + @property + def dims_without_coordinates(self) -> list: + """ + Get list of dims without coordinates from DXarray + """ + return self.__dims_without_coordinates + @values.setter def values(self, arr: ht.DNDarray): """ @@ -188,6 +218,10 @@ def attrs(self, attributes: dict): """ self.__attrs = attributes + """ + Methods of DXarray class + """ + def __dim_name_to_idx(self, names: Union[str, tuple, list, None]): """ Converts a string (or tuple of strings) referring to dimensions of the DXarray to the corresponding numeric index (tuple of indices) of these dimensions. 
@@ -199,9 +233,9 @@ def __dim_name_to_idx(self, names: Union[str, tuple, list, None]): return self.__dims.index(names) elif isinstance(names, tuple): names_list = list(names) - return tuple([self.__dims.index(names) for name in names_list]) + return tuple([self.__dims.index(name) for name in names_list]) elif isinstance(names, list): - return tuple([self.__dims.index(names) for name in names]) + return [self.__dims.index(name) for name in names] else: raise TypeError("Input must be None, string, list of strings, or tuple of strings.") @@ -212,12 +246,79 @@ def __dim_idx_to_name(self, idxs: Union[int, tuple, list, None]): """ if idxs is None: return None - elif isinstance(self, idxs): + elif isinstance(idxs, int): return self.__dims[idxs] elif isinstance(idxs, tuple): idxs_list = list(idxs) return tuple([self.__dims[idx] for idx in idxs_list]) elif isinstance(idxs, list): - return tuple([self.__dims[idx] for idx in idxs]) + return [self.__dims[idx] for idx in idxs] else: raise TypeError("Input must be None, int, list of ints, or tuple of ints.") + + def __repr__(self) -> str: + if self.__name is not None: + print_name = self.__name + else: + print_name = "" + print_values = self.__values.__repr__() + print_dimensions = ", ".join(self.__dims) + if self.__split is not None: + print_split = self.__split + else: + print_split = "None (no splitted)" + if self.__coords is not None: + print_coords = "\n".join( + [it[0].__repr__() + ": \t" + it[1].__repr__() for it in self.__coords.items()] + ) + print_coords = 'Coordinates of "' + print_name + '": ' + print_coords + else: + print_coords = "" + print_attributes = "\n".join( + ["\t" + it[0].__repr__() + ": \t" + it[1].__repr__() for it in self.__attrs.items()] + ) + if len(self.__dims_without_coords) != 0: + print_coordinates_without_dims = "".join( + [ + 'The remaining coordinates of "', + print_name, + '", ', + ", ".join(self.__dims_without_coords), + ", do not have coordinates. 
\n", + ] + ) + else: + print_coordinates_without_dims = "" + if self.__comm.rank == 0: + return "".join( + [ + 'DXarray with name "', + print_name, + '"\n', + 'Dimensions of "', + print_name, + '": ', + print_dimensions, + "\n", + 'Split dimension of "', + print_name, + '": ', + print_split, + "\n", + 'Values of "', + print_name, + '": ', + print_values, + "\n", + print_coords, + "\n", + print_coordinates_without_dims, + 'Attributes of "', + print_name, + '":', + print_attributes, + "\n\n", + ] + ) + else: + return "" From 966d56c84abc8a70856106274290857eeeaa6d6a Mon Sep 17 00:00:00 2001 From: Hoppe Date: Fri, 7 Jul 2023 11:35:50 +0200 Subject: [PATCH 08/51] shifted some validation routines to file `dxarray_sanitation.py` --- heat/dxarray/__init__.py | 10 +- heat/dxarray/dxarray.py | 206 ++++++++++++++------------ heat/dxarray/dxarray_manipulations.py | 8 + heat/dxarray/dxarray_sanitation.py | 166 +++++++++++++++++++++ 4 files changed, 292 insertions(+), 98 deletions(-) create mode 100644 heat/dxarray/dxarray_manipulations.py create mode 100644 heat/dxarray/dxarray_sanitation.py diff --git a/heat/dxarray/__init__.py b/heat/dxarray/__init__.py index fbc04925fd..73644d9223 100644 --- a/heat/dxarray/__init__.py +++ b/heat/dxarray/__init__.py @@ -1,5 +1,7 @@ -""" -import into heat.dxarray namespace -""" - +""" +import into heat.dxarray namespace +""" + from .dxarray import * +from .dxarray_sanitation import * +from .dxarray_manipulations import * diff --git a/heat/dxarray/dxarray.py b/heat/dxarray/dxarray.py index 9e0f9e5156..ae49633da0 100644 --- a/heat/dxarray/dxarray.py +++ b/heat/dxarray/dxarray.py @@ -7,9 +7,50 @@ import xarray as xr from typing import Union +# imports of "dxarray_..."-dependencies at the end to avoid cyclic dependence + __all__ = ["DXarray"] +# Auxiliary functions + + +def dim_name_to_idx(dims: list, names: Union[str, tuple, list, None]) -> Union[int, tuple, list]: + """ + Converts a string "names" (or tuple of strings) referring to dimensions 
stored in "dims" to the corresponding numeric index (tuple of indices) of these dimensions. + Inverse of :func:`dim_idx_to_name`. + """ + if names is None: + return None + elif isinstance(names, str): + return dims.index(names) + elif isinstance(names, tuple): + names_list = list(names) + return tuple([dims.index(name) for name in names_list]) + elif isinstance(names, list): + return [dims.index(name) for name in names] + else: + raise TypeError("Input names must be None, string, list of strings, or tuple of strings.") + + +def dim_idx_to_name(dims: list, idxs: Union[int, tuple, list, None]) -> Union[str, tuple, list]: + """ + Converts an numeric index "idxs" (or tuple of such indices) referring to the dimensions stored in "dims" to the corresponding name string (or tuple of name strings). + Inverse of :func:`dim_name_to_idx`. + """ + if idxs is None: + return None + elif isinstance(idxs, int): + return dims[idxs] + elif isinstance(idxs, tuple): + idxs_list = list(idxs) + return tuple([dims[idx] for idx in idxs_list]) + elif isinstance(idxs, list): + return [dims[idx] for idx in idxs] + else: + raise TypeError("Input idxs must be None, int, list of ints, or tuple of ints.") + + class DXarray: """ Distributed counterpart of xarray. @@ -34,6 +75,7 @@ class DXarray: DXarray.values.gshape etc. This is in order to avoid confusion, because a DXarray is built of possibly several DNDarrays which could cause confusion to which gshape etc. a global attribute DXarray.gshape could refer to. + Currently, it is checked whether values and coords are on the same `device`; in principle, this is unnecessary. """ def __init__( @@ -47,80 +89,48 @@ def __init__( """ Constructor for DXarray class """ + # Check compatibility of the input arguments + dxarray_sanitation.check_compatibility_values_dims_coords(values, dims, coords) + dxarray_sanitation.check_name(name) + dxarray_sanitation.check_attrs(attrs) + + # after the checks, set the directly given attributes... 
+ self.__values = values self.__name = name self.__attrs = attrs + self.__coords = coords + self.__device = values.device + self.__comm = values.comm - # check if names of dims are given (and whether their number fits the number of dims of the values array) - # if no names are provided, introduce generic names "dim_N", N = 0,1,... - if dims is not None: - assert len(dims) == self.__values.ndim - self.__dims = dims + # ... and determine those not directly given: + # since we are in the DXarray class, split dimension is given by a string + self.__split = dim_idx_to_name(dims, values.split) + + # determine dimensions with and without coordinates + if coords is not None: + dims_with_coords = sum([list(it[0]) for it in coords.items()], []) else: - self.__dims = ["dim_%d" % k for k in range(self.__values.ndim)] + dims_with_coords = [] + dims_without_coords = [dim for dim in dims if dim not in dims_with_coords] - # set attribute split: use dimension name instead of idx since we are in class DXarray instead of DNDarray - self.__split = self.__dim_idx_to_name(values.split) + self.__dims_with_cooords = dims_with_coords + self.__dims_without_coords = dims_without_coords - # check consistency of the coordinates provided + # check if all appearing DNDarrays are balanced: as a result, the DXarray is balanced if and only if all DNDarrays are balanced if coords is not None: - # go through all entries in the dictionary coords - for coord_item in coords.items(): - coord_item_dims = coord_item[0] - coord_item_coords = coord_item[1] - # first case: "classical" coordinates for a single dimension, sometimes referred to "logical coordinates" - if isinstance(coord_item_dims, str): - # here, the coordinates must be given by a one-dimensional DNDarray... - assert isinstance(coord_item_coords, ht.DNDarray) - assert coord_item_coords.ndim == 1 - # ... with matching device and communicator, ... 
- assert coord_item_coords.device == self.__values.device - assert coord_item_coords.comm == self.__values.comm - # ... correct shape, and ... - assert ( - coord_item_coords.gshape[0] - == self.__values.gshape[self.__dim_name_to_idx(coord_item_dims)] - ) - # ... that is split if and only if the coordinates refer to the split dimension of the DXarray - if coord_item_dims == self.__split: - assert coord_item_coords.split == 0 - else: - assert coord_item_coords.split is None - # second case: "physical coordinates" - two or more dimensions are "merged" together and equipped with a coordinate array - # that cannot be expressed as meshgrid of 1d coordinate arrays - elif isinstance(coord_item_dims, tuple): - # now, the coordinates must be given as a DXarray... - assert isinstance(coord_item_coords, DXarray) - # ... with matching dimension names, ... - assert coord_item_coords.dims == list(coord_item_dims) - # ... shape, ... - assert ( - torch.tensor(coord_item_coords.values.gshape) - == torch.tensor(self.__values.gshape)[ - self.__dim_name_to_idx(list(coord_item_dims)) - ] - ).all() - # ... device and communicator, ... - assert coord_item_coords.device == self.__values.device - assert coord_item_coords.comm == self.__values.comm - # ... and split dimension. - if self.__split in coord_item_dims: - assert coord_item_coords.split == self.__split - else: - assert coord_item_coords.split is None - - # after the consistency checks, set the remaining attributes of the DXarray - self.__coords = coords - self.__device = values.device - self.__comm = values.comm + balanced = values.balanced and all( + [coord_item[1].balanced for coord_item in coords.items()] + ) + else: + balanced = values.balanced + self.__balanced = balanced - if self.__coords is not None: - self.__dims_with_coords = sum([list(it[0]) for it in coords.items()], []) + # if no names are provided, introduce generic names "dim_N", N = 0,1,... 
+ if dims is None: + self.__dims = ["dim_%d" % k for k in range(self.__values.ndim)] else: - self.__dims_with_coords = [] - self.__dims_without_coords = [ - dim for dim in self.__dims if dim not in self.__dims_with_coords - ] + self.__dims = dims """ Attribute getters and setters for the DXarray class @@ -196,30 +206,51 @@ def dims_without_coordinates(self) -> list: """ return self.__dims_without_coordinates + @property + def balanced(self) -> bool: + """ + Check whether all DNDarrays in DXarray are balanced + """ + return self.__balanced + @values.setter - def values(self, arr: ht.DNDarray): + def values(self, newvalues: ht.DNDarray): """ Set value array of DXarray """ - # TODO: perform some consistency checks... - self.__values = arr + dxarray_sanitation.check_compatibility_values_dims_coords( + newvalues, self.__dims, self.__coords + ) + self.__values = newvalues + + @coords.setter + def coors(self, newcoords: Union[dict, None]): + """ + Set coordinates of DXarray + """ + dxarray_sanitation.check_compatibility_values_dims_coords( + self.__values, self.__dims, newcoords + ) + self.__coords = newcoords @name.setter - def name(self, name: str): + def name(self, newname: Union[str, None]): """ Set name of DXarray """ - self.__name = name + dxarray_sanitation.check_name(newname) + self.__name = newname @attrs.setter - def attrs(self, attributes: dict): + def attrs(self, newattrs: Union[dict, None]): """ Set attributes of DXarray """ - self.__attrs = attributes + dxarray_sanitation.check_attrs(newattrs) + self.__attrs = newattrs """ - Methods of DXarray class + Private methods of DXarray class """ def __dim_name_to_idx(self, names: Union[str, tuple, list, None]): @@ -227,36 +258,19 @@ def __dim_name_to_idx(self, names: Union[str, tuple, list, None]): Converts a string (or tuple of strings) referring to dimensions of the DXarray to the corresponding numeric index (tuple of indices) of these dimensions. Inverse of :meth:`__dim_idx_to_name`. 
""" - if names is None: - return None - elif isinstance(names, str): - return self.__dims.index(names) - elif isinstance(names, tuple): - names_list = list(names) - return tuple([self.__dims.index(name) for name in names_list]) - elif isinstance(names, list): - return [self.__dims.index(name) for name in names] - else: - raise TypeError("Input must be None, string, list of strings, or tuple of strings.") + return dim_name_to_idx(self.__dims, names) def __dim_idx_to_name(self, idxs: Union[int, tuple, list, None]): """ Converts an numeric index (or tuple of such indices) referring to the dimensions of the DXarray to the corresponding name string (or tuple of name strings). Inverse of :meth:`__dim_name_to_idx`. """ - if idxs is None: - return None - elif isinstance(idxs, int): - return self.__dims[idxs] - elif isinstance(idxs, tuple): - idxs_list = list(idxs) - return tuple([self.__dims[idx] for idx in idxs_list]) - elif isinstance(idxs, list): - return [self.__dims[idx] for idx in idxs] - else: - raise TypeError("Input must be None, int, list of ints, or tuple of ints.") + return dim_idx_to_name(self.__dims, idxs) def __repr__(self) -> str: + """ + Representation of DXarray as string. Required for printing. + """ if self.__name is not None: print_name = self.__name else: @@ -322,3 +336,7 @@ def __repr__(self) -> str: ) else: return "" + + +from . import dxarray_sanitation +from . 
import dxarray_manipulations diff --git a/heat/dxarray/dxarray_manipulations.py b/heat/dxarray/dxarray_manipulations.py new file mode 100644 index 0000000000..d105862cd7 --- /dev/null +++ b/heat/dxarray/dxarray_manipulations.py @@ -0,0 +1,8 @@ +""" +Manipulation routines for the DXarray class +""" + +import torch +import heat as ht + +from .dxarray import DXarray diff --git a/heat/dxarray/dxarray_sanitation.py b/heat/dxarray/dxarray_sanitation.py new file mode 100644 index 0000000000..1e622a31ac --- /dev/null +++ b/heat/dxarray/dxarray_sanitation.py @@ -0,0 +1,166 @@ +""" +Validation/Sanitation routines for the DXarray class +""" + +import torch +import heat as ht +from typing import Any, Union + +from .dxarray import DXarray, dim_name_to_idx, dim_idx_to_name + + +def check_compatibility_values_dims_coords( + values: ht.DNDarray, dims: Union[list, None], coords: Union[dict, None] +): + """ + Checks whether input values, dims, and coords are valid and compatible inputs for a DXarray + """ + if not isinstance(values, ht.DNDarray): + raise TypeError("Input `values` must be a DNDarray, but is ", type(values), ".") + if not (isinstance(dims, list) or dims is None): + raise TypeError("Input `dims` must be a list or None, but is ", type(dims), ".") + if not (isinstance(coords, dict) or coords is None): + raise TypeError("Input `coords` must be a dictionary or None, but is ", type(coords), ".") + + # check if names of dims are given (and whether their number fits the number of dims of the values array) + if dims is not None: + if len(dims) != values.ndim: + raise ValueError( + "Number of dimension names in `dims` (=%d) must be equal to number of dimensions of `values` array (=%d)." 
+ % (len(dims), values.ndim) + ) + + # check consistency of the coordinates provided + if coords is not None: + # go through all entries in the dictionary coords + for coord_item in coords.items(): + coord_item_dims = coord_item[0] + coord_item_coords = coord_item[1] + # first case: "classical" coordinates for a single dimension, sometimes referred to "logical coordinates" + if isinstance(coord_item_dims, str): + # here, the coordinates must be given by a one-dimensional DNDarray... + if not isinstance(coord_item_coords, ht.DNDarray): + raise TypeError( + "Coordinate arrays (i.e. entries of `coords`) for single dimension must be DNDarray. Here, type ", + type(coord_item_coords), + " is given for dimension ", + coord_item_dims, + ".", + ) + if not coord_item_coords.ndim == 1: + raise ValueError( + "Coordinate arrays for a single dimension must have dimension 1, but coordinate array for dimension ", + coord_item_dims, + " has dimension %d." % coord_item_coords.ndim, + ) + # ... with matching device and communicator, ... + if not coord_item_coords.device == values.device: + raise RuntimeError( + "Device of coordinate array for dimension ", + coord_item_dims, + "does not coincide with device for `values`.", + ) + if not coord_item_coords.comm == values.comm: + raise RuntimeError( + "Communicator of coordinate array for dimension ", + coord_item_dims, + "does not coincide with device for `values`.", + ) + # ... correct shape, and ... + if not ( + coord_item_coords.gshape[0] + == values.gshape[dim_name_to_idx(dims, coord_item_dims)] + ): + raise ValueError( + "Size of `values` in dimension ", + coord_item_dims, + " does not coincide with size of coordinate array in this dimension.", + ) + # ... 
that is split if and only if the coordinates refer to the split dimension of the DXarray
+            if coord_item_dims == dim_idx_to_name(dims, values.split):
+                if coord_item_coords.split != 0:
+                    raise ValueError(
+                        "`values` array is split along dimension ",
+                        coord_item_dims,
+                        ", but corresponding coordinate array is not split along this dimension.",
+                    )
+            else:
+                if coord_item_coords.split is not None:
+                    raise ValueError(
+                        "`values` array is not split along dimension ",
+                        coord_item_dims,
+                        ", but corresponding coordinate array is split along this dimension.",
+                    )
+        # second case: "physical coordinates" - two or more dimensions are "merged" together and equipped with a coordinate array
+        # that cannot be expressed as meshgrid of 1d coordinate arrays
+        elif isinstance(coord_item_dims, tuple):
+            # now, the coordinates must be given as a DXarray...
+            if not isinstance(coord_item_coords, DXarray):
+                raise TypeError(
+                    "Coordinate arrays (i.e. entries of `coords`) must be DXarrays. Here, type ",
+                    type(coord_item_coords),
+                    " is given for dimensions ",
+                    coord_item_dims,
+                    ".",
+                )
+            # ... with matching dimension names, ...
+            if coord_item_coords.dims != list(coord_item_dims):
+                raise ValueError(
+                    "Dimension names of coordinate-DXarray and the corresponding dimension names in `coords` must be equal."
+                )
+            # ... shape, ...
+            if not (
+                torch.tensor(coord_item_coords.values.gshape)
+                == torch.tensor(values.gshape)[dim_name_to_idx(dims, list(coord_item_dims))]
+            ).all():
+                raise ValueError(
+                    "Size of `values` in dimensions ",
+                    coord_item_dims,
+                    " does not coincide with size of coordinate array in these dimensions.",
+                )
+            # ... device and communicator, ...
+            if not coord_item_coords.device == values.device:
+                raise RuntimeError(
+                    "Device of coordinate array for dimensions ",
+                    coord_item_dims,
+                    "does not coincide with device for `values`.",
+                )
+            if not coord_item_coords.comm == values.comm:
+                raise RuntimeError(
+                    "Communicator of coordinate array for dimensions ",
+                    coord_item_dims,
+                    "does not coincide with communicator for `values`.",
+                )
+            # ... and split dimension.
+            if dim_idx_to_name(dims, values.split) in coord_item_dims:
+                if not coord_item_coords.split == dim_idx_to_name(dims, values.split):
+                    raise ValueError(
+                        "`values` array is split along dimension ",
+                        coord_item_dims,
+                        ", but corresponding coordinate array is not split along ",
+                        coord_item_coords.split,
+                        ".",
+                    )
+            else:
+                if coord_item_coords.split is not None:
+                    raise ValueError(
+                        "`values` array is not split along dimensions ",
+                        coord_item_dims,
+                        ", but corresponding coordinate array is split.",
+                    )
+
+
+def check_name(name: Any):
+    """
+    Checks whether input is appropriate for attribute `name` of `DXarray`
+    """
+    if not (isinstance(name, str) or name is None):
+        raise TypeError("`name` must be a string or None, but is ", type(name), ".")
+
+
+def check_attrs(attrs: Any):
+    """
+    Checks whether input is appropriate for attribute `attrs` of `DXarray`.
+    """
+    if not (isinstance(attrs, dict) or attrs is None):
+        raise TypeError("`attrs` must be a dictionary or None, but is ", type(attrs), ".")

From fb07a4e5af52234a9a01460a02e9730f407ce144 Mon Sep 17 00:00:00 2001
From: Hoppe
Date: Fri, 7 Jul 2023 15:48:50 +0200
Subject: [PATCH 09/51] added `resplit_` and `balance_`...
still working on `.xarray()` --- heat/dxarray/dxarray.py | 100 +++++++++++++++++++++++++---- heat/dxarray/dxarray_sanitation.py | 18 ++++++ 2 files changed, 107 insertions(+), 11 deletions(-) diff --git a/heat/dxarray/dxarray.py b/heat/dxarray/dxarray.py index ae49633da0..51131d6c65 100644 --- a/heat/dxarray/dxarray.py +++ b/heat/dxarray/dxarray.py @@ -5,6 +5,7 @@ import torch import heat as ht import xarray as xr +from xarray import DataArray from typing import Union # imports of "dxarray_..."-dependencies at the end to avoid cyclic dependence @@ -118,13 +119,7 @@ def __init__( self.__dims_without_coords = dims_without_coords # check if all appearing DNDarrays are balanced: as a result, the DXarray is balanced if and only if all DNDarrays are balanced - if coords is not None: - balanced = values.balanced and all( - [coord_item[1].balanced for coord_item in coords.items()] - ) - else: - balanced = values.balanced - self.__balanced = balanced + self.__balanced = dxarray_sanitation.check_if_balanced(self.__values, self.__coords) # if no names are provided, introduce generic names "dim_N", N = 0,1,... if dims is None: @@ -158,7 +153,7 @@ def coords(self) -> dict: return self.__coords @property - def split(self) -> Union[int, None]: + def split(self) -> Union[str, None]: """ Get split dimension from DXarray """ @@ -209,7 +204,9 @@ def dims_without_coordinates(self) -> list: @property def balanced(self) -> bool: """ - Check whether all DNDarrays in DXarray are balanced + Get the attributed `balanced` of DXarray. + Does not check whether the current value of this attribute is consistent! + (This can be ensured by calling :meth:`DXarray.is_balanced(force_check=True)` first.) 
""" return self.__balanced @@ -253,14 +250,18 @@ def attrs(self, newattrs: Union[dict, None]): Private methods of DXarray class """ - def __dim_name_to_idx(self, names: Union[str, tuple, list, None]): + def __dim_name_to_idx( + self, names: Union[str, tuple, list, None] + ) -> Union[str, tuple, list, None]: """ Converts a string (or tuple of strings) referring to dimensions of the DXarray to the corresponding numeric index (tuple of indices) of these dimensions. Inverse of :meth:`__dim_idx_to_name`. """ return dim_name_to_idx(self.__dims, names) - def __dim_idx_to_name(self, idxs: Union[int, tuple, list, None]): + def __dim_idx_to_name( + self, idxs: Union[int, tuple, list, None] + ) -> Union[int, tuple, list, None]: """ Converts an numeric index (or tuple of such indices) referring to the dimensions of the DXarray to the corresponding name string (or tuple of name strings). Inverse of :meth:`__dim_name_to_idx`. @@ -337,6 +338,83 @@ def __repr__(self) -> str: else: return "" + """ + Public Methods of DXarray + """ + + def is_balanced(self, force_check: bool = False) -> bool: + """ + Checks if DXarray is balanced. If `force_check = False` (default), the current value of the + attribute `balanced` is returned unless this current value is None (i.e. no information on + no information available); only in the latter case, or if `force_check = True`, the value + of the attribute `balanced` is updated before being returned. + + """ + if self.__balanced is None or force_check: + self.__balanced = dxarray_sanitation.check_if_balanced(self.__values, self.__coords) + return self.__balanced + + def resplit_(self, dim: Union[str, None] = None): + """ + In-place option for resplitting a :class:`DXarray`. + """ + if dim is not None and dim not in self.__dims: + raise ValueError( + "Input `dim` in resplit_ must be either None or a dimension of the underlying DXarray." 
+ ) + # early out if nothing is to do + if self.__split == dim: + return self + else: + # resplit the value array accordingly + self.__values.resplit_(self.__dim_name_to_idx(dim)) + if self.__coords is not None: + for item in self.__coords.items(): + if isinstance(item[0], str) and item[0] == dim: + item[1].resplit_(0) + elif isinstance(item[0], tuple) and dim in item[0]: + item[1].resplit_(dim) + self.__split = dim + return self + + def balance_(self): + """ + In-place option for balancing a :class:`DXarray`. + """ + if self.is_balanced(force_check=True): + return self + else: + self.__values.balance_() + if self.__coords is not None: + for item in self.__coords.items(): + item[1].balance_() + self.__balanced = True + return self + + def xarray(self): + """ + Convert given DXarray (possibly distributed over some processes) to a non-distributed xarray (:class:`xarray.DataArray`) + """ + non_dist_copy = self.resplit_(None) + if non_dist_copy.coords is None: + xarray_coords = None + else: + xarray_coords = { + item[0]: item[1].cpu().numpy() + if isinstance(item[1], ht.DNDarray) + else item[1].xarray() + for item in non_dist_copy.coords.items() + } + xarray = DataArray( + non_dist_copy.values.cpu().numpy(), + dims=non_dist_copy.dims, + coords=xarray_coords, + name=non_dist_copy.name, + attrs=non_dist_copy.attrs, + ) + del non_dist_copy + return xarray + from . import dxarray_sanitation from . 
import dxarray_manipulations diff --git a/heat/dxarray/dxarray_sanitation.py b/heat/dxarray/dxarray_sanitation.py index 1e622a31ac..775b546595 100644 --- a/heat/dxarray/dxarray_sanitation.py +++ b/heat/dxarray/dxarray_sanitation.py @@ -164,3 +164,21 @@ def check_attrs(attrs: Any): """ if not (isinstance(attrs, dict) or attrs is None): raise TypeError("`attrs` must be a dictionary or None, but is ", type(attrs), ".") + + +def check_if_balanced(values: ht.DNDarray, coords: Union[dict, None]): + """ + Checks if a DXarray with values and coords is balanced, i.e., equally distributed on each process + A DXarray is balanced if and only if all underlying DNDarrays are balanced. + """ + if values.balanced is None: + return None + else: + if coords is not None: + if None in [coord_item[1].balanced for coord_item in coords.items()]: + return None + else: + balanced = values.balanced and all( + [coord_item[1].balanced for coord_item in coords.items()] + ) + return balanced From 0cdfcd10173ff01b467ebd90607224d1fa101e44 Mon Sep 17 00:00:00 2001 From: Hoppe Date: Fri, 7 Jul 2023 17:22:07 +0200 Subject: [PATCH 10/51] added `.xarray()` for conversion heat.DXarray -> xarray.DataArray, conversion other way round (xarray -> DXarray) does not work so far... 
--- heat/dxarray/dxarray.py | 30 +++++++++++++++++++++++++++++- 1 file changed, 29 insertions(+), 1 deletion(-) diff --git a/heat/dxarray/dxarray.py b/heat/dxarray/dxarray.py index 51131d6c65..fe77837c3d 100644 --- a/heat/dxarray/dxarray.py +++ b/heat/dxarray/dxarray.py @@ -10,7 +10,7 @@ # imports of "dxarray_..."-dependencies at the end to avoid cyclic dependence -__all__ = ["DXarray"] +__all__ = ["DXarray", "from_xarray"] # Auxiliary functions @@ -416,5 +416,33 @@ def xarray(self): return xarray +def from_xarray( + xarray: xr.DataArray, + split: Union[str, None] = None, + device: ht.Device = None, + comm: ht.Communication = None, +) -> DXarray: + """ + Generates a DXarray from a given xarray (:class:`xarray.DataArray`) + """ + coords_dict = { + item[0]: item[1].values if len(item[0]) == 1 else item[1] for item in xarray.coords.items() + } + print(coords_dict) + dxarray = DXarray( + ht.DNDarray(torch.from_numpy(xarray.values), device=device, comm=comm), + dims=list(xarray.dims), + coords=coords_dict, + name=xarray.name, + attrs=xarray.attrs, + ) + if split is not None: + if split not in dxarray.dims: + raise ValueError('split dimension "', split, '" is not a dimension of input array.') + else: + dxarray.resplit_(split) + return dxarray + + from . import dxarray_sanitation from . 
import dxarray_manipulations From 3c1a62ca9528a1a3e2fcae32e32593e66eb53365 Mon Sep 17 00:00:00 2001 From: Hoppe Date: Mon, 10 Jul 2023 14:59:34 +0200 Subject: [PATCH 11/51] added routine `from_numpy` for DNDarrays (similar to `torch.from_numpy`) and completed `from_xarray()` for DXarrays --- heat/core/manipulations.py | 32 +++++++++++++++++++++++++++++++- heat/dxarray/dxarray.py | 14 +++++++++++--- 2 files changed, 42 insertions(+), 4 deletions(-) diff --git a/heat/core/manipulations.py b/heat/core/manipulations.py index cc3c738e7f..e16aec6205 100644 --- a/heat/core/manipulations.py +++ b/heat/core/manipulations.py @@ -9,7 +9,7 @@ from typing import Iterable, Type, List, Callable, Union, Tuple, Sequence, Optional -from .communication import MPI +from .communication import MPI, sanitize_comm, Communication from .dndarray import DNDarray from . import arithmetics @@ -21,6 +21,7 @@ from . import tiling from . import types from . import _operations +from . import devices __all__ = [ "balance", @@ -36,6 +37,7 @@ "flip", "fliplr", "flipud", + "from_numpy", "hsplit", "hstack", "moveaxis", @@ -1094,6 +1096,34 @@ def flipud(a: DNDarray) -> DNDarray: return flip(a, 0) +def from_numpy( + x: np.ndarray, + split: Optional[int] = None, + device: Optional[Union[str, devices.Device]] = None, + comm: Optional[Communication] = None, +) -> DNDarray: + """ + Creates DNDarray from given NumPy Array. The data type is determined by the data type of the Numpy Array. + Split-dimension, device and communicator can be prescribed as usual. + Inverse of :meth:`DNDarray.numpy()`. 
+ """ + dtype = types.canonical_heat_type(x.dtype) + device = devices.sanitize_device(device) + comm = sanitize_comm(comm) + xht = DNDarray( + torch.from_numpy(x).to(device.torch_device), + x.shape, + dtype=dtype, + split=None, + device=device, + comm=comm, + balanced=True, + ) + if split is not None: + xht.resplit_(split) + return xht + + def hsplit(x: DNDarray, indices_or_sections: Iterable) -> List[DNDarray, ...]: """ Split array into multiple sub-DNDarrays along the 2nd axis (horizontally/column-wise). diff --git a/heat/dxarray/dxarray.py b/heat/dxarray/dxarray.py index fe77837c3d..fefe0d7511 100644 --- a/heat/dxarray/dxarray.py +++ b/heat/dxarray/dxarray.py @@ -426,11 +426,19 @@ def from_xarray( Generates a DXarray from a given xarray (:class:`xarray.DataArray`) """ coords_dict = { - item[0]: item[1].values if len(item[0]) == 1 else item[1] for item in xarray.coords.items() + item[0]: ht.from_numpy(item[1].values, device=device, comm=comm) + if len(item[0]) == 1 + else DXarray( + ht.from_numpy(item[1].values, device=device, comm=comm), + dims=list(item[0]), + coords=None, + name=item[1].name.__str__(), + attrs=item[1].attrs, + ) + for item in xarray.coords.items() } - print(coords_dict) dxarray = DXarray( - ht.DNDarray(torch.from_numpy(xarray.values), device=device, comm=comm), + ht.from_numpy(xarray.values, device=device, comm=comm), dims=list(xarray.dims), coords=coords_dict, name=xarray.name, From 316105b5dd0ae354cd1760f544e4ffe83adc9d94 Mon Sep 17 00:00:00 2001 From: Hoppe Date: Wed, 26 Jul 2023 14:32:41 +0200 Subject: [PATCH 12/51] installation of xarray in the ci workflow --- .github/workflows/benchmark_main.yml | 1 + .github/workflows/benchmark_pr.yml | 1 + .github/workflows/ci.yaml | 1 + 3 files changed, 3 insertions(+) diff --git a/.github/workflows/benchmark_main.yml b/.github/workflows/benchmark_main.yml index a6b1bd41bf..c226d36086 100644 --- a/.github/workflows/benchmark_main.yml +++ b/.github/workflows/benchmark_main.yml @@ -21,6 +21,7 @@ jobs: - 
name: Test run: | pip install torch==1.12.1+cpu torchvision==0.13.1+cpu torchaudio==0.12.1 -f https://download.pytorch.org/whl/torch_stable.html + pip install xarray pip install .[cb] PERUN_RUN_ID=N4 mpirun -n 4 python benchmarks/cb/main.py jq -s flatten bench_data/*.json > bench_data/all_benchmarks.json diff --git a/.github/workflows/benchmark_pr.yml b/.github/workflows/benchmark_pr.yml index 9b4900dcaa..db2271982f 100644 --- a/.github/workflows/benchmark_pr.yml +++ b/.github/workflows/benchmark_pr.yml @@ -22,6 +22,7 @@ jobs: - name: Test run: | pip install torch==1.12.1+cpu torchvision==0.13.1+cpu torchaudio==0.12.1 -f https://download.pytorch.org/whl/torch_stable.html + pip install xarray pip install .[cb] PERUN_RUN_ID=N4 mpirun -n 4 python benchmarks/cb/main.py jq -s flatten bench_data/*.json > bench_data/all_benchmarks.json diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 4da49b8e33..db4f47b98e 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -52,5 +52,6 @@ jobs: pip install pytest pip install ${{ matrix.pytorch-version }} --extra-index-url https://download.pytorch.org/whl/cpu pip install ${{ matrix.install-options }} + pip install xarray mpirun -n 3 pytest heat/ mpirun -n 4 pytest heat/ From 913149773ce7d159076cb4af99d277b994af9daa Mon Sep 17 00:00:00 2001 From: Hoppe Date: Thu, 27 Jul 2023 13:08:58 +0200 Subject: [PATCH 13/51] refactoring --- heat/dxarray/dxarray.py | 1 - 1 file changed, 1 deletion(-) diff --git a/heat/dxarray/dxarray.py b/heat/dxarray/dxarray.py index fefe0d7511..13562736c7 100644 --- a/heat/dxarray/dxarray.py +++ b/heat/dxarray/dxarray.py @@ -12,7 +12,6 @@ __all__ = ["DXarray", "from_xarray"] - # Auxiliary functions From 960bf6f90a835863dd24e9f7b4209e3ded1cc649 Mon Sep 17 00:00:00 2001 From: Hoppe Date: Thu, 27 Jul 2023 15:45:53 +0200 Subject: [PATCH 14/51] added installation of xarray in ReceivePR.yml --- .github/workflows/ReceivePR.yml | 1 + 1 file changed, 1 insertion(+) diff --git 
a/.github/workflows/ReceivePR.yml b/.github/workflows/ReceivePR.yml index 89d058ad31..7749056b8a 100644 --- a/.github/workflows/ReceivePR.yml +++ b/.github/workflows/ReceivePR.yml @@ -27,6 +27,7 @@ jobs: - name: Test run: | pip install .[dev] + pip install xarray pre-commit run --all-files python -m unittest From 1dbd6c3d07d76506c384c01bbeb974cb72da67c2 Mon Sep 17 00:00:00 2001 From: Claudia Comito <39374113+ClaudiaComito@users.noreply.github.com> Date: Mon, 7 Aug 2023 16:42:04 +0200 Subject: [PATCH 15/51] hint to enabling edits from maintainers --- quick_start.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/quick_start.md b/quick_start.md index 09b4c68101..443177639d 100644 --- a/quick_start.md +++ b/quick_start.md @@ -69,7 +69,7 @@ Local torch tensor on rank 1 : tensor([5, 6, 7, 8, 9], dtype=torch.int32) 1. Pick an Issue you'd like to work on. Check out [Good First Issues](https://github.com/helmholtz-analytics/heat/issues?q=is%3Aopen+is%3Aissue+label%3A%22good+first+issue%22), start from most recent. Get in touch and ask to be assigned to the issue. -2. **IMPORTANT:** As soon as an issue is assigned, a new branch will be created (a comment will be posted under the relevant issue). Do use this branch to make your changes, it has been checked out from the correct base branch (i.e. `main` for new features, `release/*` for bug fixes). +2. **IMPORTANT:** As soon as an issue is assigned, a new branch will be created (a comment will be posted under the relevant issue). Do use this branch to make your changes, it has been checked out from the correct source branch (i.e. `main` for new features, `release/*` for bug fixes). 3. [Fork](https://docs.github.com/en/get-started/quickstart/contributing-to-projects) or, if you have write access, clone the [Heat repository](https://github.com/helmholtz-analytics/heat). @@ -120,7 +120,7 @@ Local torch tensor on rank 1 : tensor([5, 6, 7, 8, 9], dtype=torch.int32) ``` -7. 
After [making and pushing](https://docs.github.com/en/get-started/quickstart/contributing-to-projects#making-and-pushing-changes) your changes, go ahead and [create a Pull Request](https://docs.github.com/en/get-started/quickstart/contributing-to-projects#making-a-pull-request). Make sure you go through the Due Diligence checklist (part of our PR template). +7. After [making and pushing](https://docs.github.com/en/get-started/quickstart/contributing-to-projects#making-and-pushing-changes) your changes, go ahead and [create a Pull Request](https://docs.github.com/en/get-started/quickstart/contributing-to-projects#making-a-pull-request). Make sure you go through the Due Diligence checklist (part of our PR template). Consider [allowing us to edit your branch](https://docs.github.com/en/pull-requests/collaborating-with-pull-requests/working-with-forks/allowing-changes-to-a-pull-request-branch-created-from-a-fork#enabling-repository-maintainer-permissions-on-existing-pull-requests) for a smoother review process. ## Thank you so much for your time! From df1a404751b38090ec6a208eb5583e2076897638 Mon Sep 17 00:00:00 2001 From: Claudia Comito <39374113+ClaudiaComito@users.noreply.github.com> Date: Mon, 7 Aug 2023 17:10:03 +0200 Subject: [PATCH 16/51] update branch creation instructions --- contributing.md | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/contributing.md b/contributing.md index b0ae45d0be..8f638d73a5 100644 --- a/contributing.md +++ b/contributing.md @@ -6,7 +6,7 @@ Thank you for your interest in contributing to Heat, we really appreciate your t * To set up your environment for Heat development, follow these [instructions](README.md#Hacking). * We strongly recommend getting in touch with the core developers, either here on GitHub (by filing and/or commenting on an Issue) or on [Mattermost](https://mattermost.hzdr.de/signup_user_complete/?id=3sixwk9okpbzpjyfrhen5jpqfo), before starting to work on a contribution. 
We are a small team and it's good to know who is currently working on what. * Our git workflow is described in a lot of detail [below](#developing-contributions). - * **TL;DR for experts:** + * **TL;DR for experts:** (Also check out [Quick Start](quick_start.md#new-contributors)) 1. `git add`, `pre-commit run --all-files` and `git commit` as needed; 2. `git rebase -i main` to rebase and tidy up your commits; 3. `git push` to publish to the remote repository. @@ -66,11 +66,7 @@ also install the pre-commit hook with pre-commit install ```` -* Create a branch for the feature you want to work on. Since the branch name will appear in the merge message, use a sensible name. The naming scheme is as follows `-`, where the kind of the contribution should be *features* for an entirely new feature, *bug* for, well, a bug and *enhancement* for things like performance optimizations for example. Please make sure that *NAME* very briefly summarizes the content of your contribution. - -``` -git checkout -b features/123-boolean-operators -``` +* **NEW** As of Aug 2023, as soon as an issue is assigned, a branch is created and its name posted in a comment under the original issue. **Do adopt this branch** for your development, it is guaranteed to have the correct source branch - `release/...` for bug fixes, `main` for new features, docs updates etc. 
* Commit locally as you progress: From fcb5f820f40e0de4c2595e1ff978b0640b50dd3b Mon Sep 17 00:00:00 2001 From: Claudia Comito <39374113+ClaudiaComito@users.noreply.github.com> Date: Mon, 7 Aug 2023 17:54:59 +0200 Subject: [PATCH 17/51] update bug report template --- .github/ISSUE_TEMPLATE/bug_report.yml | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/.github/ISSUE_TEMPLATE/bug_report.yml b/.github/ISSUE_TEMPLATE/bug_report.yml index 707a87bc1e..5b2c01b79f 100644 --- a/.github/ISSUE_TEMPLATE/bug_report.yml +++ b/.github/ISSUE_TEMPLATE/bug_report.yml @@ -1,7 +1,7 @@ name: Bug Report description: File a bug report title: "[Bug]: " -labels: ["bug :bug:"] +labels: ["bug"] body: - type: markdown @@ -33,6 +33,7 @@ body: label: Version description: What version of Heat are you running? options: + - 1.3.x - 1.2.x - 1.1.x - main (development branch) @@ -54,6 +55,8 @@ body: label: PyTorch version description: What PyTorch version? options: + - 2.0 + - 1.13 - 1.12 - 1.11 - "1.10" From 7f9c9b1ae9554daba90de3b1ed9bd1becfd4c28c Mon Sep 17 00:00:00 2001 From: Claudia Comito <39374113+ClaudiaComito@users.noreply.github.com> Date: Sat, 26 Aug 2023 09:38:10 +0200 Subject: [PATCH 18/51] introduce what for section, numpy modules support --- README.md | 48 ++++++++++++++++++++++++++++++++++-------------- 1 file changed, 34 insertions(+), 14 deletions(-) diff --git a/README.md b/README.md index 71bbd79376..16822bbd97 100644 --- a/README.md +++ b/README.md @@ -19,6 +19,39 @@ Heat is a distributed tensor framework for high performance data analytics. [![Benchmarks](https://img.shields.io/badge/Github--Pages-Benchmarks-2ea44f)](https://helmholtz-analytics.github.io/heat/dev/bench) [![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black) +Table of contents: + COPILOT WILL FILL OUT AT THE END + +# What is Heat for? 
+ +Heat builds on [PyTorch](https://pytorch.org/) and [mpi4py](https://mpi4py.readthedocs.io) to provide high-performance computing infrastructure for memory-intensive applications within the NumPy/SciPy ecosystem. + + +With Heat you can: +- port existing NumPy/SciPy code from single-CPU to multi-node clusters with minimal coding effort (EXAMPLE); +- exploit the entire, cumulative RAM of your many nodes for memory-intensive operations and algorithms (EXAMPLE); +- run your NumPy/SciPy code on GPUs (CUDA, ROCm, coming up: Apple MPS) (EXAMPLE). + + +Where's the catch? A few things you can't do with Heat (yet): +- Heat does not support all NumPy/SciPy functions (yet), same with scikit-learn algorithms. If you need a functionality that is not yet supported: + - [search existing issues](https://github.com/helmholtz-analytics/heat/issues) and make sure to comment if someone else already requested it; + - otherwise please [open a new issue](https://github.com/helmholtz-analytics/heat/issues/new/choose). + +- Heat does not support deployments across physically distributed clusters. (TOO STRONG?) + + +Here's a summary of supported functions and algorithms: (DRAFT) + +| Numpy Module | Supported | +|--------------|-----------| +| random | ✔️ | +| linalg | ✔️ | +| fft | ❌ | + + +Check out the [Heat API Reference](https://heat.readthedocs.io/en/latest/autoapi/index.html) for a complete list. + # Goals Heat is a flexible and seamless open-source software for high performance data @@ -73,19 +106,6 @@ it, if you do not need HDF5 or NetCDF support. **It is also very important to ensure that the PyTorch version is compatible with the local CUDA installation.** More information can be found [here](https://pytorch.org/get-started/locally/). 
-# Hacking - -If you want to work with the development version, you can check out the sources using - -``` -$ git clone -``` - -The installation can then be done from the checked-out sources with - -``` -$ pip install heat[hdf5,netcdf,dev] -``` # Getting Started @@ -131,7 +151,7 @@ Local torch tensor on rank 0 : tensor([0, 1, 2, 3, 4], dtype=torch.int32) Local torch tensor on rank 1 : tensor([5, 6, 7, 8, 9], dtype=torch.int32) ``` -## Resources: +## Resources * [Heat Tutorials](https://heat.readthedocs.io/en/latest/tutorials.html) * [Heat API Reference](https://heat.readthedocs.io/en/latest/autoapi/index.html) From 0cdcf0d90d0d25cf5ae6435b8afbf552751278e0 Mon Sep 17 00:00:00 2001 From: Claudia Comito <39374113+ClaudiaComito@users.noreply.github.com> Date: Sat, 26 Aug 2023 09:43:28 +0200 Subject: [PATCH 19/51] Update README.md From 3c337c66f325af5d22a27ee14639223e9645dfce Mon Sep 17 00:00:00 2001 From: Claudia Comito <39374113+ClaudiaComito@users.noreply.github.com> Date: Mon, 28 Aug 2023 10:18:52 +0200 Subject: [PATCH 20/51] add faq section --- README.md | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/README.md b/README.md index 16822bbd97..21008a7b89 100644 --- a/README.md +++ b/README.md @@ -52,6 +52,13 @@ Here's a summary of supported functions and algorithms: (DRAFT) Check out the [Heat API Reference](https://heat.readthedocs.io/en/latest/autoapi/index.html) for a complete list. 
+# FAQ + + - Users + - Developers + - Students + - system administrators + # Goals Heat is a flexible and seamless open-source software for high performance data From 3968c3f98b8af416be46c865e5628a43c468cda8 Mon Sep 17 00:00:00 2001 From: Michael Tarnawa Date: Wed, 20 Sep 2023 15:57:38 +0200 Subject: [PATCH 21/51] update min python version --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index b3a55e2a58..b1196ad382 100644 --- a/README.md +++ b/README.md @@ -90,7 +90,7 @@ If you found a bug or miss a feature, then please file a new [issue](https://git # Requirements -Heat requires Python 3.7 or newer. +Heat requires Python 3.8 or newer. Heat is based on [PyTorch](https://pytorch.org/). Specifically, we are exploiting PyTorch's support for GPUs *and* MPI parallelism. For MPI support we utilize [mpi4py](https://mpi4py.readthedocs.io). Both packages can be installed via pip From 8771b870f7489c287a361aeb8489a9e906067d03 Mon Sep 17 00:00:00 2001 From: Claudia Comito <39374113+ClaudiaComito@users.noreply.github.com> Date: Wed, 20 Sep 2023 19:25:03 +0200 Subject: [PATCH 22/51] add numpy coverage tables --- numpy_coverage_tables.md | 369 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 369 insertions(+) create mode 100644 numpy_coverage_tables.md diff --git a/numpy_coverage_tables.md b/numpy_coverage_tables.md new file mode 100644 index 0000000000..2582c72190 --- /dev/null +++ b/numpy_coverage_tables.md @@ -0,0 +1,369 @@ +# NumPy Coverage Tables +This file is automatically generated by `numpy_coverage_tables.py`. +Please do not edit this file directly, but instead edit `numpy_coverage_tables.py` and run it to generate this file. +The following tables show the NumPy functions supported by Heat. +## Table of Contents +0. [NumPy Mathematical Functions](#numpy--mathematical-functions) +1. [NumPy Array Creation](#numpy-array-creation) +2. [NumPy Array Manipulation](#numpy-array-manipulation) +3. 
[NumPy Binary Operations](#numpy-binary-operations) +4. [NumPy I/O Operations](#numpy-i/o-operations) +5. [NumPy LinAlg Operations](#numpy-linalg-operations) +6. [NumPy Logic Functions](#numpy-logic-functions) +7. [NumPy Sorting Operations](#numpy-sorting-operations) +8. [NumPy Statistical Operations](#numpy-statistical-operations) + +## NumPy Mathematical Functions +| NumPy Mathematical Functions | Heat | +|---|---| +| sin | ✅ | +| cos | ✅ | +| tan | ✅ | +| arcsin | ✅ | +| arccos | ✅ | +| arctan | ✅ | +| hypot | ✅ | +| arctan2 | ✅ | +| degrees | ✅ | +| radians | ✅ | +| unwrap | ❌ | +| deg2rad | ✅ | +| rad2deg | ✅ | +| sinh | ✅ | +| cosh | ✅ | +| tanh | ✅ | +| arcsinh | ✅ | +| arccosh | ✅ | +| arctanh | ✅ | +| round | ✅ | +| around | ❌ | +| rint | ❌ | +| fix | ❌ | +| floor | ✅ | +| ceil | ✅ | +| trunc | ✅ | +| prod | ✅ | +| sum | ✅ | +| nanprod | ✅ | +| nansum | ✅ | +| cumprod | ✅ | +| cumsum | ✅ | +| nancumprod | ❌ | +| nancumsum | ❌ | +| diff | ✅ | +| ediff1d | ❌ | +| gradient | ❌ | +| cross | ✅ | +| trapz | ❌ | +| exp | ✅ | +| expm1 | ✅ | +| exp2 | ✅ | +| log | ✅ | +| log10 | ✅ | +| log2 | ✅ | +| log1p | ✅ | +| logaddexp | ✅ | +| logaddexp2 | ✅ | +| i0 | ❌ | +| sinc | ❌ | +| signbit | ✅ | +| copysign | ✅ | +| frexp | ❌ | +| ldexp | ❌ | +| nextafter | ❌ | +| spacing | ❌ | +| lcm | ✅ | +| gcd | ✅ | +| add | ✅ | +| reciprocal | ❌ | +| positive | ✅ | +| negative | ✅ | +| multiply | ✅ | +| divide | ✅ | +| power | ✅ | +| subtract | ✅ | +| true_divide | ❌ | +| floor_divide | ✅ | +| float_power | ❌ | +| fmod | ✅ | +| mod | ✅ | +| modf | ✅ | +| remainder | ✅ | +| divmod | ❌ | +| angle | ✅ | +| real | ✅ | +| imag | ✅ | +| conj | ✅ | +| conjugate | ✅ | +| maximum | ✅ | +| max | ✅ | +| amax | ❌ | +| fmax | ❌ | +| nanmax | ❌ | +| minimum | ✅ | +| min | ✅ | +| amin | ❌ | +| fmin | ❌ | +| nanmin | ❌ | +| convolve | ✅ | +| clip | ✅ | +| sqrt | ✅ | +| cbrt | ❌ | +| square | ✅ | +| absolute | ✅ | +| fabs | ✅ | +| sign | ✅ | +| heaviside | ❌ | +| nan_to_num | ✅ | +| real_if_close 
| ❌ | +| interp | ❌ | +## NumPy Array Creation +| NumPy Array Creation | Heat | +|---|---| +| empty | ✅ | +| empty_like | ✅ | +| eye | ✅ | +| identity | ❌ | +| ones | ✅ | +| ones_like | ✅ | +| zeros | ✅ | +| zeros_like | ✅ | +| full | ✅ | +| full_like | ✅ | +| array | ✅ | +| asarray | ✅ | +| asanyarray | ❌ | +| ascontiguousarray | ❌ | +| asmatrix | ❌ | +| copy | ✅ | +| frombuffer | ❌ | +| from_dlpack | ❌ | +| fromfile | ❌ | +| fromfunction | ❌ | +| fromiter | ❌ | +| fromstring | ❌ | +| loadtxt | ❌ | +| arange | ✅ | +| linspace | ✅ | +| logspace | ✅ | +| geomspace | ❌ | +| meshgrid | ✅ | +| mgrid | ❌ | +| ogrid | ❌ | +| diag | ✅ | +| diagflat | ❌ | +| tri | ❌ | +| tril | ✅ | +| triu | ✅ | +| vander | ❌ | +| mat | ❌ | +| bmat | ❌ | +## NumPy Array Manipulation +| NumPy Array Manipulation | Heat | +|---|---| +| copyto | ❌ | +| shape | ✅ | +| reshape | ✅ | +| ravel | ✅ | +| flat | ❌ | +| flatten | ✅ | +| moveaxis | ✅ | +| rollaxis | ❌ | +| swapaxes | ✅ | +| T | ❌ | +| transpose | ✅ | +| atleast_1d | ❌ | +| atleast_2d | ❌ | +| atleast_3d | ❌ | +| broadcast | ❌ | +| broadcast_to | ✅ | +| broadcast_arrays | ✅ | +| expand_dims | ✅ | +| squeeze | ✅ | +| asarray | ✅ | +| asanyarray | ❌ | +| asmatrix | ❌ | +| asfarray | ❌ | +| asfortranarray | ❌ | +| ascontiguousarray | ❌ | +| asarray_chkfinite | ❌ | +| require | ❌ | +| concatenate | ✅ | +| stack | ✅ | +| block | ❌ | +| vstack | ✅ | +| hstack | ✅ | +| dstack | ❌ | +| column_stack | ✅ | +| row_stack | ✅ | +| split | ✅ | +| array_split | ❌ | +| dsplit | ✅ | +| hsplit | ✅ | +| vsplit | ✅ | +| tile | ✅ | +| repeat | ✅ | +| delete | ❌ | +| insert | ❌ | +| append | ❌ | +| resize | ❌ | +| trim_zeros | ❌ | +| unique | ✅ | +| flip | ✅ | +| fliplr | ✅ | +| flipud | ✅ | +| reshape | ✅ | +| roll | ✅ | +| rot90 | ✅ | +## NumPy Binary Operations +| NumPy Binary Operations | Heat | +|---|---| +| bitwise_and | ✅ | +| bitwise_or | ✅ | +| bitwise_xor | ✅ | +| invert | ✅ | +| left_shift | ✅ | +| right_shift | ✅ | +| packbits | ❌ | +| unpackbits 
| ❌ | +| binary_repr | ❌ | +## NumPy I/O Operations +| NumPy I/O Operations | Heat | +|---|---| +| load | ✅ | +| save | ✅ | +| savez | ❌ | +| savez_compressed | ❌ | +| loadtxt | ❌ | +| savetxt | ❌ | +| genfromtxt | ❌ | +| fromregex | ❌ | +| fromstring | ❌ | +| tofile | ❌ | +| tolist | ❌ | +| array2string | ❌ | +| array_repr | ❌ | +| array_str | ❌ | +| format_float_positional | ❌ | +| format_float_scientific | ❌ | +| memmap | ❌ | +| open_memmap | ❌ | +| set_printoptions | ✅ | +| get_printoptions | ✅ | +| set_string_function | ❌ | +| printoptions | ❌ | +| binary_repr | ❌ | +| base_repr | ❌ | +| DataSource | ❌ | +| format | ❌ | +## NumPy LinAlg Operations +| NumPy LinAlg Operations | Heat | +|---|---| +| ldot | ❌ | +| linalg.multi_dot | ❌ | +| vdot | ✅ | +| inner | ❌ | +| outer | ✅ | +| matmul | ✅ | +| tensordot | ❌ | +| einsum | ❌ | +| einsum_path | ❌ | +| linalg.matrix_power | ❌ | +| kron | ❌ | +| linalg.cholesky | ❌ | +| linalg.qr | ❌ | +| linalg.svd | ❌ | +| linalg.eig | ❌ | +| linalg.eigh | ❌ | +| linalg.eigvals | ❌ | +| linalg.eigvalsh | ❌ | +| linalg.norm | ❌ | +| linalg.cond | ❌ | +| linalg.det | ❌ | +| linalg.matrix_rank | ❌ | +| linalg.slogdet | ❌ | +| trace | ✅ | +| linalg.solve | ❌ | +| linalg.tensorsolve | ❌ | +| linalg.lstsq | ❌ | +| linalg.inv | ❌ | +| linalg.pinv | ❌ | +| linalg.tensorinv | ❌ | +## NumPy Logic Functions +| NumPy Logic Functions | Heat | +|---|---| +| all | ✅ | +| any | ✅ | +| isfinite | ✅ | +| isinf | ✅ | +| isnan | ✅ | +| isnat | ❌ | +| isneginf | ✅ | +| isposinf | ✅ | +| iscomplex | ✅ | +| iscomplexobj | ❌ | +| isfortran | ❌ | +| isreal | ✅ | +| isrealobj | ❌ | +| isscalar | ❌ | +| logical_and | ✅ | +| logical_or | ✅ | +| logical_not | ✅ | +| logical_xor | ✅ | +| allclose | ✅ | +| isclose | ✅ | +| array_equal | ❌ | +| array_equiv | ❌ | +| greater | ✅ | +| greater_equal | ✅ | +| less | ✅ | +| less_equal | ✅ | +| equal | ✅ | +| not_equal | ✅ | +## NumPy Sorting Operations +| NumPy Sorting Operations | Heat | +|---|---| +| sort | ✅ | +| 
lexsort | ❌ | +| argsort | ❌ | +| sort | ✅ | +| sort_complex | ❌ | +| partition | ❌ | +| argpartition | ❌ | +| argmax | ✅ | +| nanargmax | ❌ | +| argmin | ✅ | +| nanargmin | ❌ | +| argwhere | ❌ | +| nonzero | ✅ | +| flatnonzero | ❌ | +| where | ✅ | +| searchsorted | ❌ | +| extract | ❌ | +| count_nonzero | ❌ | +## NumPy Statistical Operations +| NumPy Statistical Operations | Heat | +|---|---| +| ptp | ❌ | +| percentile | ✅ | +| nanpercentile | ❌ | +| quantile | ❌ | +| nanquantile | ❌ | +| median | ✅ | +| average | ✅ | +| mean | ✅ | +| std | ✅ | +| var | ✅ | +| nanmedian | ❌ | +| nanmean | ❌ | +| nanstd | ❌ | +| nanvar | ❌ | +| corrcoef | ❌ | +| correlate | ❌ | +| cov | ✅ | +| histogram | ✅ | +| histogram2d | ❌ | +| histogramdd | ❌ | +| bincount | ✅ | +| histogram_bin_edges | ❌ | +| digitize | ✅ | From da135d67cd4572df4ffed8175b1ba4b43cefa935 Mon Sep 17 00:00:00 2001 From: Claudia Comito <39374113+ClaudiaComito@users.noreply.github.com> Date: Wed, 20 Sep 2023 19:29:37 +0200 Subject: [PATCH 23/51] fix ToC --- numpy_coverage_tables.md | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/numpy_coverage_tables.md b/numpy_coverage_tables.md index 2582c72190..f800fa11d6 100644 --- a/numpy_coverage_tables.md +++ b/numpy_coverage_tables.md @@ -3,15 +3,15 @@ This file is automatically generated by `numpy_coverage_tables.py`. Please do not edit this file directly, but instead edit `numpy_coverage_tables.py` and run it to generate this file. The following tables show the NumPy functions supported by Heat. ## Table of Contents -0. [NumPy Mathematical Functions](#numpy--mathematical-functions) -1. [NumPy Array Creation](#numpy-array-creation) -2. [NumPy Array Manipulation](#numpy-array-manipulation) -3. [NumPy Binary Operations](#numpy-binary-operations) -4. [NumPy I/O Operations](#numpy-i/o-operations) -5. [NumPy LinAlg Operations](#numpy-linalg-operations) -6. [NumPy Logic Functions](#numpy-logic-functions) -7. 
[NumPy Sorting Operations](#numpy-sorting-operations) -8. [NumPy Statistical Operations](#numpy-statistical-operations) +1. [NumPy Mathematical Functions](#numpy--mathematical-functions) +2. [NumPy Array Creation](#numpy-array-creation) +3. [NumPy Array Manipulation](#numpy-array-manipulation) +4. [NumPy Binary Operations](#numpy-binary-operations) +5. [NumPy I/O Operations](#numpy-i/o-operations) +6. [NumPy LinAlg Operations](#numpy-linalg-operations) +7. [NumPy Logic Functions](#numpy-logic-functions) +8. [NumPy Sorting Operations](#numpy-sorting-operations) +9. [NumPy Statistical Operations](#numpy-statistical-operations) ## NumPy Mathematical Functions | NumPy Mathematical Functions | Heat | From 6733985d9c9d1abc75dd25f467fcc48b631014e5 Mon Sep 17 00:00:00 2001 From: Claudia Comito <39374113+ClaudiaComito@users.noreply.github.com> Date: Wed, 20 Sep 2023 19:32:38 +0200 Subject: [PATCH 24/51] fix i/o --- numpy_coverage_tables.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/numpy_coverage_tables.md b/numpy_coverage_tables.md index f800fa11d6..ec2ad5eec9 100644 --- a/numpy_coverage_tables.md +++ b/numpy_coverage_tables.md @@ -7,7 +7,7 @@ The following tables show the NumPy functions supported by Heat. 2. [NumPy Array Creation](#numpy-array-creation) 3. [NumPy Array Manipulation](#numpy-array-manipulation) 4. [NumPy Binary Operations](#numpy-binary-operations) -5. [NumPy I/O Operations](#numpy-i/o-operations) +5. [NumPy IO Operations](#numpy-io-operations) 6. [NumPy LinAlg Operations](#numpy-linalg-operations) 7. [NumPy Logic Functions](#numpy-logic-functions) 8. [NumPy Sorting Operations](#numpy-sorting-operations) @@ -227,8 +227,8 @@ The following tables show the NumPy functions supported by Heat. 
| packbits | ❌ | | unpackbits | ❌ | | binary_repr | ❌ | -## NumPy I/O Operations -| NumPy I/O Operations | Heat | +## NumPy IO Operations +| NumPy IO Operations | Heat | |---|---| | load | ✅ | | save | ✅ | From 62588ee00362d4915a69de55041bfbad6bb6159c Mon Sep 17 00:00:00 2001 From: Claudia Comito <39374113+ClaudiaComito@users.noreply.github.com> Date: Wed, 20 Sep 2023 19:35:49 +0200 Subject: [PATCH 25/51] link to ToC --- numpy_coverage_tables.md | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/numpy_coverage_tables.md b/numpy_coverage_tables.md index ec2ad5eec9..11d386a4f0 100644 --- a/numpy_coverage_tables.md +++ b/numpy_coverage_tables.md @@ -14,6 +14,8 @@ The following tables show the NumPy functions supported by Heat. 9. [NumPy Statistical Operations](#numpy-statistical-operations) ## NumPy Mathematical Functions +[Back to Table of Contents](#table-of-contents) + | NumPy Mathematical Functions | Heat | |---|---| | sin | ✅ | @@ -118,6 +120,8 @@ The following tables show the NumPy functions supported by Heat. | real_if_close | ❌ | | interp | ❌ | ## NumPy Array Creation +[Back to Table of Contents](#table-of-contents) + | NumPy Array Creation | Heat | |---|---| | empty | ✅ | @@ -159,6 +163,8 @@ The following tables show the NumPy functions supported by Heat. | mat | ❌ | | bmat | ❌ | ## NumPy Array Manipulation +[Back to Table of Contents](#table-of-contents) + | NumPy Array Manipulation | Heat | |---|---| | copyto | ❌ | @@ -216,6 +222,8 @@ The following tables show the NumPy functions supported by Heat. | roll | ✅ | | rot90 | ✅ | ## NumPy Binary Operations +[Back to Table of Contents](#table-of-contents) + | NumPy Binary Operations | Heat | |---|---| | bitwise_and | ✅ | @@ -228,6 +236,8 @@ The following tables show the NumPy functions supported by Heat. 
| unpackbits | ❌ | | binary_repr | ❌ | ## NumPy IO Operations +[Back to Table of Contents](#table-of-contents) + | NumPy IO Operations | Heat | |---|---| | load | ✅ | @@ -257,6 +267,8 @@ The following tables show the NumPy functions supported by Heat. | DataSource | ❌ | | format | ❌ | ## NumPy LinAlg Operations +[Back to Table of Contents](#table-of-contents) + | NumPy LinAlg Operations | Heat | |---|---| | ldot | ❌ | @@ -290,6 +302,8 @@ The following tables show the NumPy functions supported by Heat. | linalg.pinv | ❌ | | linalg.tensorinv | ❌ | ## NumPy Logic Functions +[Back to Table of Contents](#table-of-contents) + | NumPy Logic Functions | Heat | |---|---| | all | ✅ | @@ -321,6 +335,8 @@ The following tables show the NumPy functions supported by Heat. | equal | ✅ | | not_equal | ✅ | ## NumPy Sorting Operations +[Back to Table of Contents](#table-of-contents) + | NumPy Sorting Operations | Heat | |---|---| | sort | ✅ | @@ -342,6 +358,8 @@ The following tables show the NumPy functions supported by Heat. | extract | ❌ | | count_nonzero | ❌ | ## NumPy Statistical Operations +[Back to Table of Contents](#table-of-contents) + | NumPy Statistical Operations | Heat | |---|---| | ptp | ❌ | From c2f311143559e1ab3e08db5a75d95eebdf4d84ec Mon Sep 17 00:00:00 2001 From: Claudia Comito <39374113+ClaudiaComito@users.noreply.github.com> Date: Wed, 20 Sep 2023 19:56:54 +0200 Subject: [PATCH 26/51] update tables --- numpy_coverage_tables.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/numpy_coverage_tables.md b/numpy_coverage_tables.md index 11d386a4f0..026ffa25a1 100644 --- a/numpy_coverage_tables.md +++ b/numpy_coverage_tables.md @@ -271,7 +271,7 @@ The following tables show the NumPy functions supported by Heat. 
| NumPy LinAlg Operations | Heat | |---|---| -| ldot | ❌ | +| dot | ✅ | | linalg.multi_dot | ❌ | | vdot | ✅ | | inner | ❌ | From 584505d635259d96a9f6d82a44bbc4746435da38 Mon Sep 17 00:00:00 2001 From: Hoppe Date: Tue, 26 Sep 2023 15:46:21 +0200 Subject: [PATCH 27/51] added file for dxarray operations --- heat/dxarray/dxarray.py | 2 +- heat/dxarray/dxarray_operations.py | 8 ++++++++ 2 files changed, 9 insertions(+), 1 deletion(-) create mode 100644 heat/dxarray/dxarray_operations.py diff --git a/heat/dxarray/dxarray.py b/heat/dxarray/dxarray.py index 13562736c7..79f7c07b49 100644 --- a/heat/dxarray/dxarray.py +++ b/heat/dxarray/dxarray.py @@ -392,7 +392,7 @@ def balance_(self): def xarray(self): """ - Convert given DXarray (possibly distributed over some processes) to a non-distributed xarray (:class:`xarray.DataArray`) + Convert given DXarray (possibly distributed over some processes) to a non-distributed xarray (:class:`xarray.DataArray`) on all processes. """ non_dist_copy = self.resplit_(None) if non_dist_copy.coords is None: diff --git a/heat/dxarray/dxarray_operations.py b/heat/dxarray/dxarray_operations.py new file mode 100644 index 0000000000..0a4e89b15c --- /dev/null +++ b/heat/dxarray/dxarray_operations.py @@ -0,0 +1,8 @@ +""" +Operations on Dxarray objects +""" + +import torch +import heat as ht + +from .dxarray import DXarray From c48b8c4d45a26b02848811979ab851999671b58b Mon Sep 17 00:00:00 2001 From: Hoppe Date: Wed, 27 Sep 2023 10:53:38 +0200 Subject: [PATCH 28/51] added file for tests of dxarray class --- heat/dxarray/test_dxarray.py | 29 +++++++++++++++++++++++++++++ heat/dxarray/tests/__init__.py | 0 2 files changed, 29 insertions(+) create mode 100644 heat/dxarray/test_dxarray.py create mode 100644 heat/dxarray/tests/__init__.py diff --git a/heat/dxarray/test_dxarray.py b/heat/dxarray/test_dxarray.py new file mode 100644 index 0000000000..3e0b013bb1 --- /dev/null +++ b/heat/dxarray/test_dxarray.py @@ -0,0 +1,29 @@ +import torch +import os +import 
unittest +import heat as ht +import numpy as np +import xarray as xr +from mpi4py import MPI + +from heat.core.tests.test_suites.basic_test import TestCase + + +class TestDXarray(TestCase): + def test_attributes(self): + pass + + def test_is_balanced(self): + pass + + def test_resplit_(self): + pass + + def test_balance_(self): + pass + + def test_xarray(self): + pass + + def test_from_xarray(self): + pass diff --git a/heat/dxarray/tests/__init__.py b/heat/dxarray/tests/__init__.py new file mode 100644 index 0000000000..e69de29bb2 From 280985dbe3c5f6f94fcf512e96437bc58f99e92b Mon Sep 17 00:00:00 2001 From: Claudia Comito <39374113+ClaudiaComito@users.noreply.github.com> Date: Wed, 27 Sep 2023 12:25:27 +0200 Subject: [PATCH 29/51] fix vulnerability report --- .github/ISSUE_TEMPLATE/vulnerability.yml | 30 ++++++++++++------------ 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/.github/ISSUE_TEMPLATE/vulnerability.yml b/.github/ISSUE_TEMPLATE/vulnerability.yml index be07e18a75..e264c89fbb 100644 --- a/.github/ISSUE_TEMPLATE/vulnerability.yml +++ b/.github/ISSUE_TEMPLATE/vulnerability.yml @@ -1,4 +1,4 @@ -name: "Vulnerability Report" +name: "\U0001F6A8 Vulnerability Report" description: Report a security vulnerability in our project. title: "[VULNERABILITY]: " labels: ["security, High Priority"] @@ -14,50 +14,50 @@ body: attributes: label: Affected Version(s) description: List the affected versions of the library. - validations: - required: true + validations: + required: true - type: textarea id: severity attributes: label: Severity description: Specify the severity of the vulnerability (e.g., Low/Medium/High/Critical). - validations: - required: true + validations: + required: true - type: textarea id: description attributes: label: Description description: Provide a clear and concise description of the security vulnerability. 
- validations: - required: true + validations: + required: true - type: textarea id: steps-to-reproduce attributes: label: Steps to Reproduce description: Outline the steps to reproduce the vulnerability, including any relevant code snippets or configuration settings. - validations: - required: true + validations: + required: true - type: textarea id: expected-behavior attributes: label: Expected Behavior description: Explain what you expected to happen when following the steps above. - validations: - required: true + validations: + required: true - type: textarea id: actual-behavior attributes: label: Actual Behavior description: Describe what actually happened when you followed the steps above, highlighting the security issue. - validations: - required: true + validations: + required: true - type: textarea id: impact attributes: label: Impact description: Discuss the potential impact of this vulnerability, including any possible consequences or risks associated with its exploitation. - validations: - required: true + validations: + required: true - type: textarea id: proof-of-concept attributes: From 44ef0caa1ebe57763b52485dafe06d8c68982f73 Mon Sep 17 00:00:00 2001 From: Claudia Comito <39374113+ClaudiaComito@users.noreply.github.com> Date: Wed, 27 Sep 2023 15:06:08 +0200 Subject: [PATCH 30/51] README overhaul, general updates --- README.md | 169 ++++++++---------- coverage_tables.md | 387 ++++++++++++++++++++++++++++++++++++++++ doc/images/fzj_logo.svg | 98 ++-------- quick_start.md | 17 +- 4 files changed, 479 insertions(+), 192 deletions(-) create mode 100644 coverage_tables.md diff --git a/README.md b/README.md index b1196ad382..b9062d8961 100644 --- a/README.md +++ b/README.md @@ -29,121 +29,68 @@ Heat builds on [PyTorch](https://pytorch.org/) and [mpi4py](https://mpi4py.readt With Heat you can: -- port existing NumPy/SciPy code from single-CPU to multi-node clusters with minimal coding effort (EXAMPLE); -- exploit the entire, cumulative RAM of your many 
nodes for memory-intensive operations and algorithms (EXAMPLE); -- run your NumPy/SciPy code on GPUs (CUDA, ROCm, coming up: Apple MPS) (EXAMPLE). +- port existing NumPy/SciPy code from single-CPU to multi-node clusters with minimal coding effort; +- exploit the entire, cumulative RAM of your many nodes for memory-intensive operations and algorithms; +- run your NumPy/SciPy code on GPUs (CUDA, ROCm, coming up: Apple MPS). +Check out our [coverage tables](coverage_tables.md) to see which NumPy, SciPy, scikit-learn functions are already supported. -Where's the catch? A few things you can't do with Heat (yet): -- Heat does not support all NumPy/SciPy functions (yet), same with scikit-learn algorithms. If you need a functionality that is not yet supported: + If you need a functionality that is not yet supported: - [search existing issues](https://github.com/helmholtz-analytics/heat/issues) and make sure to comment if someone else already requested it; - - otherwise please [open a new issue](https://github.com/helmholtz-analytics/heat/issues/new/choose). + - [open a new issue](https://github.com/helmholtz-analytics/heat/issues/new/choose). -- Heat does not support deployments across physically distributed clusters. (TOO STRONG?) - -Here's a summary of supported functions and algorithms: (DRAFT) - -| Numpy Module | Supported | -|--------------|-----------| -| random | ✔️ | -| linalg | ✔️ | -| fft | ❌ | - - -Check out the [Heat API Reference](https://heat.readthedocs.io/en/latest/autoapi/index.html) for a complete list. - -# FAQ - - - Users - - Developers - - Students - - system administrators - -# Goals - -Heat is a flexible and seamless open-source software for high performance data -analytics and machine learning. It provides highly optimized algorithms and data -structures for tensor computations using CPUs, GPUs, and distributed cluster -systems on top of MPI. 
The goal of Heat is to fill the gap between data -analytics and machine learning libraries with a strong focus on single-node -performance, and traditional high-performance computing (HPC). Heat's generic -Python-first programming interface integrates seamlessly with the existing data -science ecosystem and makes it as effortless as using numpy to write scalable -scientific and data science applications. - -Heat allows you to tackle your actual Big Data challenges that go beyond the -computational and memory needs of your laptop and desktop. +Check out our [features](#features) and the [Heat API Reference](https://heat.readthedocs.io/en/latest/autoapi/index.html) for a complete list of functionalities. # Features -* High-performance n-dimensional tensors +* High-performance n-dimensional arrays * CPU, GPU, and distributed computation using MPI * Powerful data analytics and machine learning methods -* Abstracted communication via split tensors -* Python API - -# Support Channels +* Seamless integration with the NumPy/SciPy ecosystem +* Python array API (work in progress) -We use [GitHub Discussions](https://github.com/helmholtz-analytics/heat/discussions) as a forum for questions about Heat. -If you found a bug or miss a feature, then please file a new [issue](https://github.com/helmholtz-analytics/heat/issues/new/choose). +# Installation -# Requirements +## Requirements -Heat requires Python 3.8 or newer. -Heat is based on [PyTorch](https://pytorch.org/). Specifically, we are exploiting -PyTorch's support for GPUs *and* MPI parallelism. For MPI support we utilize -[mpi4py](https://mpi4py.readthedocs.io). Both packages can be installed via pip -or automatically using the setup.py. +### Basics +- python >= 3.8 +- MPI (OpenMPI, MPICH, Intel MPI, etc.) +- mpi4py >= 3.0.0 +- pytorch >= 1.8.0 -# Installation +### Parallel I/O +- h5py +- netCDF4 -Tagged releases are made available on the -[Python Package Index (PyPI)](https://pypi.org/project/heat/). 
You can typically -install the latest version with +### pip +Install the latest version with +```bash +pip install heat[hdf5,netcdf] ``` -$ pip install heat[hdf5,netcdf] -``` - where the part in brackets is a list of optional dependencies. You can omit it, if you do not need HDF5 or NetCDF support. -**It is recommended to use the most recent supported version of PyTorch!** - -**It is also very important to ensure that the PyTorch version is compatible with the local CUDA installation.** -More information can be found [here](https://pytorch.org/get-started/locally/). +### **conda** +The conda build includes all dependencies **including OpenMPI**. +```bash + conda install -c conda-forge heat + ``` # Getting Started -TL;DR: [Quick Start](quick_start.md) (Read this to get a quick overview of Heat). +Go to [Quick Start](quick_start.md) for a quick overview. Check out our Jupyter Notebook [**Tutorial**](https://github.com/helmholtz-analytics/heat/blob/main/scripts/) -right here on GitHub or in the /scripts directory, to learn and understand about the basics and working of Heat. +right here on GitHub or in the `./scripts` directory, to learn and understand Heat's basics. The complete documentation of the latest version is always deployed on [Read the Docs](https://heat.readthedocs.io/). 
-***Try your first Heat program*** - -```shell -$ python -``` - -```python ->>> import heat as ht ->>> x = ht.arange(10,split=0) ->>> print(x) -DNDarray([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], dtype=ht.int32, device=cpu:0, split=0) ->>> y = ht.ones(10,split=0) ->>> print(y) -DNDarray([1., 1., 1., 1., 1., 1., 1., 1., 1., 1.], dtype=ht.float32, device=cpu:0, split=0) ->>> print(x + y) -DNDarray([ 1., 2., 3., 4., 5., 6., 7., 8., 9., 10.], dtype=ht.float32, device=cpu:0, split=0) -``` - -### Also, you can test your setup by running the [`heat_test.py`](https://github.com/helmholtz-analytics/heat/blob/main/scripts/heat_test.py) script: +### You can test your setup by running the [`heat_test.py`](https://github.com/helmholtz-analytics/heat/blob/main/scripts/heat_test.py) script: ```shell mpirun -n 2 python heat_test.py @@ -159,33 +106,56 @@ Local torch tensor on rank 0 : tensor([0, 1, 2, 3, 4], dtype=torch.int32) Local torch tensor on rank 1 : tensor([5, 6, 7, 8, 9], dtype=torch.int32) ``` -## Resources -* [Heat Tutorials](https://heat.readthedocs.io/en/latest/tutorials.html) -* [Heat API Reference](https://heat.readthedocs.io/en/latest/autoapi/index.html) +# FAQ +In progress... -### Parallel Computing and MPI: + - Users + - Developers + - Students + - system administrators -* @davidhenty's [course](https://www.archer2.ac.uk/training/courses/200514-mpi/) -* Wes Kendall's [Tutorials](https://mpitutorial.com/tutorials/) + -# Contribution guidelines +# Support Channels -**We welcome contributions from the community, if you want to contribute to Heat, be sure to review the [Contribution Guidelines](contributing.md) before getting started!** +We use [GitHub Discussions](https://github.com/helmholtz-analytics/heat/discussions) as a forum for questions about Heat. +If you found a bug or miss a feature, then please file a new [issue](https://github.com/helmholtz-analytics/heat/issues/new/choose). 
-We use [GitHub issues](https://github.com/helmholtz-analytics/heat/issues) for tracking requests and bugs, please see [Discussions](https://github.com/helmholtz-analytics/heat/discussions) for general questions and discussion, and You can also get in touch with us on [Mattermost](https://mattermost.hzdr.de/signup_user_complete/?id=3sixwk9okpbzpjyfrhen5jpqfo). You can sign up with your GitHub credentials. Once you log in, you can introduce yourself on the `Town Square` channel. -Small improvements or fixes are always appreciated; issues labeled as **"good first issue"** may be a good starting point. +# Contribution guidelines + +**We welcome contributions from the community, if you want to contribute to Heat, be sure to review the [Contribution Guidelines](contributing.md) and [Resources for MPI programming](#resources-for-mpi-programming) before getting started!** + +We use [GitHub issues](https://github.com/helmholtz-analytics/heat/issues) for tracking requests and bugs, please see [Discussions](https://github.com/helmholtz-analytics/heat/discussions) for general questions and discussion. You can also get in touch with us on [Mattermost](https://mattermost.hzdr.de/signup_user_complete/?id=3sixwk9okpbzpjyfrhen5jpqfo) (sign up with your GitHub credentials). Once you log in, you can introduce yourself on the `Town Square` channel. If you’re unsure where to start or how your skills fit in, reach out! You can ask us here on GitHub, by leaving a comment on a relevant issue that is already open. 
**If you are new to contributing to open source, [this guide](https://opensource.guide/how-to-contribute/) helps explain why, what, and how to get involved.** + +## Resources for MPI programming + +* [Heat Tutorials](https://heat.readthedocs.io/en/latest/tutorials.html) +* [Heat API Reference](https://heat.readthedocs.io/en/latest/autoapi/index.html) + +### Parallel Computing and MPI: + +* @davidhenty's [course](https://www.archer2.ac.uk/training/courses/200514-mpi/) +* Wes Kendall's [Tutorials](https://mpitutorial.com/tutorials/) + +### mpi4py + +* [mpi4py docs](https://mpi4py.readthedocs.io/en/stable/tutorial.html) +* [Tutorial](https://www.kth.se/blogs/pdc/2019/08/parallel-programming-in-python-mpi4py-part-1/) # License Heat is distributed under the MIT license, see our @@ -229,6 +199,9 @@ If you find Heat helpful for your research, please mention it in your publicatio Networking Fund](https://www.helmholtz.de/en/about_us/the_association/initiating_and_networking/) under project number ZT-I-0003 and the Helmholtz AI platform grant.* +*This project has received funding from Google Summer of Code (GSoC) in 2022.* + + ---
diff --git a/coverage_tables.md b/coverage_tables.md new file mode 100644 index 0000000000..026ffa25a1 --- /dev/null +++ b/coverage_tables.md @@ -0,0 +1,387 @@ +# NumPy Coverage Tables +This file is automatically generated by `numpy_coverage_tables.py`. +Please do not edit this file directly, but instead edit `numpy_coverage_tables.py` and run it to generate this file. +The following tables show the NumPy functions supported by Heat. +## Table of Contents +1. [NumPy Mathematical Functions](#numpy--mathematical-functions) +2. [NumPy Array Creation](#numpy-array-creation) +3. [NumPy Array Manipulation](#numpy-array-manipulation) +4. [NumPy Binary Operations](#numpy-binary-operations) +5. [NumPy IO Operations](#numpy-io-operations) +6. [NumPy LinAlg Operations](#numpy-linalg-operations) +7. [NumPy Logic Functions](#numpy-logic-functions) +8. [NumPy Sorting Operations](#numpy-sorting-operations) +9. [NumPy Statistical Operations](#numpy-statistical-operations) + +## NumPy Mathematical Functions +[Back to Table of Contents](#table-of-contents) + +| NumPy Mathematical Functions | Heat | +|---|---| +| sin | ✅ | +| cos | ✅ | +| tan | ✅ | +| arcsin | ✅ | +| arccos | ✅ | +| arctan | ✅ | +| hypot | ✅ | +| arctan2 | ✅ | +| degrees | ✅ | +| radians | ✅ | +| unwrap | ❌ | +| deg2rad | ✅ | +| rad2deg | ✅ | +| sinh | ✅ | +| cosh | ✅ | +| tanh | ✅ | +| arcsinh | ✅ | +| arccosh | ✅ | +| arctanh | ✅ | +| round | ✅ | +| around | ❌ | +| rint | ❌ | +| fix | ❌ | +| floor | ✅ | +| ceil | ✅ | +| trunc | ✅ | +| prod | ✅ | +| sum | ✅ | +| nanprod | ✅ | +| nansum | ✅ | +| cumprod | ✅ | +| cumsum | ✅ | +| nancumprod | ❌ | +| nancumsum | ❌ | +| diff | ✅ | +| ediff1d | ❌ | +| gradient | ❌ | +| cross | ✅ | +| trapz | ❌ | +| exp | ✅ | +| expm1 | ✅ | +| exp2 | ✅ | +| log | ✅ | +| log10 | ✅ | +| log2 | ✅ | +| log1p | ✅ | +| logaddexp | ✅ | +| logaddexp2 | ✅ | +| i0 | ❌ | +| sinc | ❌ | +| signbit | ✅ | +| copysign | ✅ | +| frexp | ❌ | +| ldexp | ❌ | +| nextafter | ❌ | +| spacing | ❌ | +| lcm | ✅ | +| 
gcd | ✅ | +| add | ✅ | +| reciprocal | ❌ | +| positive | ✅ | +| negative | ✅ | +| multiply | ✅ | +| divide | ✅ | +| power | ✅ | +| subtract | ✅ | +| true_divide | ❌ | +| floor_divide | ✅ | +| float_power | ❌ | +| fmod | ✅ | +| mod | ✅ | +| modf | ✅ | +| remainder | ✅ | +| divmod | ❌ | +| angle | ✅ | +| real | ✅ | +| imag | ✅ | +| conj | ✅ | +| conjugate | ✅ | +| maximum | ✅ | +| max | ✅ | +| amax | ❌ | +| fmax | ❌ | +| nanmax | ❌ | +| minimum | ✅ | +| min | ✅ | +| amin | ❌ | +| fmin | ❌ | +| nanmin | ❌ | +| convolve | ✅ | +| clip | ✅ | +| sqrt | ✅ | +| cbrt | ❌ | +| square | ✅ | +| absolute | ✅ | +| fabs | ✅ | +| sign | ✅ | +| heaviside | ❌ | +| nan_to_num | ✅ | +| real_if_close | ❌ | +| interp | ❌ | +## NumPy Array Creation +[Back to Table of Contents](#table-of-contents) + +| NumPy Array Creation | Heat | +|---|---| +| empty | ✅ | +| empty_like | ✅ | +| eye | ✅ | +| identity | ❌ | +| ones | ✅ | +| ones_like | ✅ | +| zeros | ✅ | +| zeros_like | ✅ | +| full | ✅ | +| full_like | ✅ | +| array | ✅ | +| asarray | ✅ | +| asanyarray | ❌ | +| ascontiguousarray | ❌ | +| asmatrix | ❌ | +| copy | ✅ | +| frombuffer | ❌ | +| from_dlpack | ❌ | +| fromfile | ❌ | +| fromfunction | ❌ | +| fromiter | ❌ | +| fromstring | ❌ | +| loadtxt | ❌ | +| arange | ✅ | +| linspace | ✅ | +| logspace | ✅ | +| geomspace | ❌ | +| meshgrid | ✅ | +| mgrid | ❌ | +| ogrid | ❌ | +| diag | ✅ | +| diagflat | ❌ | +| tri | ❌ | +| tril | ✅ | +| triu | ✅ | +| vander | ❌ | +| mat | ❌ | +| bmat | ❌ | +## NumPy Array Manipulation +[Back to Table of Contents](#table-of-contents) + +| NumPy Array Manipulation | Heat | +|---|---| +| copyto | ❌ | +| shape | ✅ | +| reshape | ✅ | +| ravel | ✅ | +| flat | ❌ | +| flatten | ✅ | +| moveaxis | ✅ | +| rollaxis | ❌ | +| swapaxes | ✅ | +| T | ❌ | +| transpose | ✅ | +| atleast_1d | ❌ | +| atleast_2d | ❌ | +| atleast_3d | ❌ | +| broadcast | ❌ | +| broadcast_to | ✅ | +| broadcast_arrays | ✅ | +| expand_dims | ✅ | +| squeeze | ✅ | +| asarray | ✅ | +| asanyarray | ❌ | +| asmatrix 
| ❌ | +| asfarray | ❌ | +| asfortranarray | ❌ | +| ascontiguousarray | ❌ | +| asarray_chkfinite | ❌ | +| require | ❌ | +| concatenate | ✅ | +| stack | ✅ | +| block | ❌ | +| vstack | ✅ | +| hstack | ✅ | +| dstack | ❌ | +| column_stack | ✅ | +| row_stack | ✅ | +| split | ✅ | +| array_split | ❌ | +| dsplit | ✅ | +| hsplit | ✅ | +| vsplit | ✅ | +| tile | ✅ | +| repeat | ✅ | +| delete | ❌ | +| insert | ❌ | +| append | ❌ | +| resize | ❌ | +| trim_zeros | ❌ | +| unique | ✅ | +| flip | ✅ | +| fliplr | ✅ | +| flipud | ✅ | +| reshape | ✅ | +| roll | ✅ | +| rot90 | ✅ | +## NumPy Binary Operations +[Back to Table of Contents](#table-of-contents) + +| NumPy Binary Operations | Heat | +|---|---| +| bitwise_and | ✅ | +| bitwise_or | ✅ | +| bitwise_xor | ✅ | +| invert | ✅ | +| left_shift | ✅ | +| right_shift | ✅ | +| packbits | ❌ | +| unpackbits | ❌ | +| binary_repr | ❌ | +## NumPy IO Operations +[Back to Table of Contents](#table-of-contents) + +| NumPy IO Operations | Heat | +|---|---| +| load | ✅ | +| save | ✅ | +| savez | ❌ | +| savez_compressed | ❌ | +| loadtxt | ❌ | +| savetxt | ❌ | +| genfromtxt | ❌ | +| fromregex | ❌ | +| fromstring | ❌ | +| tofile | ❌ | +| tolist | ❌ | +| array2string | ❌ | +| array_repr | ❌ | +| array_str | ❌ | +| format_float_positional | ❌ | +| format_float_scientific | ❌ | +| memmap | ❌ | +| open_memmap | ❌ | +| set_printoptions | ✅ | +| get_printoptions | ✅ | +| set_string_function | ❌ | +| printoptions | ❌ | +| binary_repr | ❌ | +| base_repr | ❌ | +| DataSource | ❌ | +| format | ❌ | +## NumPy LinAlg Operations +[Back to Table of Contents](#table-of-contents) + +| NumPy LinAlg Operations | Heat | +|---|---| +| dot | ✅ | +| linalg.multi_dot | ❌ | +| vdot | ✅ | +| inner | ❌ | +| outer | ✅ | +| matmul | ✅ | +| tensordot | ❌ | +| einsum | ❌ | +| einsum_path | ❌ | +| linalg.matrix_power | ❌ | +| kron | ❌ | +| linalg.cholesky | ❌ | +| linalg.qr | ❌ | +| linalg.svd | ❌ | +| linalg.eig | ❌ | +| linalg.eigh | ❌ | +| linalg.eigvals | ❌ | +| linalg.eigvalsh | ❌ 
| +| linalg.norm | ❌ | +| linalg.cond | ❌ | +| linalg.det | ❌ | +| linalg.matrix_rank | ❌ | +| linalg.slogdet | ❌ | +| trace | ✅ | +| linalg.solve | ❌ | +| linalg.tensorsolve | ❌ | +| linalg.lstsq | ❌ | +| linalg.inv | ❌ | +| linalg.pinv | ❌ | +| linalg.tensorinv | ❌ | +## NumPy Logic Functions +[Back to Table of Contents](#table-of-contents) + +| NumPy Logic Functions | Heat | +|---|---| +| all | ✅ | +| any | ✅ | +| isfinite | ✅ | +| isinf | ✅ | +| isnan | ✅ | +| isnat | ❌ | +| isneginf | ✅ | +| isposinf | ✅ | +| iscomplex | ✅ | +| iscomplexobj | ❌ | +| isfortran | ❌ | +| isreal | ✅ | +| isrealobj | ❌ | +| isscalar | ❌ | +| logical_and | ✅ | +| logical_or | ✅ | +| logical_not | ✅ | +| logical_xor | ✅ | +| allclose | ✅ | +| isclose | ✅ | +| array_equal | ❌ | +| array_equiv | ❌ | +| greater | ✅ | +| greater_equal | ✅ | +| less | ✅ | +| less_equal | ✅ | +| equal | ✅ | +| not_equal | ✅ | +## NumPy Sorting Operations +[Back to Table of Contents](#table-of-contents) + +| NumPy Sorting Operations | Heat | +|---|---| +| sort | ✅ | +| lexsort | ❌ | +| argsort | ❌ | +| sort | ✅ | +| sort_complex | ❌ | +| partition | ❌ | +| argpartition | ❌ | +| argmax | ✅ | +| nanargmax | ❌ | +| argmin | ✅ | +| nanargmin | ❌ | +| argwhere | ❌ | +| nonzero | ✅ | +| flatnonzero | ❌ | +| where | ✅ | +| searchsorted | ❌ | +| extract | ❌ | +| count_nonzero | ❌ | +## NumPy Statistical Operations +[Back to Table of Contents](#table-of-contents) + +| NumPy Statistical Operations | Heat | +|---|---| +| ptp | ❌ | +| percentile | ✅ | +| nanpercentile | ❌ | +| quantile | ❌ | +| nanquantile | ❌ | +| median | ✅ | +| average | ✅ | +| mean | ✅ | +| std | ✅ | +| var | ✅ | +| nanmedian | ❌ | +| nanmean | ❌ | +| nanstd | ❌ | +| nanvar | ❌ | +| corrcoef | ❌ | +| correlate | ❌ | +| cov | ✅ | +| histogram | ✅ | +| histogram2d | ❌ | +| histogramdd | ❌ | +| bincount | ✅ | +| histogram_bin_edges | ❌ | +| digitize | ✅ | diff --git a/doc/images/fzj_logo.svg b/doc/images/fzj_logo.svg index 53868ecb83..3b765373b7 
100644 --- a/doc/images/fzj_logo.svg +++ b/doc/images/fzj_logo.svg @@ -1,86 +1,14 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + +image/svg+xml + + + + +Logo_FZ_Juellich_RGB_schutzzone_weiss + + diff --git a/quick_start.md b/quick_start.md index f021393d96..6b700cb298 100644 --- a/quick_start.md +++ b/quick_start.md @@ -6,16 +6,12 @@ No-frills instructions for [new users](#new-users-condaconda-pippip-hpchpc-docke ### `conda` -A Heat conda build is [in progress](https://github.com/helmholtz-analytics/heat/issues/1050). -The script [heat_env.yml](https://github.com/helmholtz-analytics/heat/blob/main/scripts/heat_env.yml): +The Heat conda build includes all dependencies including OpenMPI. -- creates a virtual environment `heat_env` -- installs all dependencies including OpenMPI using [conda](https://conda.io/projects/conda/en/latest/user-guide/getting-started.html) -- installs Heat via `pip` - -``` -conda env create -f heat_env.yml +```shell +conda create --name heat_env conda activate heat_env +conda -c conda-forge heat ``` [Test](#test) your installation. @@ -34,6 +30,9 @@ pip install heat[hdf5,netcdf] [Test](#test) your installation. +### HPC +Work in progress. + ### Docker Get the docker image from our package repository @@ -77,7 +76,7 @@ Local torch tensor on rank 1 : tensor([5, 6, 7, 8, 9], dtype=torch.int32) 3. [Fork](https://docs.github.com/en/get-started/quickstart/contributing-to-projects) or, if you have write access, clone the [Heat repository](https://github.com/helmholtz-analytics/heat). -4. Create a virtual environment `heat_dev` with all dependencies via [heat_dev.yml](https://github.com/helmholtz-analytics/heat/blob/main/scripts/heat_dev.yml). Note that `heat_dev.yml` does not install Heat via `pip` (as opposed to [`heat_env.yml`](#conda) for users). +4. 
Create a virtual environment `heat_dev` with all dependencies via [heat_dev.yml](https://github.com/helmholtz-analytics/heat/blob/main/scripts/heat_dev.yml). Note that `heat_dev.yml` does not install Heat. ``` conda env create -f heat_dev.yml From df8eb6963583c0a412e2cc97826953761928fe26 Mon Sep 17 00:00:00 2001 From: Claudia Comito <39374113+ClaudiaComito@users.noreply.github.com> Date: Wed, 27 Sep 2023 15:19:50 +0200 Subject: [PATCH 31/51] add ToC, conda badge --- README.md | 23 +++++++++++++++++++---- 1 file changed, 19 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index b9062d8961..1500c34771 100644 --- a/README.md +++ b/README.md @@ -14,14 +14,29 @@ Heat is a distributed tensor framework for high performance data analytics. [![license: MIT](https://img.shields.io/badge/License-MIT-blue.svg)](https://opensource.org/licenses/MIT) [![PyPI Version](https://img.shields.io/pypi/v/heat)](https://pypi.org/project/heat/) [![Downloads](https://pepy.tech/badge/heat)](https://pepy.tech/project/heat) +[![Anaconda-Server Badge](https://anaconda.org/conda-forge/heat/badges/version.svg)](https://anaconda.org/conda-forge/heat) [![fair-software.eu](https://img.shields.io/badge/fair--software.eu-%E2%97%8F%20%20%E2%97%8F%20%20%E2%97%8F%20%20%E2%97%8F%20%20%E2%97%8F-green)](https://fair-software.eu) [![OpenSSF Best Practices](https://bestpractices.coreinfrastructure.org/projects/7688/badge)](https://bestpractices.coreinfrastructure.org/projects/7688) [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.2531472.svg)](https://doi.org/10.5281/zenodo.2531472) [![Benchmarks](https://img.shields.io/badge/Github--Pages-Benchmarks-2ea44f)](https://helmholtz-analytics.github.io/heat/dev/bench) [![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black) -Table of contents: - COPILOT WILL FILL OUT AT THE END +- Table of Contents + - [What is Heat for?](#what-is-heat-for) + - [Features](#features) + - 
[Installation](#installation) + - [Requirements](#requirements) + - [pip](#pip) + - [conda](#conda) + - [Getting Started](#getting-started) + - [FAQ](#faq) + - [Support Channels](#support-channels) + - [Contribution guidelines](#contribution-guidelines) + - [Resources for MPI programming](#resources-for-mpi-programming) + - [License](#license) + - [Citing Heat](#citing-heat) + - [Acknowledgements](#acknowledgements) + # What is Heat for? @@ -64,7 +79,7 @@ Check out our [features](#features) and the [Heat API Reference](https://heat.re - h5py - netCDF4 -### pip +## pip Install the latest version with ```bash @@ -73,7 +88,7 @@ pip install heat[hdf5,netcdf] where the part in brackets is a list of optional dependencies. You can omit it, if you do not need HDF5 or NetCDF support. -### **conda** +## **conda** The conda build includes all dependencies **including OpenMPI**. ```bash From 4e910278e9f56299cf35c41e12276bb40d9b04c8 Mon Sep 17 00:00:00 2001 From: Claudia Comito <39374113+ClaudiaComito@users.noreply.github.com> Date: Wed, 27 Sep 2023 15:26:52 +0200 Subject: [PATCH 32/51] add numpy coverage script --- numpy_coverage_tables.md | 387 ---------------------- scripts/numpy_coverage_tables.py | 545 +++++++++++++++++++++++++++++++ 2 files changed, 545 insertions(+), 387 deletions(-) delete mode 100644 numpy_coverage_tables.md create mode 100644 scripts/numpy_coverage_tables.py diff --git a/numpy_coverage_tables.md b/numpy_coverage_tables.md deleted file mode 100644 index 026ffa25a1..0000000000 --- a/numpy_coverage_tables.md +++ /dev/null @@ -1,387 +0,0 @@ -# NumPy Coverage Tables -This file is automatically generated by `numpy_coverage_tables.py`. -Please do not edit this file directly, but instead edit `numpy_coverage_tables.py` and run it to generate this file. -The following tables show the NumPy functions supported by Heat. -## Table of Contents -1. [NumPy Mathematical Functions](#numpy--mathematical-functions) -2. [NumPy Array Creation](#numpy-array-creation) -3. 
[NumPy Array Manipulation](#numpy-array-manipulation) -4. [NumPy Binary Operations](#numpy-binary-operations) -5. [NumPy IO Operations](#numpy-io-operations) -6. [NumPy LinAlg Operations](#numpy-linalg-operations) -7. [NumPy Logic Functions](#numpy-logic-functions) -8. [NumPy Sorting Operations](#numpy-sorting-operations) -9. [NumPy Statistical Operations](#numpy-statistical-operations) - -## NumPy Mathematical Functions -[Back to Table of Contents](#table-of-contents) - -| NumPy Mathematical Functions | Heat | -|---|---| -| sin | ✅ | -| cos | ✅ | -| tan | ✅ | -| arcsin | ✅ | -| arccos | ✅ | -| arctan | ✅ | -| hypot | ✅ | -| arctan2 | ✅ | -| degrees | ✅ | -| radians | ✅ | -| unwrap | ❌ | -| deg2rad | ✅ | -| rad2deg | ✅ | -| sinh | ✅ | -| cosh | ✅ | -| tanh | ✅ | -| arcsinh | ✅ | -| arccosh | ✅ | -| arctanh | ✅ | -| round | ✅ | -| around | ❌ | -| rint | ❌ | -| fix | ❌ | -| floor | ✅ | -| ceil | ✅ | -| trunc | ✅ | -| prod | ✅ | -| sum | ✅ | -| nanprod | ✅ | -| nansum | ✅ | -| cumprod | ✅ | -| cumsum | ✅ | -| nancumprod | ❌ | -| nancumsum | ❌ | -| diff | ✅ | -| ediff1d | ❌ | -| gradient | ❌ | -| cross | ✅ | -| trapz | ❌ | -| exp | ✅ | -| expm1 | ✅ | -| exp2 | ✅ | -| log | ✅ | -| log10 | ✅ | -| log2 | ✅ | -| log1p | ✅ | -| logaddexp | ✅ | -| logaddexp2 | ✅ | -| i0 | ❌ | -| sinc | ❌ | -| signbit | ✅ | -| copysign | ✅ | -| frexp | ❌ | -| ldexp | ❌ | -| nextafter | ❌ | -| spacing | ❌ | -| lcm | ✅ | -| gcd | ✅ | -| add | ✅ | -| reciprocal | ❌ | -| positive | ✅ | -| negative | ✅ | -| multiply | ✅ | -| divide | ✅ | -| power | ✅ | -| subtract | ✅ | -| true_divide | ❌ | -| floor_divide | ✅ | -| float_power | ❌ | -| fmod | ✅ | -| mod | ✅ | -| modf | ✅ | -| remainder | ✅ | -| divmod | ❌ | -| angle | ✅ | -| real | ✅ | -| imag | ✅ | -| conj | ✅ | -| conjugate | ✅ | -| maximum | ✅ | -| max | ✅ | -| amax | ❌ | -| fmax | ❌ | -| nanmax | ❌ | -| minimum | ✅ | -| min | ✅ | -| amin | ❌ | -| fmin | ❌ | -| nanmin | ❌ | -| convolve | ✅ | -| clip | ✅ | -| sqrt | ✅ | -| cbrt | ❌ | -| square | 
✅ | -| absolute | ✅ | -| fabs | ✅ | -| sign | ✅ | -| heaviside | ❌ | -| nan_to_num | ✅ | -| real_if_close | ❌ | -| interp | ❌ | -## NumPy Array Creation -[Back to Table of Contents](#table-of-contents) - -| NumPy Array Creation | Heat | -|---|---| -| empty | ✅ | -| empty_like | ✅ | -| eye | ✅ | -| identity | ❌ | -| ones | ✅ | -| ones_like | ✅ | -| zeros | ✅ | -| zeros_like | ✅ | -| full | ✅ | -| full_like | ✅ | -| array | ✅ | -| asarray | ✅ | -| asanyarray | ❌ | -| ascontiguousarray | ❌ | -| asmatrix | ❌ | -| copy | ✅ | -| frombuffer | ❌ | -| from_dlpack | ❌ | -| fromfile | ❌ | -| fromfunction | ❌ | -| fromiter | ❌ | -| fromstring | ❌ | -| loadtxt | ❌ | -| arange | ✅ | -| linspace | ✅ | -| logspace | ✅ | -| geomspace | ❌ | -| meshgrid | ✅ | -| mgrid | ❌ | -| ogrid | ❌ | -| diag | ✅ | -| diagflat | ❌ | -| tri | ❌ | -| tril | ✅ | -| triu | ✅ | -| vander | ❌ | -| mat | ❌ | -| bmat | ❌ | -## NumPy Array Manipulation -[Back to Table of Contents](#table-of-contents) - -| NumPy Array Manipulation | Heat | -|---|---| -| copyto | ❌ | -| shape | ✅ | -| reshape | ✅ | -| ravel | ✅ | -| flat | ❌ | -| flatten | ✅ | -| moveaxis | ✅ | -| rollaxis | ❌ | -| swapaxes | ✅ | -| T | ❌ | -| transpose | ✅ | -| atleast_1d | ❌ | -| atleast_2d | ❌ | -| atleast_3d | ❌ | -| broadcast | ❌ | -| broadcast_to | ✅ | -| broadcast_arrays | ✅ | -| expand_dims | ✅ | -| squeeze | ✅ | -| asarray | ✅ | -| asanyarray | ❌ | -| asmatrix | ❌ | -| asfarray | ❌ | -| asfortranarray | ❌ | -| ascontiguousarray | ❌ | -| asarray_chkfinite | ❌ | -| require | ❌ | -| concatenate | ✅ | -| stack | ✅ | -| block | ❌ | -| vstack | ✅ | -| hstack | ✅ | -| dstack | ❌ | -| column_stack | ✅ | -| row_stack | ✅ | -| split | ✅ | -| array_split | ❌ | -| dsplit | ✅ | -| hsplit | ✅ | -| vsplit | ✅ | -| tile | ✅ | -| repeat | ✅ | -| delete | ❌ | -| insert | ❌ | -| append | ❌ | -| resize | ❌ | -| trim_zeros | ❌ | -| unique | ✅ | -| flip | ✅ | -| fliplr | ✅ | -| flipud | ✅ | -| reshape | ✅ | -| roll | ✅ | -| rot90 | ✅ | -## NumPy Binary 
Operations -[Back to Table of Contents](#table-of-contents) - -| NumPy Binary Operations | Heat | -|---|---| -| bitwise_and | ✅ | -| bitwise_or | ✅ | -| bitwise_xor | ✅ | -| invert | ✅ | -| left_shift | ✅ | -| right_shift | ✅ | -| packbits | ❌ | -| unpackbits | ❌ | -| binary_repr | ❌ | -## NumPy IO Operations -[Back to Table of Contents](#table-of-contents) - -| NumPy IO Operations | Heat | -|---|---| -| load | ✅ | -| save | ✅ | -| savez | ❌ | -| savez_compressed | ❌ | -| loadtxt | ❌ | -| savetxt | ❌ | -| genfromtxt | ❌ | -| fromregex | ❌ | -| fromstring | ❌ | -| tofile | ❌ | -| tolist | ❌ | -| array2string | ❌ | -| array_repr | ❌ | -| array_str | ❌ | -| format_float_positional | ❌ | -| format_float_scientific | ❌ | -| memmap | ❌ | -| open_memmap | ❌ | -| set_printoptions | ✅ | -| get_printoptions | ✅ | -| set_string_function | ❌ | -| printoptions | ❌ | -| binary_repr | ❌ | -| base_repr | ❌ | -| DataSource | ❌ | -| format | ❌ | -## NumPy LinAlg Operations -[Back to Table of Contents](#table-of-contents) - -| NumPy LinAlg Operations | Heat | -|---|---| -| dot | ✅ | -| linalg.multi_dot | ❌ | -| vdot | ✅ | -| inner | ❌ | -| outer | ✅ | -| matmul | ✅ | -| tensordot | ❌ | -| einsum | ❌ | -| einsum_path | ❌ | -| linalg.matrix_power | ❌ | -| kron | ❌ | -| linalg.cholesky | ❌ | -| linalg.qr | ❌ | -| linalg.svd | ❌ | -| linalg.eig | ❌ | -| linalg.eigh | ❌ | -| linalg.eigvals | ❌ | -| linalg.eigvalsh | ❌ | -| linalg.norm | ❌ | -| linalg.cond | ❌ | -| linalg.det | ❌ | -| linalg.matrix_rank | ❌ | -| linalg.slogdet | ❌ | -| trace | ✅ | -| linalg.solve | ❌ | -| linalg.tensorsolve | ❌ | -| linalg.lstsq | ❌ | -| linalg.inv | ❌ | -| linalg.pinv | ❌ | -| linalg.tensorinv | ❌ | -## NumPy Logic Functions -[Back to Table of Contents](#table-of-contents) - -| NumPy Logic Functions | Heat | -|---|---| -| all | ✅ | -| any | ✅ | -| isfinite | ✅ | -| isinf | ✅ | -| isnan | ✅ | -| isnat | ❌ | -| isneginf | ✅ | -| isposinf | ✅ | -| iscomplex | ✅ | -| iscomplexobj | ❌ | -| isfortran | ❌ | -| 
isreal | ✅ | -| isrealobj | ❌ | -| isscalar | ❌ | -| logical_and | ✅ | -| logical_or | ✅ | -| logical_not | ✅ | -| logical_xor | ✅ | -| allclose | ✅ | -| isclose | ✅ | -| array_equal | ❌ | -| array_equiv | ❌ | -| greater | ✅ | -| greater_equal | ✅ | -| less | ✅ | -| less_equal | ✅ | -| equal | ✅ | -| not_equal | ✅ | -## NumPy Sorting Operations -[Back to Table of Contents](#table-of-contents) - -| NumPy Sorting Operations | Heat | -|---|---| -| sort | ✅ | -| lexsort | ❌ | -| argsort | ❌ | -| sort | ✅ | -| sort_complex | ❌ | -| partition | ❌ | -| argpartition | ❌ | -| argmax | ✅ | -| nanargmax | ❌ | -| argmin | ✅ | -| nanargmin | ❌ | -| argwhere | ❌ | -| nonzero | ✅ | -| flatnonzero | ❌ | -| where | ✅ | -| searchsorted | ❌ | -| extract | ❌ | -| count_nonzero | ❌ | -## NumPy Statistical Operations -[Back to Table of Contents](#table-of-contents) - -| NumPy Statistical Operations | Heat | -|---|---| -| ptp | ❌ | -| percentile | ✅ | -| nanpercentile | ❌ | -| quantile | ❌ | -| nanquantile | ❌ | -| median | ✅ | -| average | ✅ | -| mean | ✅ | -| std | ✅ | -| var | ✅ | -| nanmedian | ❌ | -| nanmean | ❌ | -| nanstd | ❌ | -| nanvar | ❌ | -| corrcoef | ❌ | -| correlate | ❌ | -| cov | ✅ | -| histogram | ✅ | -| histogram2d | ❌ | -| histogramdd | ❌ | -| bincount | ✅ | -| histogram_bin_edges | ❌ | -| digitize | ✅ | diff --git a/scripts/numpy_coverage_tables.py b/scripts/numpy_coverage_tables.py new file mode 100644 index 0000000000..00beae0dad --- /dev/null +++ b/scripts/numpy_coverage_tables.py @@ -0,0 +1,545 @@ +import heat + +numpy_functions = [] + +# List of numpy functions +headers = {"0": "NumPy Mathematical Functions"} +numpy_mathematical_functions = [ + "sin", + "cos", + "tan", + "arcsin", + "arccos", + "arctan", + "hypot", + "arctan2", + "degrees", + "radians", + "unwrap", + "deg2rad", + "rad2deg", + "sinh", + "cosh", + "tanh", + "arcsinh", + "arccosh", + "arctanh", + "round", + "around", + "rint", + "fix", + "floor", + "ceil", + "trunc", + "prod", + "sum", + "nanprod", 
+ "nansum", + "cumprod", + "cumsum", + "nancumprod", + "nancumsum", + "diff", + "ediff1d", + "gradient", + "cross", + "trapz", + "exp", + "expm1", + "exp2", + "log", + "log10", + "log2", + "log1p", + "logaddexp", + "logaddexp2", + "i0", + "sinc", + "signbit", + "copysign", + "frexp", + "ldexp", + "nextafter", + "spacing", + "lcm", + "gcd", + "add", + "reciprocal", + "positive", + "negative", + "multiply", + "divide", + "power", + "subtract", + "true_divide", + "floor_divide", + "float_power", + "fmod", + "mod", + "modf", + "remainder", + "divmod", + "angle", + "real", + "imag", + "conj", + "conjugate", + "maximum", + "max", + "amax", + "fmax", + "nanmax", + "minimum", + "min", + "amin", + "fmin", + "nanmin", + "convolve", + "clip", + "sqrt", + "cbrt", + "square", + "absolute", + "fabs", + "sign", + "heaviside", + "nan_to_num", + "real_if_close", + "interp", +] +numpy_functions.append(numpy_mathematical_functions) + +numpy_array_creation = [ + "empty", + "empty_like", + "eye", + "identity", + "ones", + "ones_like", + "zeros", + "zeros_like", + "full", + "full_like", + "array", + "asarray", + "asanyarray", + "ascontiguousarray", + "asmatrix", + "copy", + "frombuffer", + "from_dlpack", + "fromfile", + "fromfunction", + "fromiter", + "fromstring", + "loadtxt", + "arange", + "linspace", + "logspace", + "geomspace", + "meshgrid", + "mgrid", + "ogrid", + "diag", + "diagflat", + "tri", + "tril", + "triu", + "vander", + "mat", + "bmat", +] +numpy_functions.append(numpy_array_creation) +headers[str(len(headers))] = "NumPy Array Creation" + +numpy_array_manipulation = [ + "copyto", + "shape", + "reshape", + "ravel", + "flat", + "flatten", + "moveaxis", + "rollaxis", + "swapaxes", + "T", + "transpose", + "atleast_1d", + "atleast_2d", + "atleast_3d", + "broadcast", + "broadcast_to", + "broadcast_arrays", + "expand_dims", + "squeeze", + "asarray", + "asanyarray", + "asmatrix", + "asfarray", + "asfortranarray", + "ascontiguousarray", + "asarray_chkfinite", + "require", + 
"concatenate", + "stack", + "block", + "vstack", + "hstack", + "dstack", + "column_stack", + "row_stack", + "split", + "array_split", + "dsplit", + "hsplit", + "vsplit", + "tile", + "repeat", + "delete", + "insert", + "append", + "resize", + "trim_zeros", + "unique", + "flip", + "fliplr", + "flipud", + "reshape", + "roll", + "rot90", +] +numpy_functions.append(numpy_array_manipulation) +headers[str(len(headers))] = "NumPy Array Manipulation" + +numpy_binary_operations = [ + "bitwise_and", + "bitwise_or", + "bitwise_xor", + "invert", + "left_shift", + "right_shift", + "packbits", + "unpackbits", + "binary_repr", +] +numpy_functions.append(numpy_binary_operations) +headers[str(len(headers))] = "NumPy Binary Operations" + +numpy_io_operations = [ + # numpy.load + # numpy.save + # numpy.savez_compressed + # numpy.loadtxt + # numpy.savez + # numpy.savetxt + # numpy.genfromtxt + # numpy.fromregex + # numpy.fromstring + # numpy.ndarray.tofile + # numpy.ndarray.tolist + # numpy.array2string + # numpy.array_repr + # numpy.array_str + # numpy.format_float_positional + # numpy.format_float_scientific + # numpy.memmap + # numpy.lib.format.open_memmap + # numpy.set_printoptions + # numpy.get_printoptions + # numpy.set_string_function + # numpy.printoptions + # numpy.binary_repr + # numpy.base_repr + # numpy.DataSource + # numpy.lib.format + "load", + "save", + "savez", + "savez_compressed", + "loadtxt", + "savetxt", + "genfromtxt", + "fromregex", + "fromstring", + "tofile", + "tolist", + "array2string", + "array_repr", + "array_str", + "format_float_positional", + "format_float_scientific", + "memmap", + "open_memmap", + "set_printoptions", + "get_printoptions", + "set_string_function", + "printoptions", + "binary_repr", + "base_repr", + "DataSource", + "format", +] +numpy_functions.append(numpy_io_operations) +headers[str(len(headers))] = "NumPy IO Operations" + +numpy_linalg_operations = [ + # numpy.dot + # numpy.linalg.multi_dot + # numpy.vdot + # numpy.inner + # numpy.outer 
+ # numpy.matmul + # numpy.tensordot + # numpy.einsum + # numpy.einsum_path + # numpy.linalg.matrix_power + # numpy.kron + # numpy.linalg.cholesky + # numpy.linalg.qr + # numpy.linalg.svd + # numpy.linalg.eig + # numpy.linalg.eigh + # numpy.linalg.eigvals + # numpy.linalg.eigvalsh + # numpy.linalg.norm + # numpy.linalg.cond + # numpy.linalg.det + # numpy.linalg.matrix_rank + # numpy.linalg.slogdet + # numpy.trace + # numpy.linalg.solve + # numpy.linalg.tensorsolve + # numpy.linalg.lstsq + # numpy.linalg.inv + # numpy.linalg.pinv + # numpy.linalg.tensorinv + "dot", + "linalg.multi_dot", + "vdot", + "inner", + "outer", + "matmul", + "tensordot", + "einsum", + "einsum_path", + "linalg.matrix_power", + "kron", + "linalg.cholesky", + "linalg.qr", + "linalg.svd", + "linalg.eig", + "linalg.eigh", + "linalg.eigvals", + "linalg.eigvalsh", + "linalg.norm", + "linalg.cond", + "linalg.det", + "linalg.matrix_rank", + "linalg.slogdet", + "trace", + "linalg.solve", + "linalg.tensorsolve", + "linalg.lstsq", + "linalg.inv", + "linalg.pinv", + "linalg.tensorinv", +] +numpy_functions.append(numpy_linalg_operations) +headers[str(len(headers))] = "NumPy LinAlg Operations" + +numpy_logic_operations = [ + # numpy.all + # numpy.any + # numpy.isinf + # numpy.isfinite + # numpy.isnan + # numpy.isnat + # numpy.isneginf + # numpy.isposinf + # numpy.iscomplex + # numpy.iscomplexobj + # numpy.isfortran + # numpy.isreal + # numpy.isrealobj + # numpy.isscalar + # numpy.logical_and + # numpy.logical_or + # numpy.logical_not + # numpy.logical_xor + # numpy.allclose + # numpy.isclose + # numpy.array_equal + # numpy.array_equiv + # numpy.greater + # numpy.greater_equal + # numpy.less + # numpy.less_equal + # numpy.equal + # numpy.not_equal + "all", + "any", + "isfinite", + "isinf", + "isnan", + "isnat", + "isneginf", + "isposinf", + "iscomplex", + "iscomplexobj", + "isfortran", + "isreal", + "isrealobj", + "isscalar", + "logical_and", + "logical_or", + "logical_not", + "logical_xor", + "allclose", + 
"isclose", + "array_equal", + "array_equiv", + "greater", + "greater_equal", + "less", + "less_equal", + "equal", + "not_equal", +] +numpy_functions.append(numpy_logic_operations) +headers[str(len(headers))] = "NumPy Logic Functions" + +numpy_sorting_operations = [ + # numpy.sort + # numpy.lexsort + # numpy.argsort + # numpy.ndarray.sort + # numpy.sort_complex + # numpy.partition + # numpy.argpartition + # numpy.argmax + # numpy.nanargmax + # numpy.argmin + # numpy.nanargmin + # numpy.argwhere + # numpy.nonzero + # numpy.flatnonzero + # numpy.where + # numpy.searchsorted + # numpy.extract + # numpy.count_nonzero + "sort", + "lexsort", + "argsort", + "sort", + "sort_complex", + "partition", + "argpartition", + "argmax", + "nanargmax", + "argmin", + "nanargmin", + "argwhere", + "nonzero", + "flatnonzero", + "where", + "searchsorted", + "extract", + "count_nonzero", +] +numpy_functions.append(numpy_sorting_operations) +headers[str(len(headers))] = "NumPy Sorting Operations" + +numpy_statistics_operations = [ + # numpy.ptp + # numpy.percentile + # numpy.nanpercentile + # numpy.quantile + # numpy.nanquantile + # numpy.median + # numpy.average + # numpy.mean + # numpy.std + # numpy.var + # numpy.nanmedian + # numpy.nanmean + # numpy.nanstd + # numpy.nanvar + # numpy.corrcoef + # numpy.correlate + # numpy.cov + # numpy.histogram + # numpy.histogram2d + # numpy.histogramdd + # numpy.bincount + # numpy.histogram_bin_edges + # numpy.digitize + "ptp", + "percentile", + "nanpercentile", + "quantile", + "nanquantile", + "median", + "average", + "mean", + "std", + "var", + "nanmedian", + "nanmean", + "nanstd", + "nanvar", + "corrcoef", + "correlate", + "cov", + "histogram", + "histogram2d", + "histogramdd", + "bincount", + "histogram_bin_edges", + "digitize", +] +numpy_functions.append(numpy_statistics_operations) +headers[str(len(headers))] = "NumPy Statistical Operations" + +# initialize markdown file +# open the file in write mode +f = open("numpy_coverage_tables.md", "w") +# 
write in file +f.write("# NumPy Coverage Tables\n") +f.write("This file is automatically generated by `./scripts/numpy_coverage_tables.py`.\n") +f.write( + "Please do not edit this file directly, but instead edit `./scripts/numpy_coverage_tables.py` and run it to generate this file.\n" +) +f.write("The following tables show the NumPy functions supported by Heat.\n") + +# create Table of Contents +f.write("## Table of Contents\n") +for i, header in enumerate(headers): + f.write(f"{i+1}. [{headers[header]}](#{headers[header].lower().replace(' ', '-')})\n") +f.write("\n") + +for i, function_list in enumerate(numpy_functions): + f.write(f"## {headers[str(i)]}\n") + # Initialize a list to store the rows of the Markdown table + table_rows = [] + + # Check if functions exist in the heat library and create table rows + for func_name in function_list: + if hasattr(heat, func_name): + support_status = "✅" # Green checkmark for supported functions + else: + support_status = "❌" # Red cross for unsupported functions + + table_row = f"| {func_name} | {support_status} |" + table_rows.append(table_row) + + # Create the Markdown table header + table_header = f"| {headers[str(i)]} | Heat |\n|---|---|\n" + + # Combine the header and table rows + markdown_table = table_header + "\n".join(table_rows) + + # write link to table of contents + f.write("[Back to Table of Contents](#table-of-contents)\n\n") + # Print the Markdown table + f.write(markdown_table) + f.write("\n") From dca2a96169898275273a4aeb0d80a4a2c5f38554 Mon Sep 17 00:00:00 2001 From: Hoppe Date: Wed, 27 Sep 2023 17:24:37 +0200 Subject: [PATCH 33/51] added some unittests for dxarray --- heat/dxarray/dxarray.py | 36 +-- heat/dxarray/dxarray_sanitation.py | 32 ++- heat/dxarray/test_dxarray.py | 29 --- heat/dxarray/tests/test_dxarray.py | 340 +++++++++++++++++++++++++++++ 4 files changed, 384 insertions(+), 53 deletions(-) delete mode 100644 heat/dxarray/test_dxarray.py create mode 100644 heat/dxarray/tests/test_dxarray.py 
diff --git a/heat/dxarray/dxarray.py b/heat/dxarray/dxarray.py index 79f7c07b49..3f685ca559 100644 --- a/heat/dxarray/dxarray.py +++ b/heat/dxarray/dxarray.py @@ -10,7 +10,7 @@ # imports of "dxarray_..."-dependencies at the end to avoid cyclic dependence -__all__ = ["DXarray", "from_xarray"] +__all__ = ["dim_name_to_idx", "dim_idx_to_name", "DXarray", "from_xarray"] # Auxiliary functions @@ -103,28 +103,32 @@ def __init__( self.__device = values.device self.__comm = values.comm + # if no names are provided, introduce generic names "dim_N", N = 0,1,... + if dims is None: + dims = ["dim_%d" % k for k in range(self.__values.ndim)] + + self.__dims = dims + # ... and determine those not directly given: # since we are in the DXarray class, split dimension is given by a string self.__split = dim_idx_to_name(dims, values.split) # determine dimensions with and without coordinates if coords is not None: - dims_with_coords = sum([list(it[0]) for it in coords.items()], []) + dims_with_coords = sum( + [list(it[0]) if isinstance(it[0], tuple) else [it[0]] for it in coords.items()], [] + ) else: dims_with_coords = [] dims_without_coords = [dim for dim in dims if dim not in dims_with_coords] - self.__dims_with_cooords = dims_with_coords + self.__dims_with_coords = dims_with_coords self.__dims_without_coords = dims_without_coords # check if all appearing DNDarrays are balanced: as a result, the DXarray is balanced if and only if all DNDarrays are balanced - self.__balanced = dxarray_sanitation.check_if_balanced(self.__values, self.__coords) - - # if no names are provided, introduce generic names "dim_N", N = 0,1,... 
- if dims is None: - self.__dims = ["dim_%d" % k for k in range(self.__values.ndim)] - else: - self.__dims = dims + self.__balanced = dxarray_sanitation.check_if_balanced( + self.__values, self.__coords, force_check=False + ) """ Attribute getters and setters for the DXarray class @@ -187,18 +191,18 @@ def attrs(self) -> dict: return self.__attrs @property - def dims_with_coordinates(self) -> list: + def dims_with_coords(self) -> list: """ Get list of dims with coordinates from DXarray """ - return self.__dims_with_coordinates + return self.__dims_with_coords @property - def dims_without_coordinates(self) -> list: + def dims_without_coords(self) -> list: """ Get list of dims without coordinates from DXarray """ - return self.__dims_without_coordinates + return self.__dims_without_coords @property def balanced(self) -> bool: @@ -350,7 +354,9 @@ def is_balanced(self, force_check: bool = False) -> bool: """ if self.__balanced is None or force_check: - self.__balanced = dxarray_sanitation.check_if_balanced(self.__values, self.__coords) + self.__balanced = dxarray_sanitation.check_if_balanced( + self.__values, self.__coords, force_check=True + ) return self.__balanced def resplit_(self, dim: Union[str, None] = None): diff --git a/heat/dxarray/dxarray_sanitation.py b/heat/dxarray/dxarray_sanitation.py index 775b546595..cc66461226 100644 --- a/heat/dxarray/dxarray_sanitation.py +++ b/heat/dxarray/dxarray_sanitation.py @@ -22,6 +22,11 @@ def check_compatibility_values_dims_coords( if not (isinstance(coords, dict) or coords is None): raise TypeError("Input `coords` must be a dictionary or None, but is ", type(coords), ".") + # check if entries of dims are unique + if dims is not None: + if len(set(dims)) != len(dims): + raise ValueError("Entries of `dims` must be unique.") + # check if names of dims are given (and whether their number fits the number of dims of the values array) if dims is not None: if len(dims) != values.ndim: @@ -166,19 +171,28 @@ def check_attrs(attrs: 
Any): raise TypeError("`attrs` must be a dictionary or None, but is ", type(attrs), ".") -def check_if_balanced(values: ht.DNDarray, coords: Union[dict, None]): +def check_if_balanced(values: ht.DNDarray, coords: Union[dict, None], force_check: bool = False): """ Checks if a DXarray with values and coords is balanced, i.e., equally distributed on each process A DXarray is balanced if and only if all underlying DNDarrays are balanced. + force_check allows to force a check on balancedness of the underlying DNDarrays. """ - if values.balanced is None: - return None - else: - if coords is not None: - if None in [coord_item[1].balanced for coord_item in coords.items()]: + if not force_check: + if values.balanced is None or values.balanced is False or coords is None: + return values.balanced + else: + coords_balanced = [coord_item[1].balanced for coord_item in coords.items()] + if None in coords_balanced: return None else: - balanced = values.balanced and all( - [coord_item[1].balanced for coord_item in coords.items()] - ) + balanced = values.balanced and all(coords_balanced) return balanced + else: + values_balanced = values.is_balanced(force_check=True) + if values_balanced is False or coords is None: + return values_balanced + else: + coords_balanced = [ + coord_item[1].is_balanced(force_check=True) for coord_item in coords.items() + ] + return values_balanced and all(coords_balanced) diff --git a/heat/dxarray/test_dxarray.py b/heat/dxarray/test_dxarray.py deleted file mode 100644 index 3e0b013bb1..0000000000 --- a/heat/dxarray/test_dxarray.py +++ /dev/null @@ -1,29 +0,0 @@ -import torch -import os -import unittest -import heat as ht -import numpy as np -import xarray as xr -from mpi4py import MPI - -from heat.core.tests.test_suites.basic_test import TestCase - - -class TestDXarray(TestCase): - def test_attributes(self): - pass - - def test_is_balanced(self): - pass - - def test_resplit_(self): - pass - - def test_balance_(self): - pass - - def test_xarray(self): - 
pass - - def test_from_xarray(self): - pass diff --git a/heat/dxarray/tests/test_dxarray.py b/heat/dxarray/tests/test_dxarray.py new file mode 100644 index 0000000000..22517ac506 --- /dev/null +++ b/heat/dxarray/tests/test_dxarray.py @@ -0,0 +1,340 @@ +import torch +import os +import unittest +import heat as ht +import numpy as np +import xarray as xr +from mpi4py import MPI + +from heat.core.tests.test_suites.basic_test import TestCase + +nprocs = MPI.COMM_WORLD.Get_size() + + +class TestHelpers(TestCase): + def test_dim_name_idx_conversion(self): + dims = ["x", "y", "z-axis", "time", None] + for names in ["z-axis", ("time", "x"), ["x", "y"]]: + idxs = ht.dxarray.dim_name_to_idx(dims, names) + # check for correct types (str, tuple or list) + self.assertTrue( + type(idxs) == type(names) or (isinstance(names, str) and isinstance(idxs, int)) + ) + # check if dim_name_to_idx and dim_idx_to_name are inverse to each other + names_back = ht.dxarray.dim_idx_to_name(dims, idxs) + self.assertEqual(names_back, names) + # check if TypeError is raised for wrong input types + names = 3.14 + with self.assertRaises(TypeError): + ht.dxarray.dim_name_to_idx(dims, names) + with self.assertRaises(TypeError): + ht.dxarray.dim_idx_to_name(dims, names) + + +class TestDXarray(TestCase): + def test_constructor_and_attributes(self): + m = 2 + n = 3 * nprocs + k = 10 + ell = 2 + + # test constructor in a case that should work and also test if all attributes of the DXarray are set correctly + # here we include a dimension ("no_measurements") without coordinates and two dimensions ("x", "y") with physical instead of logical coordinates + xy = ht.random.rand(m, n, split=1) + t = ht.linspace(-1, 1, k, split=None) + attrs_xy = {"units_xy": "meters"} + xy_coords = ht.dxarray.DXarray( + xy, dims=["x", "y"], attrs=attrs_xy, name="coordinates of space" + ) + data = ht.random.randn(m, n, k, ell, split=1) + name = "mytestarray" + attrs = { + "units time": "seconds", + "measured data": "something 
really random and meaningless", + } + dims = ["x", "y", "time", "no_measurements"] + coords = {("x", "y"): xy_coords, "time": t} + + dxarray = ht.dxarray.DXarray(data, dims=dims, coords=coords, name=name, attrs=attrs) + + self.assertEqual(dxarray.name, name) + self.assertEqual(dxarray.attrs, attrs) + self.assertEqual(dxarray.dims, dims) + self.assertEqual(dxarray.coords, coords) + self.assertTrue(ht.allclose(dxarray.values, data)) + self.assertEqual(dxarray.device, data.device) + self.assertEqual(dxarray.comm, data.comm) + self.assertEqual(dxarray.dims_with_coords, ["x", "y", "time"]) + self.assertEqual(dxarray.dims_without_coords, ["no_measurements"]) + self.assertEqual(dxarray.split, "y") + self.assertEqual(dxarray.balanced, True) + + # test print + print(dxarray) + + # special case that dim names have to bet set automatically and that there are no coords at all + dxarray = ht.dxarray.DXarray(data) + dims = ["dim_0", "dim_1", "dim_2", "dim_3"] + self.assertEqual(dxarray.dims, dims) + self.assertEqual(dxarray.dims_with_coords, []) + self.assertEqual(dxarray.dims_without_coords, dims) + self.assertEqual(dxarray.split, "dim_1") + self.assertEqual(dxarray.balanced, True) + + # test print + print(dxarray) + + def test_sanity_checks(self): + m = 2 + n = 3 * nprocs + k = 5 * nprocs + ell = 2 + + # here comes the "correct" data + xy = ht.random.rand(m, n, split=1) + t = ht.linspace(-1, 1, k, split=None) + attrs_xy = {"units_xy": "meters"} + xy_coords = ht.dxarray.DXarray( + xy, dims=["x", "y"], attrs=attrs_xy, name="coordinates of space" + ) + data = ht.random.randn(m, n, k, ell, split=1) + name = "mytestarray" + attrs = { + "units time": "seconds", + "measured data": "something really random and meaningless", + } + dims = ["x", "y", "time", "no_measurements"] + coords = {("x", "y"): xy_coords, "time": t} + + # wrong data type for name + with self.assertRaises(TypeError): + dxarray = ht.dxarray.DXarray(data, dims=dims, coords=coords, name=3.14, attrs=attrs) + + # wrong 
data type for attrs + with self.assertRaises(TypeError): + dxarray = ht.dxarray.DXarray(data, dims=dims, coords=coords, name=name, attrs=3.14) + + # wrong data type for value + with self.assertRaises(TypeError): + dxarray = ht.dxarray.DXarray(3.14, dims=dims, coords=coords, name=name, attrs=attrs) + + # wrong data type for dims + with self.assertRaises(TypeError): + dxarray = ht.dxarray.DXarray(data, dims=3.14, coords=coords, name=name, attrs=attrs) + + # wrong data type for coords + with self.assertRaises(TypeError): + dxarray = ht.dxarray.DXarray(data, dims=dims, coords=3.14, name=name, attrs=attrs) + + # length of dims and number of dimensions of value array do not match + with self.assertRaises(ValueError): + dxarray = ht.dxarray.DXarray( + data, dims=["x", "y", "time"], coords=coords, name=name, attrs=attrs + ) + + # entries of dims are not unique + with self.assertRaises(ValueError): + dxarray = ht.dxarray.DXarray( + data, dims=["x", "y", "x", "no_measurements"], coords=coords, name=name, attrs=attrs + ) + + # coordinate array for single dimension is not a DNDarray + wrong_coords = {("x", "y"): xy_coords, "time": 3.14} + with self.assertRaises(TypeError): + dxarray = ht.dxarray.DXarray( + data, dims=dims, coords=wrong_coords, name=name, attrs=attrs + ) + + # coordinate array for single dimension has wrong dimensionality + wrong_coords = {("x", "y"): xy_coords, "time": ht.ones((k, 2))} + with self.assertRaises(ValueError): + dxarray = ht.dxarray.DXarray( + data, dims=dims, coords=wrong_coords, name=name, attrs=attrs + ) + + # device of a coordinate array does not coincide with device of value array + # TBD - how to test this? + + # communicator of a coordinate array does not coincide with communicator of value array + # TBD - how to test this? 
+ + # size of value array in a dimension does not coincide with size of coordinate array in this dimension + wrong_coords = {("x", "y"): xy_coords, "time": ht.ones(nprocs * k + 1)} + with self.assertRaises(ValueError): + dxarray = ht.dxarray.DXarray( + data, dims=dims, coords=wrong_coords, name=name, attrs=attrs + ) + + # value array is split along a dimension, but cooresponding coordinate array is not split along this dimension + wrong_data = ht.resplit(data, 2) + with self.assertRaises(ValueError): + dxarray = ht.dxarray.DXarray( + wrong_data, dims=dims, coords=coords, name=name, attrs=attrs + ) + + # value array is not split along a dimension, but cooresponding coordinate array is split along this dimension + wrong_coords = {("x", "y"): xy_coords, "time": ht.resplit(t, 0)} + with self.assertRaises(ValueError): + dxarray = ht.dxarray.DXarray( + data, dims=dims, coords=wrong_coords, name=name, attrs=attrs + ) + + # coordinate array in the case of "physical coordinates" is not a DXarray + wrong_coords = {("x", "y"): 3.14, "time": t} + with self.assertRaises(TypeError): + dxarray = ht.dxarray.DXarray( + data, dims=dims, coords=wrong_coords, name=name, attrs=attrs + ) + + # dimension names in coordinate DXarray in the case of "physical coordinates" do not coincide with dimension names of value array + wrong_coords_xy = ht.dxarray.DXarray( + xy, dims=["xx", "yy"], attrs=attrs_xy, name="coordinates of space" + ) + wrong_coords = {("x", "y"): wrong_coords_xy, "time": t} + with self.assertRaises(ValueError): + dxarray = ht.dxarray.DXarray( + data, dims=dims, coords=wrong_coords, name=name, attrs=attrs + ) + + # size of values for physical coordinates does not coincide with size of the respective coordinate array + wrong_xy = ht.random.rand(m + 1, n, split=1) + wrong_coords_xy = ht.dxarray.DXarray( + wrong_xy, dims=["x", "y"], attrs=attrs_xy, name="coordinates of space" + ) + wrong_coords = {("x", "y"): wrong_coords_xy, "time": t} + with self.assertRaises(ValueError): + 
dxarray = ht.dxarray.DXarray( + data, dims=dims, coords=wrong_coords, name=name, attrs=attrs + ) + + # communicator of coordinate array for physical coordinates does not coincide with communicator of value array + # TBD - how to test this? + + # device of coordinate array for physical coordinates does not coincide with device of value array + # TBD - how to test this? + + # coordinate array for physical coordinates is not split along the split dimension of the value array (two cases) + wrong_data = ht.random.randn(m, n, k, ell) + with self.assertRaises(ValueError): + dxarray = ht.dxarray.DXarray( + wrong_data, dims=dims, coords=coords, name=name, attrs=attrs + ) + wrong_xy = ht.random.rand(m, n) + wrong_coords_xy = ht.dxarray.DXarray( + wrong_xy, dims=["x", "y"], attrs=attrs_xy, name="coordinates of space" + ) + wrong_coords = {("x", "y"): wrong_coords_xy, "time": t} + with self.assertRaises(ValueError): + dxarray = ht.dxarray.DXarray( + data, dims=dims, coords=wrong_coords, name=name, attrs=attrs + ) + dxarray *= 1 + + def test_balanced_and_balancing(self): + m = 2 + n = 5 * nprocs + k = 2 + ell = 2 + + # create a highly unbalanced array for the values but not for the coordinates + xy = ht.random.rand(m, n, split=1) + xy = xy[:, 4:] + xy.balance_() + t = ht.linspace(-1, 1, k, split=None) + attrs_xy = {"units_xy": "meters"} + xy_coords = ht.dxarray.DXarray( + xy, dims=["x", "y"], attrs=attrs_xy, name="coordinates of space" + ) + data = ht.random.randn(m, n, k, ell, split=1) + data = data[:, 4:, :, :] + name = "mytestarray" + attrs = { + "units time": "seconds", + "measured data": "something really random and meaningless", + } + dims = ["x", "y", "time", "no_measurements"] + coords = {("x", "y"): xy_coords, "time": t} + + dxarray = ht.dxarray.DXarray(data, dims=dims, coords=coords, name=name, attrs=attrs) + + # balancedness-status is first unknown, then known as false (if explicitly checked) and finally known as false after this check + 
self.assertEqual(dxarray.balanced, None) + self.assertEqual(dxarray.is_balanced(), False) + self.assertEqual(dxarray.balanced, False) + + # rebalancing should work + dxarray.balance_() + self.assertEqual(dxarray.balanced, True) + self.assertEqual(dxarray.is_balanced(force_check=True), True) + + # rebalanced array should be equal to original one + self.assertTrue(ht.allclose(dxarray.values, data)) + self.assertEqual(dxarray.dims, dims) + self.assertEqual(dxarray.dims_with_coords, ["x", "y", "time"]) + self.assertEqual(dxarray.dims_without_coords, ["no_measurements"]) + self.assertEqual(dxarray.name, name) + self.assertEqual(dxarray.attrs, attrs) + # TBD: check for equality of coordinate arrays + + def test_resplit_(self): + m = 2 * nprocs + n = 3 * nprocs + k = 5 * nprocs + ell = 2 + + xy = ht.random.rand(m, n, split=1) + t = ht.linspace(-1, 1, k, split=None) + attrs_xy = {"units_xy": "meters"} + xy_coords = ht.dxarray.DXarray( + xy, dims=["x", "y"], attrs=attrs_xy, name="coordinates of space" + ) + data = ht.random.randn(m, n, k, ell, split=1) + name = "mytestarray" + attrs = { + "units time": "seconds", + "measured data": "something really random and meaningless", + } + dims = ["x", "y", "time", "no_measurements"] + coords = {("x", "y"): xy_coords, "time": t} + + dxarray = ht.dxarray.DXarray(data, dims=dims, coords=coords, name=name, attrs=attrs) + for newsplit in ["x", "time", None, "y"]: + dxarray.resplit_(newsplit) + self.assertEqual(dxarray.split, newsplit) + self.assertTrue(ht.allclose(dxarray.values, data)) + self.assertEqual(dxarray.dims, dims) + self.assertEqual(dxarray.dims_with_coords, ["x", "y", "time"]) + self.assertEqual(dxarray.dims_without_coords, ["no_measurements"]) + self.assertEqual(dxarray.name, name) + self.assertEqual(dxarray.attrs, attrs) + # TBD: check for equality of coordinate arrays + + def test_to_and_from_xarray(self): + m = 2 + n = 3 * nprocs + k = 10 + ell = 2 + + # test constructor in a case that should work and also test if all 
attributes of the DXarray are set correctly + # here we include a dimension ("no_measurements") without coordinates and two dimensions ("x", "y") with physical instead of logical coordinates + xy = ht.random.rand(m, n, split=1) + t = ht.linspace(-1, 1, k, split=None) + attrs_xy = {"units_xy": "meters"} + xy_coords = ht.dxarray.DXarray( + xy, dims=["x", "y"], attrs=attrs_xy, name="coordinates of space" + ) + data = ht.random.randn(m, n, k, ell, split=1) + name = "mytestarray" + attrs = { + "units time": "seconds", + "measured data": "something really random and meaningless", + } + dims = ["x", "y", "time", "no_measurements"] + coords = {("x", "y"): xy_coords, "time": t} + + dxarray = ht.dxarray.DXarray(data, dims=dims, coords=coords, name=name, attrs=attrs) + + xarray = dxarray.xarray() + print(xarray) + # TBD convert back and check for equality (or the other way round?) + # dxarray_from_xarray = ht.dxarray.from_xarray(xarray,split=dxarray.split,device=dxarray.device) From df070badf87609032d73a870ea3091b0a2b32ccf Mon Sep 17 00:00:00 2001 From: Hoppe Date: Thu, 28 Sep 2023 10:51:12 +0200 Subject: [PATCH 34/51] added a few sentences regarding "GPU-support" and "HPC-systems" to section "Requirements" --- README.md | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/README.md b/README.md index 1500c34771..af15f073c3 100644 --- a/README.md +++ b/README.md @@ -79,6 +79,14 @@ Check out our [features](#features) and the [Heat API Reference](https://heat.re - h5py - netCDF4 +### GPU-support +In order to do computations on your GPU(s), you will require a CUDA or ROCm installation matching your hardware and its drivers. Moreover, +- your PyTorch installation must be compiled with CUDA/ROCm support. + +### HPC-systems +On most HPC-systems you will not be able to install/compile MPI or CUDA/ROCm yourself. Instead, you will most likely need to load a pre-installed MPI and/or CUDA/ROCm module from the module system. 
Maybe, you will even find PyTorch, h5py, or mpi4py as (part of) such a module. Note that for optimal performance on GPU, you need to usa an MPI library that has been compiled with CUDA/ROCm support (e.g., so-called "CUDA-aware MPI"). + + ## pip Install the latest version with From f5806ee12372529825ae165ca58b8a88e261b236 Mon Sep 17 00:00:00 2001 From: Hoppe Date: Thu, 28 Sep 2023 10:54:42 +0200 Subject: [PATCH 35/51] changed name of output file in automatic coverage table generation --- scripts/numpy_coverage_tables.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/numpy_coverage_tables.py b/scripts/numpy_coverage_tables.py index 00beae0dad..4124503eec 100644 --- a/scripts/numpy_coverage_tables.py +++ b/scripts/numpy_coverage_tables.py @@ -502,7 +502,7 @@ # initialize markdown file # open the file in write mode -f = open("numpy_coverage_tables.md", "w") +f = open("coverage_tables.md", "w") # write in file f.write("# NumPy Coverage Tables\n") f.write("This file is automatically generated by `./scripts/numpy_coverage_tables.py`.\n") From ed9548e7d57d6d0c705bdaef7e2d781e3199071f Mon Sep 17 00:00:00 2001 From: Hoppe Date: Thu, 28 Sep 2023 11:03:37 +0200 Subject: [PATCH 36/51] removed dxarray which came into this branch for whatever reason... 
--- coverage_tables.md | 4 +- heat/dxarray/__init__.py | 7 - heat/dxarray/dxarray.py | 461 -------------------------- heat/dxarray/dxarray_manipulations.py | 8 - heat/dxarray/dxarray_operations.py | 8 - heat/dxarray/dxarray_sanitation.py | 198 ----------- heat/dxarray/tests/__init__.py | 0 heat/dxarray/tests/test_dxarray.py | 340 ------------------- scripts/numpy_coverage_tables.py | 2 +- 9 files changed, 3 insertions(+), 1025 deletions(-) delete mode 100644 heat/dxarray/__init__.py delete mode 100644 heat/dxarray/dxarray.py delete mode 100644 heat/dxarray/dxarray_manipulations.py delete mode 100644 heat/dxarray/dxarray_operations.py delete mode 100644 heat/dxarray/dxarray_sanitation.py delete mode 100644 heat/dxarray/tests/__init__.py delete mode 100644 heat/dxarray/tests/test_dxarray.py diff --git a/coverage_tables.md b/coverage_tables.md index 026ffa25a1..5246aaebf9 100644 --- a/coverage_tables.md +++ b/coverage_tables.md @@ -1,6 +1,6 @@ # NumPy Coverage Tables -This file is automatically generated by `numpy_coverage_tables.py`. -Please do not edit this file directly, but instead edit `numpy_coverage_tables.py` and run it to generate this file. +This file is automatically generated by `./scripts/numpy_coverage_tables.py`. +Please do not edit this file directly, but instead edit `./scripts/numpy_coverage_tables.py` and run it to generate this file. The following tables show the NumPy functions supported by Heat. ## Table of Contents 1. 
[NumPy Mathematical Functions](#numpy--mathematical-functions) diff --git a/heat/dxarray/__init__.py b/heat/dxarray/__init__.py deleted file mode 100644 index 73644d9223..0000000000 --- a/heat/dxarray/__init__.py +++ /dev/null @@ -1,7 +0,0 @@ -""" -import into heat.dxarray namespace -""" - -from .dxarray import * -from .dxarray_sanitation import * -from .dxarray_manipulations import * diff --git a/heat/dxarray/dxarray.py b/heat/dxarray/dxarray.py deleted file mode 100644 index 3f685ca559..0000000000 --- a/heat/dxarray/dxarray.py +++ /dev/null @@ -1,461 +0,0 @@ -""" -Implements a distributed counterpart of xarray built on top of Heats DNDarray class -""" - -import torch -import heat as ht -import xarray as xr -from xarray import DataArray -from typing import Union - -# imports of "dxarray_..."-dependencies at the end to avoid cyclic dependence - -__all__ = ["dim_name_to_idx", "dim_idx_to_name", "DXarray", "from_xarray"] - -# Auxiliary functions - - -def dim_name_to_idx(dims: list, names: Union[str, tuple, list, None]) -> Union[int, tuple, list]: - """ - Converts a string "names" (or tuple of strings) referring to dimensions stored in "dims" to the corresponding numeric index (tuple of indices) of these dimensions. - Inverse of :func:`dim_idx_to_name`. - """ - if names is None: - return None - elif isinstance(names, str): - return dims.index(names) - elif isinstance(names, tuple): - names_list = list(names) - return tuple([dims.index(name) for name in names_list]) - elif isinstance(names, list): - return [dims.index(name) for name in names] - else: - raise TypeError("Input names must be None, string, list of strings, or tuple of strings.") - - -def dim_idx_to_name(dims: list, idxs: Union[int, tuple, list, None]) -> Union[str, tuple, list]: - """ - Converts an numeric index "idxs" (or tuple of such indices) referring to the dimensions stored in "dims" to the corresponding name string (or tuple of name strings). - Inverse of :func:`dim_name_to_idx`. 
- """ - if idxs is None: - return None - elif isinstance(idxs, int): - return dims[idxs] - elif isinstance(idxs, tuple): - idxs_list = list(idxs) - return tuple([dims[idx] for idx in idxs_list]) - elif isinstance(idxs, list): - return [dims[idx] for idx in idxs] - else: - raise TypeError("Input idxs must be None, int, list of ints, or tuple of ints.") - - -class DXarray: - """ - Distributed counterpart of xarray. - - Parameters - -------------- - values: DNDarray - data entries of the DXarray - dims: list - names of the dimensions of the DXarray - coords: dictionary - coordinates - entries of the dictionary have the form `dim`:`coords_of_dim` for each `dim` in `dims`, - where `coords_of_dim` can either be a list of coordinate labels ("logical coordinates") or an - DXarray of same shape as the original one, also split along the same split axis ("physical coordinates"). - split: Union[int,None] - dimension along which the DXarray is split (analogous to split dimension of DNDarray) - - Notes - --------------- - Some attributes of DNDarray are not included in DXarray, e.g., gshape, lshape, larray etc., and need to be accessed by - DXarray.values.gshape etc. - This is in order to avoid confusion, because a DXarray is built of possibly several DNDarrays which could cause confusion - to which gshape etc. a global attribute DXarray.gshape could refer to. - Currently, it is checked whether values and coords are on the same `device`; in principle, this is unnecessary. - """ - - def __init__( - self, - values: ht.DNDarray, - dims: Union[list, None] = None, - coords: Union[dict, None] = None, - name: Union[str, None] = None, - attrs: dict = {}, - ): - """ - Constructor for DXarray class - """ - # Check compatibility of the input arguments - dxarray_sanitation.check_compatibility_values_dims_coords(values, dims, coords) - dxarray_sanitation.check_name(name) - dxarray_sanitation.check_attrs(attrs) - - # after the checks, set the directly given attributes... 
- - self.__values = values - self.__name = name - self.__attrs = attrs - self.__coords = coords - self.__device = values.device - self.__comm = values.comm - - # if no names are provided, introduce generic names "dim_N", N = 0,1,... - if dims is None: - dims = ["dim_%d" % k for k in range(self.__values.ndim)] - - self.__dims = dims - - # ... and determine those not directly given: - # since we are in the DXarray class, split dimension is given by a string - self.__split = dim_idx_to_name(dims, values.split) - - # determine dimensions with and without coordinates - if coords is not None: - dims_with_coords = sum( - [list(it[0]) if isinstance(it[0], tuple) else [it[0]] for it in coords.items()], [] - ) - else: - dims_with_coords = [] - dims_without_coords = [dim for dim in dims if dim not in dims_with_coords] - - self.__dims_with_coords = dims_with_coords - self.__dims_without_coords = dims_without_coords - - # check if all appearing DNDarrays are balanced: as a result, the DXarray is balanced if and only if all DNDarrays are balanced - self.__balanced = dxarray_sanitation.check_if_balanced( - self.__values, self.__coords, force_check=False - ) - - """ - Attribute getters and setters for the DXarray class - """ - - @property - def values(self) -> ht.DNDarray: - """ - Get values from DXarray - """ - return self.__values - - @property - def dims(self) -> list: - """ - Get dims from DXarray - """ - return self.__dims - - @property - def coords(self) -> dict: - """ - Get coords from DXarray - """ - return self.__coords - - @property - def split(self) -> Union[str, None]: - """ - Get split dimension from DXarray - """ - return self.__split - - @property - def device(self) -> ht.Device: - """ - Get device from DXarray - """ - return self.__device - - @property - def comm(self) -> ht.Communication: - """ - Get communicator from DXarray - """ - return self.__comm - - @property - def name(self) -> str: - """ - Get name from DXarray - """ - return self.__name - - @property - 
def attrs(self) -> dict: - """ - Get attributes from DXarray - """ - return self.__attrs - - @property - def dims_with_coords(self) -> list: - """ - Get list of dims with coordinates from DXarray - """ - return self.__dims_with_coords - - @property - def dims_without_coords(self) -> list: - """ - Get list of dims without coordinates from DXarray - """ - return self.__dims_without_coords - - @property - def balanced(self) -> bool: - """ - Get the attributed `balanced` of DXarray. - Does not check whether the current value of this attribute is consistent! - (This can be ensured by calling :meth:`DXarray.is_balanced(force_check=True)` first.) - """ - return self.__balanced - - @values.setter - def values(self, newvalues: ht.DNDarray): - """ - Set value array of DXarray - """ - dxarray_sanitation.check_compatibility_values_dims_coords( - newvalues, self.__dims, self.__coords - ) - self.__values = newvalues - - @coords.setter - def coors(self, newcoords: Union[dict, None]): - """ - Set coordinates of DXarray - """ - dxarray_sanitation.check_compatibility_values_dims_coords( - self.__values, self.__dims, newcoords - ) - self.__coords = newcoords - - @name.setter - def name(self, newname: Union[str, None]): - """ - Set name of DXarray - """ - dxarray_sanitation.check_name(newname) - self.__name = newname - - @attrs.setter - def attrs(self, newattrs: Union[dict, None]): - """ - Set attributes of DXarray - """ - dxarray_sanitation.check_attrs(newattrs) - self.__attrs = newattrs - - """ - Private methods of DXarray class - """ - - def __dim_name_to_idx( - self, names: Union[str, tuple, list, None] - ) -> Union[str, tuple, list, None]: - """ - Converts a string (or tuple of strings) referring to dimensions of the DXarray to the corresponding numeric index (tuple of indices) of these dimensions. - Inverse of :meth:`__dim_idx_to_name`. 
- """ - return dim_name_to_idx(self.__dims, names) - - def __dim_idx_to_name( - self, idxs: Union[int, tuple, list, None] - ) -> Union[int, tuple, list, None]: - """ - Converts an numeric index (or tuple of such indices) referring to the dimensions of the DXarray to the corresponding name string (or tuple of name strings). - Inverse of :meth:`__dim_name_to_idx`. - """ - return dim_idx_to_name(self.__dims, idxs) - - def __repr__(self) -> str: - """ - Representation of DXarray as string. Required for printing. - """ - if self.__name is not None: - print_name = self.__name - else: - print_name = "" - print_values = self.__values.__repr__() - print_dimensions = ", ".join(self.__dims) - if self.__split is not None: - print_split = self.__split - else: - print_split = "None (no splitted)" - if self.__coords is not None: - print_coords = "\n".join( - [it[0].__repr__() + ": \t" + it[1].__repr__() for it in self.__coords.items()] - ) - print_coords = 'Coordinates of "' + print_name + '": ' + print_coords - else: - print_coords = "" - print_attributes = "\n".join( - ["\t" + it[0].__repr__() + ": \t" + it[1].__repr__() for it in self.__attrs.items()] - ) - if len(self.__dims_without_coords) != 0: - print_coordinates_without_dims = "".join( - [ - 'The remaining coordinates of "', - print_name, - '", ', - ", ".join(self.__dims_without_coords), - ", do not have coordinates. 
\n", - ] - ) - else: - print_coordinates_without_dims = "" - if self.__comm.rank == 0: - return "".join( - [ - 'DXarray with name "', - print_name, - '"\n', - 'Dimensions of "', - print_name, - '": ', - print_dimensions, - "\n", - 'Split dimension of "', - print_name, - '": ', - print_split, - "\n", - 'Values of "', - print_name, - '": ', - print_values, - "\n", - print_coords, - "\n", - print_coordinates_without_dims, - 'Attributes of "', - print_name, - '":', - print_attributes, - "\n\n", - ] - ) - else: - return "" - - """ - Public Methods of DXarray - """ - - def is_balanced(self, force_check: bool = False) -> bool: - """ - Checks if DXarray is balanced. If `force_check = False` (default), the current value of the - attribute `balanced` is returned unless this current value is None (i.e. no information on - no information available); only in the latter case, or if `force_check = True`, the value - of the attribute `balanced` is updated before being returned. - - """ - if self.__balanced is None or force_check: - self.__balanced = dxarray_sanitation.check_if_balanced( - self.__values, self.__coords, force_check=True - ) - return self.__balanced - - def resplit_(self, dim: Union[str, None] = None): - """ - In-place option for resplitting a :class:`DXarray`. - """ - if dim is not None and dim not in self.__dims: - raise ValueError( - "Input `dim` in resplit_ must be either None or a dimension of the underlying DXarray." - ) - # early out if nothing is to do - if self.__split == dim: - return self - else: - # resplit the value array accordingly - self.__values.resplit_(self.__dim_name_to_idx(dim)) - if self.__coords is not None: - for item in self.__coords.items(): - if isinstance(item[0], str) and item[0] == dim: - item[1].resplit_(0) - elif isinstance(item[0], tuple) and dim in item[0]: - item[1].resplit_(dim) - self.__split = dim - return self - - def balance_(self): - """ - In-place option for balancing a :class:`DXarray`. 
- """ - if self.is_balanced(force_check=True): - return self - else: - self.__values.balance_() - if self.__coords is not None: - for item in self.__coords.items(): - item[1].balance_() - self.__balanced = True - return self - - def xarray(self): - """ - Convert given DXarray (possibly distributed over some processes) to a non-distributed xarray (:class:`xarray.DataArray`) on all processes. - """ - non_dist_copy = self.resplit_(None) - if non_dist_copy.coords is None: - xarray_coords = None - else: - xarray_coords = { - item[0]: item[1].cpu().numpy() - if isinstance(item[1], ht.DNDarray) - else item[1].xarray() - for item in non_dist_copy.coords.items() - } - xarray = DataArray( - non_dist_copy.values.cpu().numpy(), - dims=non_dist_copy.dims, - coords=xarray_coords, - name=non_dist_copy.name, - attrs=non_dist_copy.attrs, - ) - del non_dist_copy - return xarray - - -def from_xarray( - xarray: xr.DataArray, - split: Union[str, None] = None, - device: ht.Device = None, - comm: ht.Communication = None, -) -> DXarray: - """ - Generates a DXarray from a given xarray (:class:`xarray.DataArray`) - """ - coords_dict = { - item[0]: ht.from_numpy(item[1].values, device=device, comm=comm) - if len(item[0]) == 1 - else DXarray( - ht.from_numpy(item[1].values, device=device, comm=comm), - dims=list(item[0]), - coords=None, - name=item[1].name.__str__(), - attrs=item[1].attrs, - ) - for item in xarray.coords.items() - } - dxarray = DXarray( - ht.from_numpy(xarray.values, device=device, comm=comm), - dims=list(xarray.dims), - coords=coords_dict, - name=xarray.name, - attrs=xarray.attrs, - ) - if split is not None: - if split not in dxarray.dims: - raise ValueError('split dimension "', split, '" is not a dimension of input array.') - else: - dxarray.resplit_(split) - return dxarray - - -from . import dxarray_sanitation -from . 
import dxarray_manipulations diff --git a/heat/dxarray/dxarray_manipulations.py b/heat/dxarray/dxarray_manipulations.py deleted file mode 100644 index d105862cd7..0000000000 --- a/heat/dxarray/dxarray_manipulations.py +++ /dev/null @@ -1,8 +0,0 @@ -""" -Manipulation routines for the DXarray class -""" - -import torch -import heat as ht - -from .dxarray import DXarray diff --git a/heat/dxarray/dxarray_operations.py b/heat/dxarray/dxarray_operations.py deleted file mode 100644 index 0a4e89b15c..0000000000 --- a/heat/dxarray/dxarray_operations.py +++ /dev/null @@ -1,8 +0,0 @@ -""" -Operations on Dxarray objects -""" - -import torch -import heat as ht - -from .dxarray import DXarray diff --git a/heat/dxarray/dxarray_sanitation.py b/heat/dxarray/dxarray_sanitation.py deleted file mode 100644 index cc66461226..0000000000 --- a/heat/dxarray/dxarray_sanitation.py +++ /dev/null @@ -1,198 +0,0 @@ -""" -Validation/Sanitation routines for the DXarray class -""" - -import torch -import heat as ht -from typing import Any, Union - -from .dxarray import DXarray, dim_name_to_idx, dim_idx_to_name - - -def check_compatibility_values_dims_coords( - values: ht.DNDarray, dims: Union[list, None], coords: Union[dict, None] -): - """ - Checks whether input values, dims, and coords are valid and compatible inputs for a DXarray - """ - if not isinstance(values, ht.DNDarray): - raise TypeError("Input `values` must be a DNDarray, but is ", type(values), ".") - if not (isinstance(dims, list) or dims is None): - raise TypeError("Input `dims` must be a list or None, but is ", type(dims), ".") - if not (isinstance(coords, dict) or coords is None): - raise TypeError("Input `coords` must be a dictionary or None, but is ", type(coords), ".") - - # check if entries of dims are unique - if dims is not None: - if len(set(dims)) != len(dims): - raise ValueError("Entries of `dims` must be unique.") - - # check if names of dims are given (and whether their number fits the number of dims of the values 
array) - if dims is not None: - if len(dims) != values.ndim: - raise ValueError( - "Number of dimension names in `dims` (=%d) must be equal to number of dimensions of `values` array (=%d)." - % (len(dims), values.ndim) - ) - - # check consistency of the coordinates provided - if coords is not None: - # go through all entries in the dictionary coords - for coord_item in coords.items(): - coord_item_dims = coord_item[0] - coord_item_coords = coord_item[1] - # first case: "classical" coordinates for a single dimension, sometimes referred to "logical coordinates" - if isinstance(coord_item_dims, str): - # here, the coordinates must be given by a one-dimensional DNDarray... - if not isinstance(coord_item_coords, ht.DNDarray): - raise TypeError( - "Coordinate arrays (i.e. entries of `coords`) for single dimension must be DNDarray. Here, type ", - type(coord_item_coords), - " is given for dimension ", - coord_item_dims, - ".", - ) - if not coord_item_coords.ndim == 1: - raise ValueError( - "Coordinate arrays for a single dimension must have dimension 1, but coordinate array for dimension ", - coord_item_dims, - " has dimension %d." % coord_item_coords.ndim, - ) - # ... with matching device and communicator, ... - if not coord_item_coords.device == values.device: - raise RuntimeError( - "Device of coordinate array for dimension ", - coord_item_dims, - "does not coincide with device for `values`.", - ) - if not coord_item_coords.comm == values.comm: - raise RuntimeError( - "Communicator of coordinate array for dimension ", - coord_item_dims, - "does not coincide with device for `values`.", - ) - # ... correct shape, and ... - if not ( - coord_item_coords.gshape[0] - == values.gshape[dim_name_to_idx(dims, coord_item_dims)] - ): - raise ValueError( - "Size of `values` in dimension ", - coord_item_dims, - " does not coincide with size of coordinate array in this dimension.", - ) - # ... 
that is split if and only if the coordinates refer to the split dimension of the DXarray - if coord_item_dims == dim_idx_to_name(dims, values.split): - if coord_item_coords.split != 0: - raise ValueError( - "`values` array is split along dimension ", - coord_item_dims, - ", but cooresponding coordinate array is not split along this dimension.", - ) - else: - if coord_item_coords.split is not None: - raise ValueError( - "`values` array is not split along dimension ", - coord_item_dims, - ", but cooresponding coordinate array is split along this dimension.", - ) - # second case: "physical coordinates" - two or more dimensions are "merged" together and equipped with a coordinate array - # that cannot be expressed as meshgrid of 1d coordinate arrays - elif isinstance(coord_item_dims, tuple): - # now, the coordinates must be given as a DXarray... - if not isinstance(coord_item_coords, DXarray): - raise TypeError( - "Coordinate arrays (i.e. entries of `coords`) must be DXarrays. Here, type ", - type(coord_item_coords), - " is given for dimensions ", - coord_item_dims, - ".", - ) - # ... with matching dimension names, ... - if coord_item_coords.dims != list(coord_item_dims): - raise ValueError( - "Dimension names of coordinate-DXarray and the corresponding dimension names in `coords` must be equal." - ) - # ... shape, ... - if not ( - torch.tensor(coord_item_coords.values.gshape) - == torch.tensor(values.gshape)[dim_name_to_idx(dims, list(coord_item_dims))] - ).all(): - raise ValueError( - "Size of `values` in dimensions ", - coord_item_dims, - " does not coincide with size of coordinate array in these dimensions.", - ) - # ... device and communicator, ... 
- if not coord_item_coords.device == values.device: - raise RuntimeError( - "Device of coordinate array for dimensions ", - coord_item_dims, - "does not coincide with device for `values`.", - ) - if not coord_item_coords.comm == values.comm: - raise RuntimeError( - "Communicator of coordinate array for dimensions ", - coord_item_dims, - "does not coincide with device for `values`.", - ) - # ... and split dimension. - if dim_idx_to_name(dims, values.split) in coord_item_dims: - if not coord_item_coords.split == dim_idx_to_name(dims, values.split): - raise ValueError( - "`values` array is split along dimension ", - coord_item_dims, - ", but cooresponding coordinate array is not split along ", - coord_item_coords.split, - ".", - ) - else: - if coord_item_coords.split is not None: - raise ValueError( - "`values` array is not split along dimensions ", - coord_item_dims, - ", but cooresponding coordinate array is split.", - ) - - -def check_name(name: Any): - """ - Checks whether input is appropriate for attribute `name` of `DXarray` - """ - if not (isinstance(name, str) or name is None): - raise TypeError("`name` must be a string or None, but is ", type(name), ".") - - -def check_attrs(attrs: Any): - """ - Checks whether input is appropriate for attributed `attrs` of `DXarray`. - """ - if not (isinstance(attrs, dict) or attrs is None): - raise TypeError("`attrs` must be a dictionary or None, but is ", type(attrs), ".") - - -def check_if_balanced(values: ht.DNDarray, coords: Union[dict, None], force_check: bool = False): - """ - Checks if a DXarray with values and coords is balanced, i.e., equally distributed on each process - A DXarray is balanced if and only if all underlying DNDarrays are balanced. - force_check allows to force a check on balancedness of the underlying DNDarrays. 
- """ - if not force_check: - if values.balanced is None or values.balanced is False or coords is None: - return values.balanced - else: - coords_balanced = [coord_item[1].balanced for coord_item in coords.items()] - if None in coords_balanced: - return None - else: - balanced = values.balanced and all(coords_balanced) - return balanced - else: - values_balanced = values.is_balanced(force_check=True) - if values_balanced is False or coords is None: - return values_balanced - else: - coords_balanced = [ - coord_item[1].is_balanced(force_check=True) for coord_item in coords.items() - ] - return values_balanced and all(coords_balanced) diff --git a/heat/dxarray/tests/__init__.py b/heat/dxarray/tests/__init__.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/heat/dxarray/tests/test_dxarray.py b/heat/dxarray/tests/test_dxarray.py deleted file mode 100644 index 22517ac506..0000000000 --- a/heat/dxarray/tests/test_dxarray.py +++ /dev/null @@ -1,340 +0,0 @@ -import torch -import os -import unittest -import heat as ht -import numpy as np -import xarray as xr -from mpi4py import MPI - -from heat.core.tests.test_suites.basic_test import TestCase - -nprocs = MPI.COMM_WORLD.Get_size() - - -class TestHelpers(TestCase): - def test_dim_name_idx_conversion(self): - dims = ["x", "y", "z-axis", "time", None] - for names in ["z-axis", ("time", "x"), ["x", "y"]]: - idxs = ht.dxarray.dim_name_to_idx(dims, names) - # check for correct types (str, tuple or list) - self.assertTrue( - type(idxs) == type(names) or (isinstance(names, str) and isinstance(idxs, int)) - ) - # check if dim_name_to_idx and dim_idx_to_name are inverse to each other - names_back = ht.dxarray.dim_idx_to_name(dims, idxs) - self.assertEqual(names_back, names) - # check if TypeError is raised for wrong input types - names = 3.14 - with self.assertRaises(TypeError): - ht.dxarray.dim_name_to_idx(dims, names) - with self.assertRaises(TypeError): - ht.dxarray.dim_idx_to_name(dims, names) - - -class 
TestDXarray(TestCase): - def test_constructor_and_attributes(self): - m = 2 - n = 3 * nprocs - k = 10 - ell = 2 - - # test constructor in a case that should work and also test if all attributes of the DXarray are set correctly - # here we include a dimension ("no_measurements") without coordinates and two dimensions ("x", "y") with physical instead of logical coordinates - xy = ht.random.rand(m, n, split=1) - t = ht.linspace(-1, 1, k, split=None) - attrs_xy = {"units_xy": "meters"} - xy_coords = ht.dxarray.DXarray( - xy, dims=["x", "y"], attrs=attrs_xy, name="coordinates of space" - ) - data = ht.random.randn(m, n, k, ell, split=1) - name = "mytestarray" - attrs = { - "units time": "seconds", - "measured data": "something really random and meaningless", - } - dims = ["x", "y", "time", "no_measurements"] - coords = {("x", "y"): xy_coords, "time": t} - - dxarray = ht.dxarray.DXarray(data, dims=dims, coords=coords, name=name, attrs=attrs) - - self.assertEqual(dxarray.name, name) - self.assertEqual(dxarray.attrs, attrs) - self.assertEqual(dxarray.dims, dims) - self.assertEqual(dxarray.coords, coords) - self.assertTrue(ht.allclose(dxarray.values, data)) - self.assertEqual(dxarray.device, data.device) - self.assertEqual(dxarray.comm, data.comm) - self.assertEqual(dxarray.dims_with_coords, ["x", "y", "time"]) - self.assertEqual(dxarray.dims_without_coords, ["no_measurements"]) - self.assertEqual(dxarray.split, "y") - self.assertEqual(dxarray.balanced, True) - - # test print - print(dxarray) - - # special case that dim names have to bet set automatically and that there are no coords at all - dxarray = ht.dxarray.DXarray(data) - dims = ["dim_0", "dim_1", "dim_2", "dim_3"] - self.assertEqual(dxarray.dims, dims) - self.assertEqual(dxarray.dims_with_coords, []) - self.assertEqual(dxarray.dims_without_coords, dims) - self.assertEqual(dxarray.split, "dim_1") - self.assertEqual(dxarray.balanced, True) - - # test print - print(dxarray) - - def test_sanity_checks(self): - m = 2 - n 
= 3 * nprocs - k = 5 * nprocs - ell = 2 - - # here comes the "correct" data - xy = ht.random.rand(m, n, split=1) - t = ht.linspace(-1, 1, k, split=None) - attrs_xy = {"units_xy": "meters"} - xy_coords = ht.dxarray.DXarray( - xy, dims=["x", "y"], attrs=attrs_xy, name="coordinates of space" - ) - data = ht.random.randn(m, n, k, ell, split=1) - name = "mytestarray" - attrs = { - "units time": "seconds", - "measured data": "something really random and meaningless", - } - dims = ["x", "y", "time", "no_measurements"] - coords = {("x", "y"): xy_coords, "time": t} - - # wrong data type for name - with self.assertRaises(TypeError): - dxarray = ht.dxarray.DXarray(data, dims=dims, coords=coords, name=3.14, attrs=attrs) - - # wrong data type for attrs - with self.assertRaises(TypeError): - dxarray = ht.dxarray.DXarray(data, dims=dims, coords=coords, name=name, attrs=3.14) - - # wrong data type for value - with self.assertRaises(TypeError): - dxarray = ht.dxarray.DXarray(3.14, dims=dims, coords=coords, name=name, attrs=attrs) - - # wrong data type for dims - with self.assertRaises(TypeError): - dxarray = ht.dxarray.DXarray(data, dims=3.14, coords=coords, name=name, attrs=attrs) - - # wrong data type for coords - with self.assertRaises(TypeError): - dxarray = ht.dxarray.DXarray(data, dims=dims, coords=3.14, name=name, attrs=attrs) - - # length of dims and number of dimensions of value array do not match - with self.assertRaises(ValueError): - dxarray = ht.dxarray.DXarray( - data, dims=["x", "y", "time"], coords=coords, name=name, attrs=attrs - ) - - # entries of dims are not unique - with self.assertRaises(ValueError): - dxarray = ht.dxarray.DXarray( - data, dims=["x", "y", "x", "no_measurements"], coords=coords, name=name, attrs=attrs - ) - - # coordinate array for single dimension is not a DNDarray - wrong_coords = {("x", "y"): xy_coords, "time": 3.14} - with self.assertRaises(TypeError): - dxarray = ht.dxarray.DXarray( - data, dims=dims, coords=wrong_coords, name=name, 
attrs=attrs - ) - - # coordinate array for single dimension has wrong dimensionality - wrong_coords = {("x", "y"): xy_coords, "time": ht.ones((k, 2))} - with self.assertRaises(ValueError): - dxarray = ht.dxarray.DXarray( - data, dims=dims, coords=wrong_coords, name=name, attrs=attrs - ) - - # device of a coordinate array does not coincide with device of value array - # TBD - how to test this? - - # communicator of a coordinate array does not coincide with communicator of value array - # TBD - how to test this? - - # size of value array in a dimension does not coincide with size of coordinate array in this dimension - wrong_coords = {("x", "y"): xy_coords, "time": ht.ones(nprocs * k + 1)} - with self.assertRaises(ValueError): - dxarray = ht.dxarray.DXarray( - data, dims=dims, coords=wrong_coords, name=name, attrs=attrs - ) - - # value array is split along a dimension, but cooresponding coordinate array is not split along this dimension - wrong_data = ht.resplit(data, 2) - with self.assertRaises(ValueError): - dxarray = ht.dxarray.DXarray( - wrong_data, dims=dims, coords=coords, name=name, attrs=attrs - ) - - # value array is not split along a dimension, but cooresponding coordinate array is split along this dimension - wrong_coords = {("x", "y"): xy_coords, "time": ht.resplit(t, 0)} - with self.assertRaises(ValueError): - dxarray = ht.dxarray.DXarray( - data, dims=dims, coords=wrong_coords, name=name, attrs=attrs - ) - - # coordinate array in the case of "physical coordinates" is not a DXarray - wrong_coords = {("x", "y"): 3.14, "time": t} - with self.assertRaises(TypeError): - dxarray = ht.dxarray.DXarray( - data, dims=dims, coords=wrong_coords, name=name, attrs=attrs - ) - - # dimension names in coordinate DXarray in the case of "physical coordinates" do not coincide with dimension names of value array - wrong_coords_xy = ht.dxarray.DXarray( - xy, dims=["xx", "yy"], attrs=attrs_xy, name="coordinates of space" - ) - wrong_coords = {("x", "y"): wrong_coords_xy, 
"time": t} - with self.assertRaises(ValueError): - dxarray = ht.dxarray.DXarray( - data, dims=dims, coords=wrong_coords, name=name, attrs=attrs - ) - - # size of values for physical coordinates does not coincide with size of the respective coordinate array - wrong_xy = ht.random.rand(m + 1, n, split=1) - wrong_coords_xy = ht.dxarray.DXarray( - wrong_xy, dims=["x", "y"], attrs=attrs_xy, name="coordinates of space" - ) - wrong_coords = {("x", "y"): wrong_coords_xy, "time": t} - with self.assertRaises(ValueError): - dxarray = ht.dxarray.DXarray( - data, dims=dims, coords=wrong_coords, name=name, attrs=attrs - ) - - # communicator of coordinate array for physical coordinates does not coincide with communicator of value array - # TBD - how to test this? - - # device of coordinate array for physical coordinates does not coincide with device of value array - # TBD - how to test this? - - # coordinate array for physical coordinates is not split along the split dimension of the value array (two cases) - wrong_data = ht.random.randn(m, n, k, ell) - with self.assertRaises(ValueError): - dxarray = ht.dxarray.DXarray( - wrong_data, dims=dims, coords=coords, name=name, attrs=attrs - ) - wrong_xy = ht.random.rand(m, n) - wrong_coords_xy = ht.dxarray.DXarray( - wrong_xy, dims=["x", "y"], attrs=attrs_xy, name="coordinates of space" - ) - wrong_coords = {("x", "y"): wrong_coords_xy, "time": t} - with self.assertRaises(ValueError): - dxarray = ht.dxarray.DXarray( - data, dims=dims, coords=wrong_coords, name=name, attrs=attrs - ) - dxarray *= 1 - - def test_balanced_and_balancing(self): - m = 2 - n = 5 * nprocs - k = 2 - ell = 2 - - # create a highly unbalanced array for the values but not for the coordinates - xy = ht.random.rand(m, n, split=1) - xy = xy[:, 4:] - xy.balance_() - t = ht.linspace(-1, 1, k, split=None) - attrs_xy = {"units_xy": "meters"} - xy_coords = ht.dxarray.DXarray( - xy, dims=["x", "y"], attrs=attrs_xy, name="coordinates of space" - ) - data = ht.random.randn(m, 
n, k, ell, split=1) - data = data[:, 4:, :, :] - name = "mytestarray" - attrs = { - "units time": "seconds", - "measured data": "something really random and meaningless", - } - dims = ["x", "y", "time", "no_measurements"] - coords = {("x", "y"): xy_coords, "time": t} - - dxarray = ht.dxarray.DXarray(data, dims=dims, coords=coords, name=name, attrs=attrs) - - # balancedness-status is first unknown, then known as false (if explicitly checked) and finally known as false after this check - self.assertEqual(dxarray.balanced, None) - self.assertEqual(dxarray.is_balanced(), False) - self.assertEqual(dxarray.balanced, False) - - # rebalancing should work - dxarray.balance_() - self.assertEqual(dxarray.balanced, True) - self.assertEqual(dxarray.is_balanced(force_check=True), True) - - # rebalanced array should be equal to original one - self.assertTrue(ht.allclose(dxarray.values, data)) - self.assertEqual(dxarray.dims, dims) - self.assertEqual(dxarray.dims_with_coords, ["x", "y", "time"]) - self.assertEqual(dxarray.dims_without_coords, ["no_measurements"]) - self.assertEqual(dxarray.name, name) - self.assertEqual(dxarray.attrs, attrs) - # TBD: check for equality of coordinate arrays - - def test_resplit_(self): - m = 2 * nprocs - n = 3 * nprocs - k = 5 * nprocs - ell = 2 - - xy = ht.random.rand(m, n, split=1) - t = ht.linspace(-1, 1, k, split=None) - attrs_xy = {"units_xy": "meters"} - xy_coords = ht.dxarray.DXarray( - xy, dims=["x", "y"], attrs=attrs_xy, name="coordinates of space" - ) - data = ht.random.randn(m, n, k, ell, split=1) - name = "mytestarray" - attrs = { - "units time": "seconds", - "measured data": "something really random and meaningless", - } - dims = ["x", "y", "time", "no_measurements"] - coords = {("x", "y"): xy_coords, "time": t} - - dxarray = ht.dxarray.DXarray(data, dims=dims, coords=coords, name=name, attrs=attrs) - for newsplit in ["x", "time", None, "y"]: - dxarray.resplit_(newsplit) - self.assertEqual(dxarray.split, newsplit) - 
self.assertTrue(ht.allclose(dxarray.values, data)) - self.assertEqual(dxarray.dims, dims) - self.assertEqual(dxarray.dims_with_coords, ["x", "y", "time"]) - self.assertEqual(dxarray.dims_without_coords, ["no_measurements"]) - self.assertEqual(dxarray.name, name) - self.assertEqual(dxarray.attrs, attrs) - # TBD: check for equality of coordinate arrays - - def test_to_and_from_xarray(self): - m = 2 - n = 3 * nprocs - k = 10 - ell = 2 - - # test constructor in a case that should work and also test if all attributes of the DXarray are set correctly - # here we include a dimension ("no_measurements") without coordinates and two dimensions ("x", "y") with physical instead of logical coordinates - xy = ht.random.rand(m, n, split=1) - t = ht.linspace(-1, 1, k, split=None) - attrs_xy = {"units_xy": "meters"} - xy_coords = ht.dxarray.DXarray( - xy, dims=["x", "y"], attrs=attrs_xy, name="coordinates of space" - ) - data = ht.random.randn(m, n, k, ell, split=1) - name = "mytestarray" - attrs = { - "units time": "seconds", - "measured data": "something really random and meaningless", - } - dims = ["x", "y", "time", "no_measurements"] - coords = {("x", "y"): xy_coords, "time": t} - - dxarray = ht.dxarray.DXarray(data, dims=dims, coords=coords, name=name, attrs=attrs) - - xarray = dxarray.xarray() - print(xarray) - # TBD convert back and check for equality (or the other way round?) 
- # dxarray_from_xarray = ht.dxarray.from_xarray(xarray,split=dxarray.split,device=dxarray.device) diff --git a/scripts/numpy_coverage_tables.py b/scripts/numpy_coverage_tables.py index 4124503eec..606d505dfd 100644 --- a/scripts/numpy_coverage_tables.py +++ b/scripts/numpy_coverage_tables.py @@ -524,7 +524,7 @@ # Check if functions exist in the heat library and create table rows for func_name in function_list: - if hasattr(heat, func_name): + if hasattr(heat, func_name) or hasattr(heat.linalg, func_name): support_status = "✅" # Green checkmark for supported functions else: support_status = "❌" # Red cross for unsupported functions From 1bc612d8f2f5db4071f00dda3b74eb06b1cf84cc Mon Sep 17 00:00:00 2001 From: Hoppe Date: Thu, 28 Sep 2023 11:07:17 +0200 Subject: [PATCH 37/51] removed all stuff from dxarray that should not be here... --- .github/workflows/ci.yaml | 1 - heat/core/manipulations.py | 32 +------------------------------- 2 files changed, 1 insertion(+), 32 deletions(-) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index db4f47b98e..4da49b8e33 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -52,6 +52,5 @@ jobs: pip install pytest pip install ${{ matrix.pytorch-version }} --extra-index-url https://download.pytorch.org/whl/cpu pip install ${{ matrix.install-options }} - pip install xarray mpirun -n 3 pytest heat/ mpirun -n 4 pytest heat/ diff --git a/heat/core/manipulations.py b/heat/core/manipulations.py index c35d16faf5..0d986a8f34 100644 --- a/heat/core/manipulations.py +++ b/heat/core/manipulations.py @@ -9,7 +9,7 @@ from typing import Iterable, Type, List, Callable, Union, Tuple, Sequence, Optional -from .communication import MPI, sanitize_comm, Communication +from .communication import MPI from .dndarray import DNDarray from . import arithmetics @@ -21,7 +21,6 @@ from . import tiling from . import types from . import _operations -from . 
import devices __all__ = [ "balance", @@ -38,7 +37,6 @@ "flip", "fliplr", "flipud", - "from_numpy", "hsplit", "hstack", "moveaxis", @@ -1143,34 +1141,6 @@ def flipud(a: DNDarray) -> DNDarray: return flip(a, 0) -def from_numpy( - x: np.ndarray, - split: Optional[int] = None, - device: Optional[Union[str, devices.Device]] = None, - comm: Optional[Communication] = None, -) -> DNDarray: - """ - Creates DNDarray from given NumPy Array. The data type is determined by the data type of the Numpy Array. - Split-dimension, device and communicator can be prescribed as usual. - Inverse of :meth:`DNDarray.numpy()`. - """ - dtype = types.canonical_heat_type(x.dtype) - device = devices.sanitize_device(device) - comm = sanitize_comm(comm) - xht = DNDarray( - torch.from_numpy(x).to(device.torch_device), - x.shape, - dtype=dtype, - split=None, - device=device, - comm=comm, - balanced=True, - ) - if split is not None: - xht.resplit_(split) - return xht - - def hsplit(x: DNDarray, indices_or_sections: Iterable) -> List[DNDarray, ...]: """ Split array into multiple sub-DNDarrays along the 2nd axis (horizontally/column-wise). From 9046bccb832824d08be6684289be4fae5aff8056 Mon Sep 17 00:00:00 2001 From: Hoppe Date: Thu, 28 Sep 2023 11:09:36 +0200 Subject: [PATCH 38/51] ... 
--- .github/workflows/benchmark_main.yml | 46 ---------------------------- .github/workflows/benchmark_pr.yml | 46 ---------------------------- 2 files changed, 92 deletions(-) delete mode 100644 .github/workflows/benchmark_main.yml delete mode 100644 .github/workflows/benchmark_pr.yml diff --git a/.github/workflows/benchmark_main.yml b/.github/workflows/benchmark_main.yml deleted file mode 100644 index c226d36086..0000000000 --- a/.github/workflows/benchmark_main.yml +++ /dev/null @@ -1,46 +0,0 @@ -name: Benchmark main and save -on: - push: - branches: - - main - -jobs: - benchmark-main: - name: Benchmark main and save - runs-on: ubuntu-latest - steps: - - name: Checkout - uses: actions/checkout@v3 - - name: Setup MPI - uses: mpi4py/setup-mpi@v1 - - name: Use Python 3.10 - uses: actions/setup-python@v4 - with: - python-version: 3.10.11 # Perun only supports 3.8 and ahead - architecture: x64 - - name: Test - run: | - pip install torch==1.12.1+cpu torchvision==0.13.1+cpu torchaudio==0.12.1 -f https://download.pytorch.org/whl/torch_stable.html - pip install xarray - pip install .[cb] - PERUN_RUN_ID=N4 mpirun -n 4 python benchmarks/cb/main.py - jq -s flatten bench_data/*.json > bench_data/all_benchmarks.json - - name: Save benchmark result and update gh-pages-chart - if: ${{github.ref == 'refs/heads/main'}} - uses: benchmark-action/github-action-benchmark@v1 - with: - github-token: ${{secrets.GITHUB_TOKEN}} - # Benchmark action input and output - tool: 'customSmallerIsBetter' - output-file-path: bench_data/all_benchmarks.json - # external-data-json-path: ./cache/benchmark-data.json - # Alert configuration - fail-on-alert: false # Don't fail on main branch - comment-on-alert: true - # Save benchmarks from the main branch - save-data-file: true - # Pages configuration - auto-push: true - gh-pages-branch: gh-pages - benchmark-data-dir-path: dev/bench - # Upload the updated cache file for the next job by actions/cache diff --git a/.github/workflows/benchmark_pr.yml 
b/.github/workflows/benchmark_pr.yml deleted file mode 100644 index db2271982f..0000000000 --- a/.github/workflows/benchmark_pr.yml +++ /dev/null @@ -1,46 +0,0 @@ -name: Benchmark PR -on: - pull_request: - types: [opened, synchronize, reopened, labeled] - branches: [main] - -jobs: - benchmark-pr: - name: Benchmark PR - if: contains(github.event.pull_request.labels.*.name, 'benchmark PR') - runs-on: ubuntu-latest - steps: - - name: Checkout - uses: actions/checkout@v3 - - name: Setup MPI - uses: mpi4py/setup-mpi@v1 - - name: Use Python 3.10 - uses: actions/setup-python@v4 - with: - python-version: 3.10.11 # Perun only supports 3.8 and ahead - architecture: x64 - - name: Test - run: | - pip install torch==1.12.1+cpu torchvision==0.13.1+cpu torchaudio==0.12.1 -f https://download.pytorch.org/whl/torch_stable.html - pip install xarray - pip install .[cb] - PERUN_RUN_ID=N4 mpirun -n 4 python benchmarks/cb/main.py - jq -s flatten bench_data/*.json > bench_data/all_benchmarks.json - - name: Compare benchmark result - if: ${{github.ref != 'refs/heads/main'}} - uses: benchmark-action/github-action-benchmark@v1 - with: - github-token: ${{secrets.GITHUB_TOKEN}} - # Benchmark action input and output - tool: 'customSmallerIsBetter' - output-file-path: bench_data/all_benchmarks.json - # external-data-json-path: ./cache/benchmark-data.json - # Alert configuration - fail-on-alert: true - comment-on-alert: true - # Ignore results from non main branches. - save-data-file: false - # Pages configuration - auto-push: false - gh-pages-branch: gh-pages - benchmark-data-dir-path: dev/bench From 76f56f8c58160faa4e41718e970faadbfffff9b7 Mon Sep 17 00:00:00 2001 From: Hoppe Date: Thu, 28 Sep 2023 11:10:52 +0200 Subject: [PATCH 39/51] ... 
--- .github/workflows/ReceivePR.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/workflows/ReceivePR.yml b/.github/workflows/ReceivePR.yml index 7749056b8a..89d058ad31 100644 --- a/.github/workflows/ReceivePR.yml +++ b/.github/workflows/ReceivePR.yml @@ -27,7 +27,6 @@ jobs: - name: Test run: | pip install .[dev] - pip install xarray pre-commit run --all-files python -m unittest From 7f18de6e4b01e10a482243bad510f47a4e1bc532 Mon Sep 17 00:00:00 2001 From: Claudia Comito <39374113+ClaudiaComito@users.noreply.github.com> Date: Thu, 28 Sep 2023 12:03:38 +0200 Subject: [PATCH 40/51] update environment yaml files --- scripts/heat_dev.yml | 4 ++-- scripts/heat_env.yml | 13 ++----------- 2 files changed, 4 insertions(+), 13 deletions(-) diff --git a/scripts/heat_dev.yml b/scripts/heat_dev.yml index 3de812e489..1ca994771f 100644 --- a/scripts/heat_dev.yml +++ b/scripts/heat_dev.yml @@ -3,12 +3,12 @@ channels: - conda-forge - defaults dependencies: - - python=3.9 + - python=3.10 - openmpi - mpi4py - h5py[version='>=2.9',build=mpi*] - netcdf4 - - pytorch=1.13.0 + - pytorch - torchvision - scipy - pre-commit diff --git a/scripts/heat_env.yml b/scripts/heat_env.yml index 9d9130c22f..1d5e1b6dcd 100644 --- a/scripts/heat_env.yml +++ b/scripts/heat_env.yml @@ -3,14 +3,5 @@ channels: - conda-forge - defaults dependencies: - - python=3.9 - - openmpi - - mpi4py - - h5py[version='>=2.9',build=mpi*] - - netcdf4 - - pytorch=1.13.0 - - torchvision - - scipy - - pip - - pip: - - heat + - python=3.10 + - heat From 127fadddedad7529d76ac09d8bfb2f70190858d1 Mon Sep 17 00:00:00 2001 From: Claudia Comito <39374113+ClaudiaComito@users.noreply.github.com> Date: Thu, 28 Sep 2023 12:30:16 +0200 Subject: [PATCH 41/51] add link to Rabenseifner's MPI course --- README.md | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index af15f073c3..7f0ff9cb4d 100644 --- a/README.md +++ b/README.md @@ -21,7 +21,7 @@ Heat is a distributed tensor framework for 
high performance data analytics. [![Benchmarks](https://img.shields.io/badge/Github--Pages-Benchmarks-2ea44f)](https://helmholtz-analytics.github.io/heat/dev/bench) [![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black) -- Table of Contents +# Table of Contents - [What is Heat for?](#what-is-heat-for) - [Features](#features) - [Installation](#installation) @@ -172,8 +172,9 @@ If you’re unsure where to start or how your skills fit in, reach out! You can ### Parallel Computing and MPI: -* @davidhenty's [course](https://www.archer2.ac.uk/training/courses/200514-mpi/) +* David Henty's [course](https://www.archer2.ac.uk/training/courses/200514-mpi/) * Wes Kendall's [Tutorials](https://mpitutorial.com/tutorials/) +* Rolf Rabenseifner's [MPI course material](https://www.hlrs.de/training/self-study-materials/mpi-course-material) (including C, Fortran **and** Python via `mpi4py`) ### mpi4py From 40c7e0776f4c8ffb34d25e6b0a7f1f9b5a636f14 Mon Sep 17 00:00:00 2001 From: Hoppe Date: Thu, 28 Sep 2023 12:36:54 +0200 Subject: [PATCH 42/51] added numpy.random to coverage tables furthermore, removed bug in the handling of submodules in the coverage table generator --- coverage_tables.md | 28 ++++++++++++++++++---- scripts/numpy_coverage_tables.py | 40 +++++++++++++++++++++++++++++++- 2 files changed, 63 insertions(+), 5 deletions(-) diff --git a/coverage_tables.md b/coverage_tables.md index 5246aaebf9..f90dadfba4 100644 --- a/coverage_tables.md +++ b/coverage_tables.md @@ -12,6 +12,7 @@ The following tables show the NumPy functions supported by Heat. 7. [NumPy Logic Functions](#numpy-logic-functions) 8. [NumPy Sorting Operations](#numpy-sorting-operations) 9. [NumPy Statistical Operations](#numpy-statistical-operations) +10. 
[NumPy Random Operations](#numpy-random-operations) ## NumPy Mathematical Functions [Back to Table of Contents](#table-of-contents) @@ -283,22 +284,22 @@ The following tables show the NumPy functions supported by Heat. | linalg.matrix_power | ❌ | | kron | ❌ | | linalg.cholesky | ❌ | -| linalg.qr | ❌ | +| linalg.qr | ✅ | | linalg.svd | ❌ | | linalg.eig | ❌ | | linalg.eigh | ❌ | | linalg.eigvals | ❌ | | linalg.eigvalsh | ❌ | -| linalg.norm | ❌ | +| linalg.norm | ✅ | | linalg.cond | ❌ | -| linalg.det | ❌ | +| linalg.det | ✅ | | linalg.matrix_rank | ❌ | | linalg.slogdet | ❌ | | trace | ✅ | | linalg.solve | ❌ | | linalg.tensorsolve | ❌ | | linalg.lstsq | ❌ | -| linalg.inv | ❌ | +| linalg.inv | ✅ | | linalg.pinv | ❌ | | linalg.tensorinv | ❌ | ## NumPy Logic Functions @@ -385,3 +386,22 @@ The following tables show the NumPy functions supported by Heat. | bincount | ✅ | | histogram_bin_edges | ❌ | | digitize | ✅ | +## NumPy Random Operations +[Back to Table of Contents](#table-of-contents) + +| NumPy Random Operations | Heat | +|---|---| +| random.rand | ✅ | +| random.randn | ✅ | +| random.randint | ✅ | +| random.random_integers | ❌ | +| random.random_sample | ✅ | +| random.ranf | ✅ | +| random.sample | ✅ | +| random.choice | ❌ | +| random.bytes | ❌ | +| random.shuffle | ❌ | +| random.permutation | ✅ | +| random.seed | ✅ | +| random.get_state | ✅ | +| random.set_state | ✅ | diff --git a/scripts/numpy_coverage_tables.py b/scripts/numpy_coverage_tables.py index 606d505dfd..1d4c8cff6a 100644 --- a/scripts/numpy_coverage_tables.py +++ b/scripts/numpy_coverage_tables.py @@ -500,6 +500,40 @@ numpy_functions.append(numpy_statistics_operations) headers[str(len(headers))] = "NumPy Statistical Operations" +# numpy random operations +numpy_random_operations = [ + # numpy.random.rand + # numpy.random.randn + # numpy.random.randint + # numpy.random.random_integers + # numpy.random.random_sample + # numpy.random.ranf + # numpy.random.sample + # numpy.random.choice + # numpy.random.bytes 
+ # numpy.random.shuffle + # numpy.random.permutation + # numpy.random.seed + # numpy.random.get_state + # numpy.random.set_state + "random.rand", + "random.randn", + "random.randint", + "random.random_integers", + "random.random_sample", + "random.ranf", + "random.sample", + "random.choice", + "random.bytes", + "random.shuffle", + "random.permutation", + "random.seed", + "random.get_state", + "random.set_state", +] +numpy_functions.append(numpy_random_operations) +headers[str(len(headers))] = "NumPy Random Operations" + # initialize markdown file # open the file in write mode f = open("coverage_tables.md", "w") @@ -524,7 +558,11 @@ # Check if functions exist in the heat library and create table rows for func_name in function_list: - if hasattr(heat, func_name) or hasattr(heat.linalg, func_name): + if ( + hasattr(heat, func_name) + or hasattr(heat.linalg, func_name.replace("linalg.", "")) + or hasattr(heat.random, func_name.replace("random.", "")) + ): support_status = "✅" # Green checkmark for supported functions else: support_status = "❌" # Red cross for unsupported functions From 9b731e104703bab439a7009dbcf2a0a8c5568b19 Mon Sep 17 00:00:00 2001 From: Hoppe Date: Thu, 28 Sep 2023 12:46:46 +0200 Subject: [PATCH 43/51] added link to hsvd blog post --- README.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/README.md b/README.md index af15f073c3..e171949899 100644 --- a/README.md +++ b/README.md @@ -48,6 +48,8 @@ With Heat you can: - exploit the entire, cumulative RAM of your many nodes for memory-intensive operations and algorithms; - run your NumPy/SciPy code on GPUs (CUDA, ROCm, coming up: Apple MPS). +For an example that highlights the benefits of multi-node parallelism and hardware acceleration and the related code snippets see, e.g., our [blog post on trucated SVD of a 200GB data set](https://helmholtz-analytics.github.io/heat/2023/06/16/new-feature-hsvd.html). 
+ Check out our [coverage tables](coverage_tables.md) to see which NumPy, SciPy, scikit-learn functions are already supported. If you need a functionality that is not yet supported: From b712b893467255348b8387fc40462a393775d925 Mon Sep 17 00:00:00 2001 From: Hoppe Date: Thu, 28 Sep 2023 12:49:11 +0200 Subject: [PATCH 44/51] just reformulation of a sentence --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index e171949899..a1cc37033e 100644 --- a/README.md +++ b/README.md @@ -48,7 +48,7 @@ With Heat you can: - exploit the entire, cumulative RAM of your many nodes for memory-intensive operations and algorithms; - run your NumPy/SciPy code on GPUs (CUDA, ROCm, coming up: Apple MPS). -For an example that highlights the benefits of multi-node parallelism and hardware acceleration and the related code snippets see, e.g., our [blog post on trucated SVD of a 200GB data set](https://helmholtz-analytics.github.io/heat/2023/06/16/new-feature-hsvd.html). +For a example that highlights the benefits of multi-node parallelism, hardware acceleration, and how easy this can be done with the help of Heat, see, e.g., our [blog post on trucated SVD of a 200GB data set](https://helmholtz-analytics.github.io/heat/2023/06/16/new-feature-hsvd.html). Check out our [coverage tables](coverage_tables.md) to see which NumPy, SciPy, scikit-learn functions are already supported. From c0266717aab87dbad3820cfc2e7b7240c920e22a Mon Sep 17 00:00:00 2001 From: Claudia Comito <39374113+ClaudiaComito@users.noreply.github.com> Date: Thu, 28 Sep 2023 14:37:54 +0200 Subject: [PATCH 45/51] test logos layout new FZJ logo --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 7f0ff9cb4d..82f36e875c 100644 --- a/README.md +++ b/README.md @@ -229,5 +229,5 @@ under project number ZT-I-0003 and the Helmholtz AI platform grant.* ---
- +
From 86abd2e25c33290c5b86bfe6021f7aa3ab365818 Mon Sep 17 00:00:00 2001 From: Claudia Comito <39374113+ClaudiaComito@users.noreply.github.com> Date: Thu, 28 Sep 2023 14:42:16 +0200 Subject: [PATCH 46/51] fix FZJ logo link --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index e69ced71d2..0949d188ad 100644 --- a/README.md +++ b/README.md @@ -231,5 +231,5 @@ under project number ZT-I-0003 and the Helmholtz AI platform grant.* ---
- +
From f136ad1aea55281809686e5832e9aee30c67fb17 Mon Sep 17 00:00:00 2001 From: Claudia Comito <39374113+ClaudiaComito@users.noreply.github.com> Date: Thu, 28 Sep 2023 14:44:29 +0200 Subject: [PATCH 47/51] point FZJ logo back to main --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 0949d188ad..32cf98b656 100644 --- a/README.md +++ b/README.md @@ -231,5 +231,5 @@ under project number ZT-I-0003 and the Helmholtz AI platform grant.* ---
- +
From 8cae150d5fffcbeab1fb8b9fb7ef1fb86bf44706 Mon Sep 17 00:00:00 2001 From: JuanPedroGHM Date: Thu, 28 Sep 2023 15:32:25 +0200 Subject: [PATCH 48/51] replaced version numbers from docker docs with placeholders (#1222) * replaced version numbers from docker docs with placeholders * info about nvidia base image --- docker/README.md | 30 ++++++++++++++++-------------- quick_start.md | 6 ++++-- 2 files changed, 20 insertions(+), 16 deletions(-) diff --git a/docker/README.md b/docker/README.md index 9202c974c5..aedeee419d 100644 --- a/docker/README.md +++ b/docker/README.md @@ -3,12 +3,12 @@ There is some flexibility to building the Docker images of Heat. Firstly, one can build from the released version taken from PyPI. This will either be -the latest release or the version set through the `--build-arg=HEAT_VERSION=1.2.0` +the latest release or the version set through the `--build-arg=HEAT_VERSION=X.Y.Z` argument. Secondly one can build a docker image from the GitHub sources, selected through `--build-arg=INSTALL_TYPE=source`. The default branch to be built is main, other -branches can be specified using `--build-arg=HEAT_BRANCH=branchname`. +branches can be specified using `--build-arg=HEAT_BRANCH=`. ## General build @@ -18,13 +18,15 @@ The [Dockerfile](./Dockerfile) guiding the build of the Docker image is located directory. It is typically most convenient to `cd` over here and run the Docker build as: ```console -$ docker build --build-args HEAT_VERSION=1.2.2 --PYTORCH_IMG=22.05-py3 -t heat:local . +$ docker build --build-args HEAT_VERSION=X.Y.Z --PYTORCH_IMG= -t heat . ``` +The heat image is based on the nvidia pytorch container. You can find exisiting tags in the [nvidia container catalog](https://catalog.ngc.nvidia.com/orgs/nvidia/containers/pytorch/tags). 
+ We also offer prebuilt images in our [Package registry](https://github.com/helmholtz-analytics/heat/pkgs/container/heat) from which you can pull existing images: ```console -$ docker pull ghcr.io/helmholtz-analytics/heat:1.2.0-dev_torch1.12_cuda11.7_py3.8 +$ docker pull ghcr.io/helmholtz-analytics/heat: ``` ### Building for HPC @@ -37,24 +39,24 @@ image also for HPC systems, such as the ones available at [Jülich Supercomputin To use one of the existing images from our registry: - $ apptainer build heat.sif docker://ghcr.io/helmholtz-analytics/heat:1.2.0-dev_torch1.12_cuda11.7_py3.8 + $ apptainer build heat.sif docker://ghcr.io/helmholtz-analytics/heat: Building the image can require root access in some systems. If that is the case, we recommend building the image on a local machine, and then upload it to the desired HPC system. If you see an error indicating that there is not enough space, use the --tmpdir flag of the build command. [Apptainer docs](https://apptainer.org/docs/user/latest/build_a_container.html) -#### SIB (Singularity Image Builder) +#### SIB (Singularity Image Builder) for Apptainer images A simple `Dockerfile` (in addition to the one above) to be used with SIB could look like this: - FROM ghcr.io/helmholtz-analytics/heat:1.2.0_torch1.12_cuda11.7_py3.8 + FROM ghcr.io/helmholtz-analytics/heat: The invocation to build the image would be: - $ sib upload ./Dockerfile heat_1.2.0_torch1.12_cuda11.7_py3.8 - $ sib build --recipe-name heat_1.2.0_torch1.12_cuda11.7_py3.8 - $ sib download --recipe-name heat_1.2.0_torch1.12_cuda11.7_py3.8 + $ sib upload ./Dockerfile heat + $ sib build --recipe-name heat + $ sib download --recipe-name heat However, SIB is capable of using just about any available Docker image from any registry, such that a specific Singularity image can be built by simply referencing the @@ -62,7 +64,7 @@ available image. SIB is thus used as a conversion tool. 
## Running on HPC - $ singularity run --nv heat_1.2.0_torch.11_cuda11.5_py3.9.sif /bin/bash + $ apptainer run --nv heat /bin/bash $ python Python 3.8.13 (default, Mar 28 2022, 11:38:47) [GCC 7.5.0] :: Anaconda, Inc. on linux @@ -70,12 +72,12 @@ available image. SIB is thus used as a conversion tool. >>> import heat as ht ... -The `--nv` argument to `singularity`enables NVidia GPU support, which is desired for +The `--nv` argument to `apptainer` enables NVidia GPU support, which is desired for Heat. ### Multi-node example -The following file can be used as an example to use the singularity file together with SLURM, which allows heat to work in a multi-node environment. +The following file can be used as an example to use the apptainer file together with SLURM, which allows heat to work in a multi-node environment. ```bash #!/bin/bash @@ -85,5 +87,5 @@ The following file can be used as an example to use the singularity file togethe ... -srun --mpi="pmi2" singularity exec --nv heat_1.2.0_torch.11_cuda11.5_py3.9.sif bash -c "cd ~/code/heat/examples/lasso; python demo.py" +srun --mpi="pmi2" apptainer exec --nv heat_1.2.0_torch.11_cuda11.5_py3.9.sif bash -c "cd ~/code/heat/examples/lasso; python demo.py" ``` diff --git a/quick_start.md b/quick_start.md index 6b700cb298..be595da8fd 100644 --- a/quick_start.md +++ b/quick_start.md @@ -38,7 +38,7 @@ Work in progress. Get the docker image from our package repository ``` -docker pull ghcr.io/helmholtz-analytics/heat:1.2.0-dev_torch1.12_cuda11.7_py3.8 +docker pull ghcr.io/helmholtz-analytics/heat: ``` or build it from our Dockerfile @@ -46,9 +46,11 @@ or build it from our Dockerfile ``` git clone https://github.com/helmholtz-analytics/heat.git cd heat/docker -docker build -t heat:latest . +docker build --build-args HEAT_VERSION=X.Y.Z --PYTORCH_IMG= -t heat:latest . ``` +`` should be replaced with an existing version of the official Nvidia pytorch container image. 
Information and existing tags can be found on the [here](https://catalog.ngc.nvidia.com/orgs/nvidia/containers/pytorch) + See [our docker README](https://github.com/helmholtz-analytics/heat/tree/main/docker/README.md) for other details. ### Test From 0d405d0cb3c5ba409ef7ae08be9569b65582f264 Mon Sep 17 00:00:00 2001 From: Claudia Comito <39374113+ClaudiaComito@users.noreply.github.com> Date: Thu, 28 Sep 2023 15:52:19 +0200 Subject: [PATCH 49/51] edits --- README.md | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/README.md b/README.md index 32cf98b656..a593d45024 100644 --- a/README.md +++ b/README.md @@ -81,11 +81,12 @@ Check out our [features](#features) and the [Heat API Reference](https://heat.re - h5py - netCDF4 -### GPU-support -In order to do computations on your GPU(s), you will require a CUDA or ROCm installation matching your hardware and its drivers. Moreover, -- your PyTorch installation must be compiled with CUDA/ROCm support. +### GPU support +In order to do computations on your GPU(s): +- your CUDA or ROCm installation must match your hardware and its drivers; +- your [PyTorch installation](https://pytorch.org/get-started/locally/) must be compiled with CUDA/ROCm support. -### HPC-systems +### HPC systems On most HPC-systems you will not be able to install/compile MPI or CUDA/ROCm yourself. Instead, you will most likely need to load a pre-installed MPI and/or CUDA/ROCm module from the module system. Maybe, you will even find PyTorch, h5py, or mpi4py as (part of) such a module. Note that for optimal performance on GPU, you need to usa an MPI library that has been compiled with CUDA/ROCm support (e.g., so-called "CUDA-aware MPI"). @@ -133,12 +134,12 @@ Local torch tensor on rank 1 : tensor([5, 6, 7, 8, 9], dtype=torch.int32) # FAQ -In progress... +Work in progress... - - Users + + +Please do mention Heat in your publications if it helped your research. 
You can cite: * Götz, M., Debus, C., Coquelin, D., Krajsek, K., Comito, C., Knechtges, P., Hagemeier, B., Tarnawa, M., Hanselmann, S., Siggel, S., Basermann, A. & Streit, A. (2020). HeAT - a Distributed and GPU-accelerated Tensor Framework for Data Analytics. In 2020 IEEE International Conference on Big Data (Big Data) (pp. 276-287). IEEE, DOI: 10.1109/BigData50022.2020.9378050. From 657bc59f21a35207227f9df9d57caa193533b1f9 Mon Sep 17 00:00:00 2001 From: Claudia Comito <39374113+ClaudiaComito@users.noreply.github.com> Date: Fri, 29 Sep 2023 15:07:24 +0200 Subject: [PATCH 50/51] edits, multiple mention ReadTheDocs tutorials --- README.md | 102 ++++++++++++++++++++++++++---------------------------- 1 file changed, 50 insertions(+), 52 deletions(-) diff --git a/README.md b/README.md index a593d45024..f19e8dd99c 100644 --- a/README.md +++ b/README.md @@ -24,17 +24,17 @@ Heat is a distributed tensor framework for high performance data analytics. # Table of Contents - [What is Heat for?](#what-is-heat-for) - [Features](#features) + - [Getting Started](#getting-started) - [Installation](#installation) - [Requirements](#requirements) - [pip](#pip) - [conda](#conda) - - [Getting Started](#getting-started) - - [FAQ](#faq) - [Support Channels](#support-channels) - [Contribution guidelines](#contribution-guidelines) - - [Resources for MPI programming](#resources-for-mpi-programming) + - [Resources](#resources) - [License](#license) - [Citing Heat](#citing-heat) + - [FAQ](#faq) - [Acknowledgements](#acknowledgements) @@ -53,7 +53,7 @@ For a example that highlights the benefits of multi-node parallelism, hardware a Check out our [coverage tables](coverage_tables.md) to see which NumPy, SciPy, scikit-learn functions are already supported. 
If you need a functionality that is not yet supported: - - [search existing issues](https://github.com/helmholtz-analytics/heat/issues) and make sure to comment if someone else already requested it; + - [search existing issues](https://github.com/helmholtz-analytics/heat/issues) and make sure to leave a comment if someone else already requested it; - [open a new issue](https://github.com/helmholtz-analytics/heat/issues/new/choose). @@ -67,6 +67,42 @@ Check out our [features](#features) and the [Heat API Reference](https://heat.re * Seamless integration with the NumPy/SciPy ecosystem * Python array API (work in progress) + +# Getting Started + +Go to [Quick Start](quick_start.md) for a quick overview. For more details, see [Installation](#installation). + +**You can test your setup** by running the [`heat_test.py`](https://github.com/helmholtz-analytics/heat/blob/main/scripts/heat_test.py) script: + +```shell +mpirun -n 2 python heat_test.py +``` + +It should print something like this: + +```shell +x is distributed: True +Global DNDarray x: DNDarray([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], dtype=ht.int32, device=cpu:0, split=0) +Global DNDarray x: +Local torch tensor on rank 0 : tensor([0, 1, 2, 3, 4], dtype=torch.int32) +Local torch tensor on rank 1 : tensor([5, 6, 7, 8, 9], dtype=torch.int32) +``` + +Our Jupyter Notebook [**Tutorial**](https://github.com/helmholtz-analytics/heat/blob/main/scripts/) illustrates Heat's basics. More tutorials [here](https://heat.readthedocs.io/en/latest/tutorials.html). + +The complete documentation of the latest version is always deployed on +[Read the Docs](https://heat.readthedocs.io/). + + + # Installation ## Requirements @@ -106,59 +142,14 @@ The conda build includes all dependencies **including OpenMPI**. conda install -c conda-forge heat ``` -# Getting Started - -Go to [Quick Start](quick_start.md) for a quick overview. 
- -Check out our Jupyter Notebook [**Tutorial**](https://github.com/helmholtz-analytics/heat/blob/main/scripts/) -right here on GitHub or in the `./scripts` directory, to learn and understand Heat's basics. - -The complete documentation of the latest version is always deployed on -[Read the Docs](https://heat.readthedocs.io/). - -### You can test your setup by running the [`heat_test.py`](https://github.com/helmholtz-analytics/heat/blob/main/scripts/heat_test.py) script: - -```shell -mpirun -n 2 python heat_test.py -``` - -### It should print something like this: - -```shell -x is distributed: True -Global DNDarray x: DNDarray([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], dtype=ht.int32, device=cpu:0, split=0) -Global DNDarray x: -Local torch tensor on rank 0 : tensor([0, 1, 2, 3, 4], dtype=torch.int32) -Local torch tensor on rank 1 : tensor([5, 6, 7, 8, 9], dtype=torch.int32) -``` - - -# FAQ -Work in progress... - - - - - # Support Channels -Go ahead and ask questions on [GitHub Discussions](https://github.com/helmholtz-analytics/heat/discussions). If you found a bug or miss a feature, then please file a new [issue](https://github.com/helmholtz-analytics/heat/issues/new/choose). You can also get in touch with us on [Mattermost](https://mattermost.hzdr.de/signup_user_complete/?id=3sixwk9okpbzpjyfrhen5jpqfo) (sign up with your GitHub credentials). Once you log in, you can introduce yourself on the `Town Square` channel. +Go ahead and ask questions on [GitHub Discussions](https://github.com/helmholtz-analytics/heat/discussions). If you found a bug or are missing a feature, then please file a new [issue](https://github.com/helmholtz-analytics/heat/issues/new/choose). You can also get in touch with us on [Mattermost](https://mattermost.hzdr.de/signup_user_complete/?id=3sixwk9okpbzpjyfrhen5jpqfo) (sign up with your GitHub credentials). Once you log in, you can introduce yourself on the `Town Square` channel. 
# Contribution guidelines -**We welcome contributions from the community, if you want to contribute to Heat, be sure to review the [Contribution Guidelines](contributing.md) and [Resources for MPI programming](#resources-for-mpi-programming) before getting started!** +**We welcome contributions from the community, if you want to contribute to Heat, be sure to review the [Contribution Guidelines](contributing.md) and [Resources](#resources) before getting started!** We use [GitHub issues](https://github.com/helmholtz-analytics/heat/issues) for tracking requests and bugs, please see [Discussions](https://github.com/helmholtz-analytics/heat/discussions) for general questions and discussion. You can also get in touch with us on [Mattermost](https://mattermost.hzdr.de/signup_user_complete/?id=3sixwk9okpbzpjyfrhen5jpqfo) (sign up with your GitHub credentials). Once you log in, you can introduce yourself on the `Town Square` channel. @@ -167,7 +158,7 @@ If you’re unsure where to start or how your skills fit in, reach out! You can **If you are new to contributing to open source, [this guide](https://opensource.guide/how-to-contribute/) helps explain why, what, and how to get involved.** -## Resources for MPI programming +## Resources * [Heat Tutorials](https://heat.readthedocs.io/en/latest/tutorials.html) * [Heat API Reference](https://heat.readthedocs.io/en/latest/autoapi/index.html) @@ -220,6 +211,13 @@ Please do mention Heat in your publications if it helped your research. You can doi={10.1109/BigData50022.2020.9378050} } ``` +# FAQ +Work in progress... 
+ + ## Acknowledgements From 724a80b256ef0cea9f71dbe12c124d6713bcbb32 Mon Sep 17 00:00:00 2001 From: JuanPedroGHM Date: Mon, 9 Oct 2023 11:12:06 +0200 Subject: [PATCH 51/51] Benchmarks: missing branch argument in gitlab pipeline trigger (#1227) * missing variable in ci call * filter status creation --- .github/workflows/bench_trigger.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/bench_trigger.yml b/.github/workflows/bench_trigger.yml index deb06e8acf..be12d13c7c 100644 --- a/.github/workflows/bench_trigger.yml +++ b/.github/workflows/bench_trigger.yml @@ -28,7 +28,6 @@ jobs: -F "variables[PR]=${{ github.event.pull_request.number }}" \ -F "variables[AUTHOR]=${{ github.event.pull_request.assignee.login }}" \ https://codebase.helmholtz.cloud/api/v4/projects/7930/trigger/pipeline - echo sha - name: Trigger benchmarks (Push main) id: setup_push if: ${{ github.event_name == 'push' }} @@ -40,9 +39,11 @@ jobs: -F "ref=main" \ -F "variables[SHA]=$GITHUB_SHA" \ -F "variables[SHORT_SHA]=${SHORT_SHA}" \ + -F "variables[BRANCH]=main" \ -F "variables[AUTHOR]=${{ github.event.head_commit.committer.username }}" \ https://codebase.helmholtz.cloud/api/v4/projects/7930/trigger/pipeline - name: Create status + if: ${{ steps.setup_pr.outcome == 'success' || steps.setup_push.outcome == 'success'}} run: | curl -L -X POST \ -H "Accept: application/vnd.github+json" \