Skip to content

Commit

Permalink
Add ExtendedComponent and RegionData classes
Browse files Browse the repository at this point in the history
  • Loading branch information
jfoster17 committed Sep 13, 2023
1 parent 1a5c767 commit eabe4e3
Show file tree
Hide file tree
Showing 6 changed files with 607 additions and 5 deletions.
105 changes: 104 additions & 1 deletion glue/core/component.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

import numpy as np
import pandas as pd
import shapely

from glue.core.coordinate_helpers import dependent_axes, pixel2world_single_axis
from glue.utils import shape_to_string, coerce_numeric, categorical_ndarray
Expand All @@ -13,7 +14,7 @@
DASK_INSTALLED = False

__all__ = ['Component', 'DerivedComponent', 'CategoricalComponent',
'CoordinateComponent', 'DateTimeComponent']
'CoordinateComponent', 'DateTimeComponent', 'ExtendedComponent']


class Component(object):
Expand Down Expand Up @@ -107,6 +108,13 @@ def datetime(self):
"""
return False

@property
def extended(self):
    """
    Whether or not the datatype represents an extended region.

    This implementation always returns False.
    """
    return False

def __str__(self):
    """
    Return a short description made of the class name and the shape.

    The shape is rendered by ``shape_to_string`` from ``self.shape``.
    """
    class_name = self.__class__.__name__
    shape_text = shape_to_string(self.shape)
    return "%s with shape %s" % (class_name, shape_text)

Expand Down Expand Up @@ -549,3 +557,98 @@ def categorical(self):
@property
def datetime(self):
    """
    Whether or not the datatype is a date/time type.

    This implementation always returns False.
    """
    return False


class ExtendedComponent(Component):
    """
    A data component representing an extent or a region.

    This component can be used when a dataset describes regions or ranges
    and is typically used with a :class:`~glue.core.data.RegionData` object.
    For example, a :class:`~glue.core.data.RegionData` object might provide
    properties of geographic regions, and the boundaries of these regions
    would be an ExtendedComponent.

    Data loaders are required to know how to convert regions to a list
    of Shapely objects which can be used to initialize an ExtendedComponent.

    A circular region can be represented as:

        circle = shapely.Point(x, y).buffer(rad)

    A range in one dimension can be represented as:

        range = shapely.LineString([[x0,0],[x1,0]])

    (This is a bit of an odd representation, since we are forced to specify
    a y coordinate for this line. We adopt a convention of y == 0.)

    ExtendedComponents are NOT used directly in linking. Instead,
    ExtendedComponents always have corresponding ComponentIDs that represent
    the x (and y) coordinates over which the regions are defined. If not
    specified otherwise, a :class:`~glue.core.data.RegionData` object will
    create `representative points` for each region, representing a point
    near the center of the region that is guaranteed to be inside the region.

    NOTE: this implementation does not support regions in more than
    two dimensions. (Shapely has limited support for 3D shapes, but not more).

    Parameters
    ----------
    data : list of :class:`~shapely.Geometry` objects
        The data to store.
    center_comp_ids : list of :class:`glue.core.component_id.ComponentID` objects
        The ComponentIDs of the `center` of the extended region. These do not
        have to be the literal center of the region, but they must be in the
        x (and y) coordinates of the regions. These ComponentIDs are used in
        the linking framework to allow an ExtendedComponent to be linked to
        other components.
    units : `str`, optional
        Unit description.

    Attributes
    ----------
    x : ComponentID
        The ComponentID of the x coordinate at the center of the extended
        region.
    y : ComponentID
        The ComponentID of the y coordinate at the center of the extended
        region, or None when only one center ComponentID was given.

    Raises
    ------
    TypeError
        If data is not a list of shapely.Geometry objects
    ValueError
        If center_comp_ids is not a list of length 1 or 2
    """

    def __init__(self, data, center_comp_ids, units=None):
        # Reject the input as soon as one element is not a Shapely geometry.
        if any(not isinstance(geom, shapely.Geometry) for geom in data):
            raise TypeError(
                "Input data for a ExtendedComponent should be a list of shapely.Geometry objects"
            )

        n_centers = len(center_comp_ids)
        if n_centers not in (1, 2):
            raise ValueError(
                "ExtendedComponent must be initialized with one or two ComponentIDs"
            )

        # The x center is always present; the y center only exists when two
        # ComponentIDs were supplied.
        self.x = center_comp_ids[0]
        self.y = center_comp_ids[1] if n_centers == 2 else None

        # The attributes are assigned directly here; the parent initializer
        # is not invoked by this class.
        self.units = units
        self._data = data

    @property
    def extended(self):
        """Always True: this component stores extended regions."""
        return True

    @property
    def numeric(self):
        """Always False: this component does not report itself as numeric."""
        return False

    @property
    def datetime(self):
        """Always False: this component does not report itself as date/time."""
        return False

    @property
    def categorical(self):
        """Always False: this component does not report itself as categorical."""
        return False
162 changes: 161 additions & 1 deletion glue/core/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@

import numpy as np
import pandas as pd
import shapely

from fast_histogram import histogram1d, histogram2d

Expand Down Expand Up @@ -36,7 +37,7 @@
# Note: leave all the following imports for component and component_id since
# they are here for backward-compatibility (the code used to live in this
# file)
from glue.core.component import Component, CoordinateComponent, DerivedComponent
from glue.core.component import Component, CoordinateComponent, DerivedComponent, ExtendedComponent
from glue.core.component_id import ComponentID, ComponentIDDict, PixelComponentID

try:
Expand Down Expand Up @@ -2057,3 +2058,162 @@ def pixel_label(i, ndim):
if 1 <= ndim <= 3:
label += " [{0}]".format('xyz'[ndim - 1 - i])
return label


class RegionData(Data):
    """
    A glue Data object for storing data that is associated with a region.

    This object can be used when a dataset describes 2D regions or 1D ranges.
    It contains exactly one :class:`~glue.core.component.ExtendedComponent`
    object which contains the boundaries of the regions, and must also contain
    one or two components that give the center of the regions in whatever data
    coordinates the regions are described in. Links in glue are not made
    directly on the ExtendedComponent, but instead on the center components.
    Thus, a subset that includes the center of a region will include that
    region, but a subset that includes just a little part of the region will
    not include that region. These center components are not the same as pixel
    components. For example, a dataset that is a table of 2D regions will have
    a single PixelComponent, but must have two of these center components.

    A typical use case for this object is to store the properties of geographic
    regions, where the boundaries of the regions are stored in an
    ExtendedComponent, and the centers of the regions are stored in two
    components, one for the longitude and one for the latitude. Additional
    components may describe arbitrary properties of these geographic regions
    (e.g. population, area, etc).

    Parameters
    ----------
    label : `str`, optional
        The label of the data.
    coords : :class:`~glue.core.coordinates.Coordinates`, optional
        The coordinates associated with the data.
    **kwargs
        All other keyword arguments are passed to the
        :class:`~glue.core.data.Data` constructor.

    Attributes
    ----------
    extended_component_id : :class:`~glue.core.component_id.ComponentID`
        The ID of the ExtendedComponent object that contains the boundaries of
        the regions, or None if no such component has been added yet.
    center_x_id : :class:`~glue.core.component_id.ComponentID`
        The ID of the Component object that contains the x-coordinate of the
        center of the regions. This is actually stored in the component
        with the extended_component_id, but it is convenient to have it here.
    center_y_id : :class:`~glue.core.component_id.ComponentID`
        The ID of the Component object that contains the y-coordinate of the
        center of the regions. This is actually stored in the component
        with the extended_component_id, but it is convenient to have it here.

    Examples
    --------
    There are two main options for initializing a RegionData object. The first
    is to simply pass in a list of Shapely objects, from which we create three
    components: one ExtendedComponent with the boundaries, and two regular
    Components with the x and y center coordinates computed from the Shapely
    method :meth:`~shapely.geometry.base.BaseGeometry.representative_point`.

        >>> geometries = [shapely.geometry.Point(0, 0).buffer(1), shapely.geometry.Point(1, 1).buffer(1)]
        >>> my_region_data = RegionData(label='My Regions', boundary=geometries)

    This will create a RegionData object with three components: one
    ExtendedComponent with label "boundary" and two regular Components with
    labels "Center [x] for boundary" and "Center [y] for boundary".

    The second is to explicitly create an ExtendedComponent (which requires
    passing in the ComponentIDs for the center coordinates) and then use
    `add_component` to add this component to a RegionData object. You might
    use this option if your dataset already contains points that represent the
    centers of your regions and you want to avoid re-calculating them. For
    example:

        >>> center_x = [0, 1]
        >>> center_y = [0, 1]
        >>> geometries = [shapely.geometry.Point(0, 0).buffer(1), shapely.geometry.Point(1, 1).buffer(1)]
        >>> my_region_data = RegionData(label='My Regions')
        >>> # Region IDs are created and returned when we add a Component to a Data object
        >>> cen_x_id = my_region_data.add_component(center_x, label='Center [x]')
        >>> cen_y_id = my_region_data.add_component(center_y, label='Center [y]')
        >>> extended_comp = ExtendedComponent(geometries, center_comp_ids=[cen_x_id, cen_y_id])
        >>> my_region_data.add_component(extended_comp, label='boundaries')
    """

    def __init__(self, label="", coords=None, **kwargs):
        # These must exist before the parent initializer runs, because
        # add_component (below) reads and writes _extended_component_id.
        self._extended_component_id = None
        self._center_x_id = None
        self._center_y_id = None
        # __init__ calls add_component which deals with ExtendedComponent logic
        super().__init__(label=label, coords=coords, **kwargs)

    def __repr__(self):
        return f'RegionData (label: {self.label} | extended_component: {self.extended_component_id})'

    @property
    def center_x_id(self):
        """
        ComponentID of the x center, read from the extended component.

        NOTE(review): when no extended component has been added yet, None is
        passed to ``get_component`` -- confirm the resulting behaviour.
        """
        return self.get_component(self.extended_component_id).x

    @property
    def center_y_id(self):
        """
        ComponentID of the y center, read from the extended component.

        NOTE(review): when no extended component has been added yet, None is
        passed to ``get_component`` -- confirm the resulting behaviour.
        """
        return self.get_component(self.extended_component_id).y

    @property
    def extended_component_id(self):
        """ComponentID of the extended component, or None if there is none."""
        return self._extended_component_id

    def _raise_if_extended_component_exists(self):
        """Raise ValueError if this object already has an extended component."""
        if self.extended_component_id is not None:
            raise ValueError(f"Cannot add another ExtendedComponent; existing extended component: {self.extended_component_id}")

    @contract(component='component_like', label='cid_like')
    def add_component(self, component, label):
        """
        Add a new component to this data set, allowing only one ExtendedComponent

        If component is an array of Shapely objects then we use
        :meth:`~shapely.geometry.base.BaseGeometry.representative_point` to
        create two new components for the center coordinates of the regions and
        add them to the RegionData object as well.

        If component is an ExtendedComponent, then we simply add it to the
        RegionData object.

        We do this here instead of extending `Component.autotyped` because
        we only want to use :class:`~glue.core.component.ExtendedComponent`
        objects in the context of a :class:`~glue.core.data.RegionData` object.

        Parameters
        ----------
        component : :class:`~glue.core.component.Component` or array-like
            Object to add. If this is an array of Shapely objects, then we
            create two new components for the center coordinates of the regions
            as well.
        label : `str` or :class:`~glue.core.component_id.ComponentID`
            The label. If this is a string, a new
            :class:`glue.core.component_id.ComponentID`
            with this label will be created and associated with the Component.

        Returns
        -------
        The ComponentID returned by the parent ``add_component`` for the
        component that was added (for an array of Shapely objects, the ID of
        the new ExtendedComponent).

        Raises
        ------
        ValueError
            If the RegionData already has an extended component and another
            one would be added, either as an ExtendedComponent instance or as
            an array of Shapely objects.
        """
        if not isinstance(component, Component):
            # NOTE: all() is True for an empty sequence, so an empty input
            # also takes this branch.
            if all(isinstance(s, shapely.Geometry) for s in component):
                # Check before anything is added, so that a rejected call does
                # not leave orphaned center components behind and does not
                # overwrite the existing extended component ID.
                self._raise_if_extended_component_exists()

                center_x = []
                center_y = []
                for s in component:
                    rep = s.representative_point()
                    center_x.append(rep.x)
                    center_y.append(rep.y)

                # Both an x and a y center component are always created here.
                cen_x_id = super().add_component(np.asarray(center_x), f"Center [x] for {label}")
                cen_y_id = super().add_component(np.asarray(center_y), f"Center [y] for {label}")
                ext_component = ExtendedComponent(np.asarray(component), center_comp_ids=[cen_x_id, cen_y_id])
                self._extended_component_id = super().add_component(ext_component, label)
                return self._extended_component_id

        if isinstance(component, ExtendedComponent):
            self._raise_if_extended_component_exists()
            self._extended_component_id = super().add_component(component, label)
            return self._extended_component_id

        return super().add_component(component, label)
Loading

0 comments on commit eabe4e3

Please sign in to comment.