Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix capacity factor timeseries in convert.py #346

Open
wants to merge 6 commits into
base: master
Choose a base branch
from
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions atlite/__init__.py
Original file line number Diff line number Diff line change
@@ -31,6 +31,7 @@

# e.g. "0.17.1" or "0.17.1.dev4+ga3890dc0" (if installed from git)
__version__ = version("atlite")
# NOTE(review): hard-coded override of the installed package version. It has
# to be a string: `0.15.0` is not a valid Python literal, and the regex match
# below requires `str` input. Confirm whether this override should stay.
__version__ = "0.15.0"
# e.g. "0.17.0" # TODO, in the network structure it should use the dev version
match = re.match(r"(\d+\.\d+(\.\d+)?)", __version__)
assert match, f"Could not determine release_version of atlite: {__version__}"
99 changes: 99 additions & 0 deletions atlite/aggregate.py
Original file line number Diff line number Diff line change
@@ -27,3 +27,102 @@ def aggregate_matrix(da, matrix, index):
else:
da = da.stack(spatial=("y", "x")).transpose("spatial", "time")
return xr.DataArray(matrix * da, [index, da.coords["time"]])


def _dense_layout(matrix, index, spatial):
    """
    Build the dense grid-cell weight array with dims ("spatial", index.name).

    `matrix` is densified with ``toarray()``, labelled with `index` along its
    first axis and with the stacked `spatial` coordinate along its second
    axis, and then transposed so that "spatial" comes first.
    """
    layout = xr.DataArray(
        matrix.toarray(),
        dims=(index.name, "spatial"),
        coords={index.name: index, "spatial": spatial},
    )
    return layout.transpose("spatial", index.name)


def aggregate_gridcells(da, matrix, index, freq=None, agg="sum"):
    """
    Weight the grid cells of `da` by `matrix` and aggregate the result in time.

    The spatial dimensions ("y", "x") are stacked, every grid cell is
    multiplied element-wise with its weight for each entry of `index`, and
    the product is unstacked back to ("y", "x"). The result is then reduced
    along "time", optionally after resampling to `freq`.

    Parameters
    ----------
    da : xarray.DataArray
        The data array to process, typically containing time-series data
        with spatial dimensions ("y", "x").
    matrix : sparse matrix or equivalent
        Sparse matrix used to map spatial grid cells to aggregated regions
        (e.g., buses or zones). It must provide ``toarray()``; note that it
        is converted to a dense array here.
    index : pandas.Index or xarray.Index
        Index corresponding to the aggregated regions (e.g., bus IDs). If it
        has no name, it is renamed to "dim_0".
    freq : str, optional
        Resampling frequency string (e.g., 'D' for daily). If None, no
        resampling is applied and the reduction runs over the whole "time"
        dimension.
    agg : str, optional
        Aggregation method to apply. Options are "mean" or "sum"
        (default is "sum"). When `freq` is None, any other value (including
        None) skips the temporal reduction and leaves the data unmodified.

    Returns
    -------
    da : xarray.DataArray
        The weighted data array after spatial and temporal processing.
    layout : xarray.DataArray
        A DataArray holding the weight of each grid cell for each entry of
        `index`, unstacked back to the "y" and "x" dimensions.

    Raises
    ------
    ValueError
        If `freq` is given and `agg` is neither "mean" nor "sum".
    """
    # Reject an unusable `agg` before the dense spatial product is built, so
    # the caller does not pay for the expensive part just to get an error.
    if freq is not None and agg not in ("mean", "sum"):
        raise ValueError(
            f"Invalid aggregation method '{agg}' for frequency '{freq}'. "
            "Use 'mean' or 'sum' instead."
        )

    # The index name becomes a dimension name below, so it must not be None.
    if index.name is None:
        index = index.rename("dim_0")

    if isinstance(da.data, dask.array.core.Array):
        # Dask-backed input: stack ("y", "x") and keep "spatial" in a single
        # chunk before weighting.
        da = da.stack(spatial=("y", "x")).chunk({"spatial": -1})
        layout = _dense_layout(matrix, index, da["spatial"])

        # Add the `index.name` dimension explicitly, then weight each grid
        # cell element-wise.
        da = da.expand_dims({index.name: layout[index.name]})
        da = da * layout
    else:
        # In-memory input: same weighting, with "spatial" leading "time".
        da = da.stack(spatial=("y", "x")).transpose("spatial", "time")
        layout = _dense_layout(matrix, index, da["spatial"])
        da = xr.DataArray(da * layout)

    # Restore the original "y" and "x" dimensions on both outputs.
    da = da.unstack("spatial")
    layout = layout.unstack("spatial")

    # Reduce along "time": per resampling bin when `freq` is given, otherwise
    # over the full time axis.
    reducible = da if freq is None else da.resample(time=freq)
    if agg == "mean":
        da = reducible.mean("time", keep_attrs=True)
    elif agg == "sum":
        da = reducible.sum("time", keep_attrs=True)
    # Any other `agg` can only reach this point with `freq` None; the data
    # is then returned without temporal aggregation.

    return da, layout
Loading