-
-
Notifications
You must be signed in to change notification settings - Fork 17
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
add gantt chart for pv site data availability
- Loading branch information
1 parent
7688761
commit 85eb717
Showing
1 changed file
with
81 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,81 @@ | ||
"""Gantt chart for India PV systems. This shows where there are gaps in the data.""" | ||
import os | ||
import pandas as pd | ||
import numpy as np | ||
import plotly.express as px | ||
import h5py | ||
|
||
# Location of the HDF file holding the generation data for every system,
# taken from the PV_DATA_HDF environment variable (None when it is unset).
pv_data_hdf = os.getenv("PV_DATA_HDF")
|
||
# IDs of the India systems that currently have some data in the hdf file.
# Kept as strings because they are used as keys into the hdf groups.
systems_with_data = [
    "56151", "56709", "58780", "59687", "59710",
    "60294", "60602", "66634", "71120", "72742",
    "73347", "77684", "77710", "78186", "79612",
    "81408", "82081", "85738", "86244", "87410",
    "90559", "91554", "97094", "99833", "100451",
]
|
||
|
||
def _availability_segments(timestamps, max_gap=pd.Timedelta("1D")):
    """Split a series of reading times into unbroken runs of data.

    Two consecutive readings further apart than ``max_gap`` are treated as
    a gap: the earlier reading closes one run and the later one opens the
    next.

    Args:
        timestamps: pandas Series of datetimes in the order they were stored.
        max_gap: largest spacing between readings that still counts as
            continuous data.

    Returns:
        A list of ``(start, finish)`` timestamp pairs, one per run. Empty
        when ``timestamps`` is empty.
    """
    if timestamps.empty:
        return []

    opens_run = timestamps.diff() > max_gap
    # The very first reading has no predecessor (its diff is NaT) but it
    # still opens the first run; without this the leading run, or the only
    # run of a gap-free system, would never be reported.
    opens_run.iloc[0] = True

    starts = timestamps[opens_run]
    # A run finishes on the reading just before the next run opens.
    finishes = timestamps.shift(1)[opens_run].shift(-1)
    # The final run has no successor, so it finishes on the last reading.
    # `finishes` is its own Series, so this assignment is not a chained
    # assignment on a slice and also works under pandas Copy-on-Write.
    finishes.iloc[-1] = timestamps.iloc[-1]

    return list(zip(starts, finishes))


# one dict per bar of the gantt chart: {"System ID", "Start", "Finish"}
pv_systems = []

# read the hdf file and get start and end dates of available data per site
with h5py.File(pv_data_hdf, "r") as f:
    # loop through each pv system in the hdf file
    for system_id in systems_with_data:
        table = pd.DataFrame(np.array(f["timeseries"][system_id]["table"]))
        timestamps = pd.to_datetime(table["index"], unit="ns")
        # only data after the start of 2018 is of interest
        timestamps = timestamps[timestamps > pd.Timestamp("2018-01-01")]

        # a system with no readings in range simply contributes no bars
        for start, finish in _availability_segments(timestamps):
            pv_systems.append(
                {
                    "System ID": system_id,
                    "Start": start,
                    "Finish": finish,
                }
            )
|
||
# Draw one horizontal bar per (system, start, finish) record with plotly,
# one row and one colour per system, then open the figure.
timeline_options = {
    "x_start": "Start",
    "x_end": "Finish",
    "y": "System ID",
    "color": "System ID",
    "title": "Gantt Chart of PV Systems in India",
}
fig = px.timeline(pv_systems, **timeline_options)
fig.show()