Commit 36ed4f6

[pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
pre-commit-ci[bot] committed Dec 14, 2024
1 parent e30214f commit 36ed4f6
Showing 7 changed files with 105 additions and 69 deletions.
58 changes: 31 additions & 27 deletions scripts/build_demand.py

@@ -132,13 +132,11 @@ def get_WorldPop_data(
     return WorldPop_inputfile, WorldPop_filename
 
 
-def estimate_microgrid_population(
-    raster_path, shapes_path, output_file
-):
+def estimate_microgrid_population(raster_path, shapes_path, output_file):
     """
     Estimates the population within each microgrid by using raster data and shape geometries.
-    The function processes population density raster data and calculates the total population
-    for each microgrid by masking the raster data using the corresponding geometries from a
+    The function processes population density raster data and calculates the total population
+    for each microgrid by masking the raster data using the corresponding geometries from a
     GeoJSON file. The population estimates are saved as a CSV file.
 
     Parameters
@@ -183,7 +181,7 @@ def estimate_microgrid_population(
         list(population_data.items()), columns=["Microgrid_Name", "Population"]
     )
     # Save the population estimates to a CSV file
-    #population_df.to_csv(output_file, index=False)
+    # population_df.to_csv(output_file, index=False)
 
     return population_df
 
@@ -202,7 +200,7 @@ def calculate_load(
     inclusive,
 ):
     """
-    Calculate the microgrid demand based on a load profile provided as input,
+    Calculate the microgrid demand based on a load profile provided as input,
     appropriately scaled according to the population calculated for each cluster
     The output includes a time-indexed DataFrame containing the load for each bus in the microgrid
     and is saved as a CSV file.
@@ -226,7 +224,7 @@ def calculate_load(
     microgrids_list : dict
         Dictionary with microgrid names as keys and their cluster information as values.
     start_date : str
-        Start date for filtering the time series data
+        Start date for filtering the time series data
     end_date : str
         End date for filtering the time series data
     inclusive : str
@@ -237,10 +235,8 @@ def calculate_load(
        DataFrame containing the calculated load profile for all microgrids.
     """
-    # Estimate the population for the two microgrid
-    pop_microgrid = estimate_microgrid_population(
-        raster_path, shapes_path, output_file
-    )
+    # Estimate the population for the two microgrid
+    pop_microgrid = estimate_microgrid_population(raster_path, shapes_path, output_file)
     # Load the building classification data
     building_class = pd.read_csv(input_path)
     # Dictionary to store the load profiles for each microgrid
@@ -252,7 +248,6 @@ def calculate_load(
     time_index = pd.date_range(start="2013-01-01", end="2013-12-31 23:00:00", freq="h")
     df = df.set_index(time_index)
 
-
     # Apply time filtering based on the specified start and end dates
     if inclusive == "left":
         end_date = (pd.to_datetime(end_date) - pd.Timedelta(days=1)).strftime(
@@ -303,7 +298,6 @@ def calculate_load(
     return all_load_per_cluster
 
 
-
 def calculate_load_ramp(
     input_file_buildings,
     n,
@@ -323,15 +317,14 @@ def calculate_load_ramp(
     date_end,
     inclusive,
 ):
-
     cleaned_buildings = gpd.read_file(input_file_buildings)
     house = cleaned_buildings[cleaned_buildings["tags_building"] == "house"]
     pop_microgrid, microgrid_load = estimate_microgrid_population(
         n, p, raster_path, shapes_path, sample_profile, output_file
     )
     density = pop_microgrid / house["area_m2"].sum()
 
-
     grouped_buildings = cleaned_buildings.groupby("cluster_id")
     clusters = np.sort(cleaned_buildings["cluster_id"].unique())
     house_area_for_cluster = [
@@ -441,9 +434,17 @@ def calculate_load_ramp_std(
     ]
 
     mean_demand_tier_df = pd.DataFrame(
-        {f"tier_{i+1}": pd.read_excel(file)["mean"] for i, file in enumerate(demand_files)})
+        {
+            f"tier_{i+1}": pd.read_excel(file)["mean"]
+            for i, file in enumerate(demand_files)
+        }
+    )
     std_demand_tier_df = pd.DataFrame(
-        {f"tier_{i+1}": pd.read_excel(file)["std"] for i, file in enumerate(demand_files)})
+        {
+            f"tier_{i+1}": pd.read_excel(file)["std"]
+            for i, file in enumerate(demand_files)
+        }
+    )
     mean_demand_tier_df.insert(0, "tier_0", np.zeros(len(mean_demand_tier_df)))
     std_demand_tier_df.insert(0, "tier_0", np.zeros(len(mean_demand_tier_df)))
     mean_demand_tier_df.index = pd.date_range(
@@ -453,15 +454,19 @@ def calculate_load_ramp_std(
         "00:00:00", periods=len(mean_demand_tier_df), freq="H"
     ).time
 
-    pop= estimate_microgrid_population(raster_path, shapes_path,output_file)
+    pop = estimate_microgrid_population(raster_path, shapes_path, output_file)
 
     all_microgrid_loads = pd.DataFrame()
 
     for grid_name, grid_data in microgrid_list.items():
-        microgrid_buildings=cleaned_buildings[cleaned_buildings["name_microgrid"]==grid_name]
+        microgrid_buildings = cleaned_buildings[
+            cleaned_buildings["name_microgrid"] == grid_name
+        ]
         # Calculate the population density for the current microgrid based only on house buildings
         house = microgrid_buildings[microgrid_buildings["tags_building"] == "house"]
-        pop_microgrid = pop.loc[pop["Microgrid_Name"] == grid_name, "Population"].values[0]
+        pop_microgrid = pop.loc[
+            pop["Microgrid_Name"] == grid_name, "Population"
+        ].values[0]
         density = pop_microgrid / house["area_m2"].sum()
 
         # Calculate population per cluster
@@ -496,13 +501,11 @@ def calculate_load_ramp_std(
             [std_demand_tier_df] * len(date_range), ignore_index=True
         )
 
-        # Calculate load for each cluster and tier
+        # Calculate load for each cluster and tier
         result_dict = {}
         for k, pop_cluster in tier_pop_df.iterrows():
             load_df = pd.DataFrame()
-            for j, n_person in enumerate(
-                pop_cluster / 7  # Scale by family size
-            ):
+            for j, n_person in enumerate(pop_cluster / 7):  # Scale by family size
                 mean_load = mean_demand_tier_df_extended.iloc[:, j] * n_person
                 std_load = np.random.normal(
                     mean_demand_tier_df_extended.iloc[:, j],
@@ -518,7 +521,9 @@ def calculate_load_ramp_std(
         }
         tot_loads_df = pd.concat(tot_result_dict.values(), axis=1)
         if inclusive == "left":
-            date_range_tot = pd.date_range(start=date_start, end=date_end, freq="H")[:-1]
+            date_range_tot = pd.date_range(start=date_start, end=date_end, freq="H")[
+                :-1
+            ]
         else:
             date_range_tot = pd.date_range(start=date_start, end=date_end, freq="H")
         tot_loads_df.index = date_range_tot
@@ -533,7 +538,6 @@ def calculate_load_ramp_std(
     all_microgrid_loads.to_csv(output_path_csv)
 
 
-
 if __name__ == "__main__":
     if "snakemake" not in globals():
         from _helpers_dist import mock_snakemake
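
Note on the raster step above: the masking approach behind estimate_microgrid_population can be sketched roughly as below. This is a minimal illustration under stated assumptions, not the repository's code; the file paths, the "name" property on each shape, and the nodata handling are placeholders.

import geopandas as gpd
import pandas as pd
import rasterio
import rasterio.mask


def population_per_shape(raster_path, shapes_path):
    # Sum population-density pixels that fall inside each microgrid polygon.
    shapes = gpd.read_file(shapes_path)  # assumed: one polygon per microgrid, with a "name" field
    records = []
    with rasterio.open(raster_path) as src:
        for _, row in shapes.iterrows():
            # Clip the raster to the polygon; pixels outside are filled with nodata
            out_image, _ = rasterio.mask.mask(src, [row.geometry], crop=True)
            values = out_image[out_image != src.nodata] if src.nodata is not None else out_image
            records.append((row["name"], float(values.sum())))
    return pd.DataFrame(records, columns=["Microgrid_Name", "Population"])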
14 changes: 7 additions & 7 deletions scripts/build_shapes.py

@@ -23,7 +23,7 @@ def create_microgrid_shapes(microgrids_list, output_path):
     output_path : str
         Path where the GeoJSON file will be saved.
     """
-
+
     # Open the input dictionary into a pandas DataFrame for easier processing
     microgrids_list_df = pd.DataFrame(microgrids_list)
 
@@ -61,13 +61,13 @@ def create_microgrid_shapes(microgrids_list, output_path):
 def create_bus_regions(microgrids_list, output_path):
     """
     Creates bus regions for each microgrid in the list of microgrids and saves them as a GeoJSON file.
-    The generated shape will be used for the calculation of renewable energy producibility,
+    The generated shape will be used for the calculation of renewable energy producibility,
     which will be associated with the bus generated at the center of the geometry.
 
     Parameters
     ----------
     microgrids_list : dict
         Dictionary containing the microgrid names and their bounding box coordinates (lat_min, lon_min, lat_max, lon_max).
     output_path : str
         Path where the GeoJSON file will be saved.
     """
@@ -83,8 +83,8 @@ def create_bus_regions(microgrids_list, output_path):
 
     # Iterate over each column in the DataFrame
     for col in range(len(microgrids_list_df.columns)):
-        values = microgrids_list_df.iloc[:, col]
-        microgrid_name = microgrids_list_df.columns[col] + "_bus_renewable"
+        values = microgrids_list_df.iloc[:, col]
+        microgrid_name = microgrids_list_df.columns[col] + "_bus_renewable"
 
         # Define the vertices of the rectangle
         Top_left = (values[0], values[3])
@@ -111,8 +111,8 @@ def create_bus_regions(microgrids_list, output_path):
     microgrid_gdf = gpd.GeoDataFrame(
         {
             "name": microgrid_names,  # Names of the bus regions
-            "x": microgrid_x, # x-coordinates of the centers
-            "y": microgrid_y, # y-coordinates of the centers
+            "x": microgrid_x,  # x-coordinates of the centers
+            "y": microgrid_y,  # y-coordinates of the centers
             "geometry": microgrid_shapes,  # Polygon shapes of the regions
         }
     )
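
As context for create_bus_regions: the rectangle-plus-centroid construction can be sketched as follows. A hedged illustration only; the real function takes its corner ordering from the microgrid config, which may differ from the lon/lat convention assumed here.

from shapely.geometry import Polygon


def bus_region_from_bbox(lon_min, lat_min, lon_max, lat_max):
    # Rectangle spanning the bounding box, vertices listed counter-clockwise
    rectangle = Polygon(
        [
            (lon_min, lat_min),
            (lon_max, lat_min),
            (lon_max, lat_max),
            (lon_min, lat_max),
        ]
    )
    centre = rectangle.centroid  # the renewable bus sits at the centre of the region
    return rectangle, centre.x, centre.y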
14 changes: 9 additions & 5 deletions scripts/clean_earth_osm_data.py

@@ -15,7 +15,7 @@
 
 def extract_points(microgrid_shape_path, buildings_path, output_path):
     """
-    From the downloaded data, extracts buildings located within the boundaries of each microgrid geometry
+    From the downloaded data, extracts buildings located within the boundaries of each microgrid geometry
     and associates them with the respective microgrid name.
 
     Parameters
@@ -30,10 +30,10 @@ def extract_points(microgrid_shape_path, buildings_path, output_path):
     Returns
     -------
     GeoDataFrame
-        A GeoDataFrame containing the filtered buildings with an added field "name_microgrid"
+        A GeoDataFrame containing the filtered buildings with an added field "name_microgrid"
         that associates each building to its corresponding microgrid.
     """
-
+
     # Load the GeoJSON files
     microgrid = gpd.read_file(microgrid_shape_path)
     buildings = gpd.read_file(buildings_path)
@@ -44,12 +44,16 @@ def extract_points(microgrid_shape_path, buildings_path, output_path):
         # Extract the name of the microgrid
         microgrid_name = microgrid_shape["name"]
         # Filter buildings located within the microgrid geometry
-        buildings_in_microgrid = buildings[buildings.geometry.within(microgrid_shape.geometry)]
+        buildings_in_microgrid = buildings[
+            buildings.geometry.within(microgrid_shape.geometry)
+        ]
         # Add or replace the "name_microgrid" field with the microgrid name
         buildings_in_microgrid = buildings_in_microgrid.copy()
         buildings_in_microgrid["name_microgrid"] = microgrid_name
        # Append the filtered buildings to the final result
-        result = gpd.GeoDataFrame(pd.concat([result, buildings_in_microgrid], ignore_index=True))
+        result = gpd.GeoDataFrame(
+            pd.concat([result, buildings_in_microgrid], ignore_index=True)
+        )
     # Save the final result as a GeoJSON file
     result.to_file(output_path, driver="GeoJSON")
 
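
A design note on the within-filter above: iterating shape by shape is simple, but geopandas can tag every building in a single spatial join. A hedged sketch, assuming both layers share a CRS and the shapes carry a "name" column (paths are placeholders):

import geopandas as gpd

buildings = gpd.read_file("buildings.geojson")          # placeholder path
microgrids = gpd.read_file("microgrid_shapes.geojson")  # placeholder path

# Rename before joining so the microgrid name lands in "name_microgrid"
shapes = microgrids.rename(columns={"name": "name_microgrid"})[
    ["name_microgrid", "geometry"]
]
# predicate="within" keeps only buildings that fall entirely inside a shape;
# buildings straddling a boundary would need predicate="intersects" instead
tagged = gpd.sjoin(buildings, shapes, how="inner", predicate="within")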
42 changes: 30 additions & 12 deletions scripts/cluster_buildings.py

@@ -22,7 +22,7 @@
 
 def buildings_classification(input_file, crs):
     """
     Filters the data contained in the input GeoJSON file, selecting only Polygon elements.
-    Calculates the plan area for each building based on the specified coordinate system (CRS)
+    Calculates the plan area for each building based on the specified coordinate system (CRS)
     and adds this information as a new column to the GeoDataFrame.
     Buildings classified as "yes" with an area below a predefined limit are reclassified as "house".
@@ -96,13 +96,17 @@ def get_central_points_geojson_with_buildings(
     # Classify and process the buildings
     microgrid_buildings = buildings_classification(input_filepath, crs)
     # Prepare GeoDataFrames and DataFrames to accumulate results
-    all_central_features = gpd.GeoDataFrame(columns=["geometry", "cluster", "name_microgrid"])
+    all_central_features = gpd.GeoDataFrame(
+        columns=["geometry", "cluster", "name_microgrid"]
+    )
     all_microgrid_buildings = gpd.GeoDataFrame(columns=microgrid_buildings.columns)
     all_buildings_class = pd.DataFrame()
     # Process each microgrid individually
     for grid_name, grid_data in microgrids_list.items():
         # Filter buildings belonging to the current microgrid
-        filtered_buildings = microgrid_buildings[microgrid_buildings["name_microgrid"] == grid_name]
+        filtered_buildings = microgrid_buildings[
+            microgrid_buildings["name_microgrid"] == grid_name
+        ]
         # Extract centroids of each building as coordinates
         centroids_building = [
             (row.geometry.centroid.x, row.geometry.centroid.y)
@@ -112,7 +116,7 @@ def get_central_points_geojson_with_buildings(
         # Apply KMeans clustering to group the buildings
         kmeans = KMeans(n_clusters=n_clusters, random_state=0).fit(centroids_building)
         # Get the coordinates of cluster centroids
-        centroids = kmeans.cluster_centers_
+        centroids = kmeans.cluster_centers_
         # Identify the central point for each cluster
         central_points = []
         for i in range(kmeans.n_clusters):
@@ -125,32 +129,46 @@ def get_central_points_geojson_with_buildings(
         for i, central_point in enumerate(central_points):
             central_features.append(
                 {
-                    "geometry": Point(central_point),
+                    "geometry": Point(central_point),
                     "cluster": i,
                     "name_microgrid": grid_name,
                 }
             )
         central_features_gdf = gpd.GeoDataFrame(
             central_features, crs=filtered_buildings.crs
         ).to_crs("EPSG:4326")
-        all_central_features = pd.concat([all_central_features, central_features_gdf], ignore_index=True)
+        all_central_features = pd.concat(
+            [all_central_features, central_features_gdf], ignore_index=True
+        )
 
         # Assign cluster IDs to buildings and append to the results
         clusters = kmeans.labels_
         filtered_buildings["cluster_id"] = clusters.astype(int)
-        all_microgrid_buildings = pd.concat([all_microgrid_buildings, filtered_buildings], ignore_index=True)
+        all_microgrid_buildings = pd.concat(
+            [all_microgrid_buildings, filtered_buildings], ignore_index=True
+        )
 
         # Count building types within each cluster and append to the summary
         buildings_class = (
-            filtered_buildings.groupby("cluster_id").tags_building.value_counts().reset_index(name="count")
+            filtered_buildings.groupby("cluster_id")
+            .tags_building.value_counts()
+            .reset_index(name="count")
         )
         buildings_class["name_microgrid"] = grid_name
-        all_buildings_class = pd.concat([all_buildings_class, buildings_class], ignore_index=True)
+        all_buildings_class = pd.concat(
+            [all_buildings_class, buildings_class], ignore_index=True
+        )
 
     # Save all the results to their respective output files
-    all_central_features.to_file(output_filepath_centroids, driver="GeoJSON") # Save cluster centroids as GeoJSON
-    all_microgrid_buildings.to_file(output_filepath_buildings, driver="GeoJSON") # Save clustered buildings as GeoJSON
-    all_buildings_class.to_csv(output_path_csv, index=False) # Save building type counts as CSV
+    all_central_features.to_file(
+        output_filepath_centroids, driver="GeoJSON"
+    )  # Save cluster centroids as GeoJSON
+    all_microgrid_buildings.to_file(
+        output_filepath_buildings, driver="GeoJSON"
+    )  # Save clustered buildings as GeoJSON
+    all_buildings_class.to_csv(
+        output_path_csv, index=False
+    )  # Save building type counts as CSV
 
 
 if __name__ == "__main__":
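
On the clustering step in get_central_points_geojson_with_buildings: the pattern is KMeans over building centroids, then snapping each cluster centre to the nearest real centroid. A self-contained sketch with synthetic points, for illustration only:

import numpy as np
from sklearn.cluster import KMeans

rng = np.random.default_rng(0)
centroids_building = rng.uniform(0.0, 1.0, size=(200, 2))  # stand-in for building centroids

kmeans = KMeans(n_clusters=6, random_state=0).fit(centroids_building)

central_points = []
for i in range(kmeans.n_clusters):
    members = centroids_building[kmeans.labels_ == i]
    # Choose the member closest to the cluster's geometric centre
    distances = np.linalg.norm(members - kmeans.cluster_centers_[i], axis=1)
    central_points.append(members[np.argmin(distances)])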
(Diffs for the remaining 3 changed files did not load in this view.)
