Make AGV preprocessing reproducible (#165)
Instead of local paths, this uses the CloudStorage class from the
`ribasim_nl` package to load AGV data, such that this code can also run
on other machines.

I uncommented the first processing part since that was needed for the
second part to run.

This also adds an upload of the processed data to the cloud to
`AmstelGooienVecht/verwerkt/preprocessed.gpkg`, so the result of this
step is directly accessible as well.

It should not change any results, but it's good to go over this with
@rbruijnshkv tomorrow.
visr authored Oct 17, 2024
1 parent 71b67e6 commit 05dfcb4
Showing 1 changed file with 93 additions and 93 deletions.
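
In outline, the cloud-backed flow introduced by this commit looks as follows. This is a condensed sketch, not the notebook itself: the CloudStorage calls (download_aangeleverd, joinpath, upload_verwerkt) are used exactly as in the diff below, while the actual preprocessing of the AGV layers is elided.

from ribasim_nl import CloudStorage

cloud = CloudStorage()
waterschap_long = "AmstelGooienVecht"

# pull the raw deliveries from the cloud instead of reading local paths
cloud.download_aangeleverd(waterschap_long)

aangeleverd_dir = cloud.joinpath(waterschap_long, "aangeleverd")
verwerkt_dir = cloud.joinpath(waterschap_long, "verwerkt")
verwerkt_dir.mkdir(parents=True, exist_ok=True)

# ... read and preprocess the AGV layers, then write them to
# verwerkt_dir / "preprocessed.gpkg" (see the diff for the details) ...

# upload the processed result so AmstelGooienVecht/verwerkt/preprocessed.gpkg
# is directly available for the next step
cloud.upload_verwerkt(waterschap_long)
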
@@ -7,12 +7,12 @@
"metadata": {},
"outputs": [],
"source": [
"# import packages and functions\n",
"import os\n",
"\n",
"import geopandas as gpd\n",
"import numpy as np\n",
"import pandas as pd"
"import pandas as pd\n",
"from general_functions import show_layers_and_columns, store_data\n",
"from ribasim_nl import CloudStorage\n",
"from shapely import wkt"
]
},
{
@@ -22,19 +22,19 @@
"metadata": {},
"outputs": [],
"source": [
"from general_functions import *\n",
"\n",
"%load_ext autoreload\n",
"%autoreload 2\n",
"pd.set_option(\"display.max_columns\", None)"
]
},
{
"cell_type": "markdown",
"cell_type": "code",
"execution_count": null,
"id": "2",
"metadata": {},
"outputs": [],
"source": [
"# Amstel, Gooi en Vecht"
"cloud = CloudStorage()"
]
},
{
@@ -44,11 +44,8 @@
"metadata": {},
"outputs": [],
"source": [
"# define relative paths\n",
"waterschap = \"AVG\"\n",
"path_AVG = \"..\\..\\Data_preprocessed\\Waterschappen\\AmstelGooienVecht\"\n",
"output_gpkg_path = \"../../Data_postprocessed/Waterschappen/AmstelGooienVecht\"\n",
"DM_path = \"..\\..\\Data_overig\\DM_Netwerk\\DM_Netwerk.shp\""
"waterschap_long = \"AmstelGooienVecht\""
]
},
{
@@ -58,58 +55,95 @@
"metadata": {},
"outputs": [],
"source": [
"# #AVG has delivered all data in CSV format. Load it in manually with some data mutations\n",
"# AVG = {}\n",
"# variables = ['stuw', 'gemaal', 'afsluitmiddel', 'duikersifonhevel', 'hydroobject']#, 'peilgebiedpraktijk', 'peilafwijkinggebied']\n",
"# for variable in variables:\n",
"# path_variable = os.path.join(path_AVG, variable + '.csv')\n",
"# df_var = pd.read_csv(path_variable, delimiter=';')\n",
"# geom_col = df_var.keys()[-1] #retrieve the column name\n",
"cloud.download_aangeleverd(waterschap_long)"
]
},
{
"cell_type": "markdown",
"id": "5",
"metadata": {},
"source": [
"# Amstel, Gooi en Vecht"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "6",
"metadata": {},
"outputs": [],
"source": [
"# define paths\n",
"aangeleverd_dir = cloud.joinpath(waterschap_long, \"aangeleverd\")\n",
"verwerkt_dir = cloud.joinpath(waterschap_long, \"verwerkt\")\n",
"\n",
"# if not 'geometrie' in geom_col:\n",
"# raise ValueError('No \"geometry\" string found in the last column of the dataframe. Check for existence')\n",
"output_gpkg_path = verwerkt_dir / \"preprocessed\"\n",
"\n",
"# df_var['geometry'] = df_var[geom_col].apply(lambda x: wkt.loads(x.split(';')[-1]))\n",
"# AVG[variable] = df_var\n",
"# pyogrio needs the exclamation mark to read the file from the zip\n",
"dump_path = (\n",
" aangeleverd_dir / \"aanlevering_6maart24/data dump 6 maart LHM AGV.zip!/data dump 6 maart LHM AGV/\"\n",
").as_posix()\n",
"\n",
"# #there is one last gpkg which contains the streefpeilen (and peilgebieden)\n",
"# AVG['peilgebied'] = gpd.read_file(os.path.join(path_AVG, 'vigerende_peilgebieden.gpkg'))"
"verwerkt_dir.mkdir(parents=True, exist_ok=True)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "5",
"id": "7",
"metadata": {},
"outputs": [],
"source": [
"# AVG['peilgebied']['streefpeil'] = np.nan\n",
"# AVG['peilgebied']['streefpeil'] = AVG['peilgebied']['streefpeil'].fillna(value=AVG['peilgebied']['GPGZMRPL'])\n",
"# AVG['peilgebied']['streefpeil'] = AVG['peilgebied']['streefpeil'].fillna(value=AVG['peilgebied']['IWS_GPGVASTP'])\n",
"# AVG['peilgebied']['streefpeil'] = AVG['peilgebied']['streefpeil'].fillna(value=AVG['peilgebied']['IWS_GPGONDP'])\n",
"# AVG has delivered all data in CSV format. Load it in manually with some data mutations\n",
"AVG = {}\n",
"variables = [\n",
" \"stuw\",\n",
" \"gemaal\",\n",
" \"afsluitmiddel\",\n",
" \"duikersifonhevel\",\n",
" \"hydroobject\",\n",
"] # , 'peilgebiedpraktijk', 'peilafwijkinggebied']\n",
"for variable in variables:\n",
" path_variable = aangeleverd_dir / \"Eerste_levering\" / (variable + \".csv\")\n",
" df_var = pd.read_csv(path_variable, delimiter=\";\")\n",
" geom_col = df_var.keys()[-1] # retrieve the column name\n",
"\n",
" if \"geometrie\" not in geom_col:\n",
" raise ValueError('No \"geometry\" string found in the last column of the dataframe. Check for existence')\n",
"\n",
"# print('Number of missing streefpeilen = ', len(AVG['peilgebied']['streefpeil'].loc[AVG['peilgebied']['streefpeil'].isna()]))\n",
" df_var[\"geometry\"] = df_var[geom_col].apply(lambda x: wkt.loads(x.split(\";\")[-1]))\n",
" AVG[variable] = df_var\n",
"\n",
"# fig, ax = plt.subplots()\n",
"# AVG['peilgebied'].geometry.plot(ax=ax, color='cornflowerblue')\n",
"# AVG['peilgebied'].loc[AVG['peilgebied']['streefpeil'].isna()].geometry.plot(ax=ax, color='red')\n",
"# ax.legend()"
"# there is one last gpkg which contains the streefpeilen (and peilgebieden)\n",
"AVG[\"peilgebied\"] = gpd.read_file(aangeleverd_dir / \"Na_levering\" / \"vigerende_peilgebieden.gpkg\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "6",
"id": "8",
"metadata": {},
"outputs": [],
"source": [
"AVG = {}"
"AVG[\"peilgebied\"][\"streefpeil\"] = np.nan\n",
"AVG[\"peilgebied\"][\"streefpeil\"] = AVG[\"peilgebied\"][\"streefpeil\"].fillna(value=AVG[\"peilgebied\"][\"GPGZMRPL\"])\n",
"AVG[\"peilgebied\"][\"streefpeil\"] = AVG[\"peilgebied\"][\"streefpeil\"].fillna(value=AVG[\"peilgebied\"][\"IWS_GPGVASTP\"])\n",
"AVG[\"peilgebied\"][\"streefpeil\"] = AVG[\"peilgebied\"][\"streefpeil\"].fillna(value=AVG[\"peilgebied\"][\"IWS_GPGONDP\"])\n",
"\n",
"print(\n",
" \"Number of missing streefpeilen = \",\n",
" len(AVG[\"peilgebied\"][\"streefpeil\"].loc[AVG[\"peilgebied\"][\"streefpeil\"].isna()]),\n",
")\n",
"\n",
"# fig, ax = plt.subplots()\n",
"# AVG['peilgebied'].geometry.plot(ax=ax, color='cornflowerblue')\n",
"# AVG['peilgebied'].loc[AVG['peilgebied']['streefpeil'].isna()].geometry.plot(ax=ax, color='red')\n",
"# ax.legend()"
]
},
{
"cell_type": "markdown",
"id": "7",
"id": "9",
"metadata": {},
"source": [
"# Nalevering"
@@ -118,37 +152,26 @@
{
"cell_type": "code",
"execution_count": null,
"id": "8",
"id": "10",
"metadata": {},
"outputs": [],
"source": [
"# overwrite previous data\n",
"AVG[\"stuw\"] = gpd.read_file(\n",
" r\"D:\\Users\\Bruijns\\Documents\\PR4750_20\\Data_preprocessed\\Waterschappen\\AmstelGooienVecht\\data dump 6 maart LHM AGV\\Stuw.shp\"\n",
")\n",
"AVG[\"stuw\"] = gpd.read_file(dump_path + \"/Stuw.shp\")\n",
"AVG[\"stuw\"] = AVG[\"stuw\"].loc[AVG[\"stuw\"].LHM == \"LHM\"]\n",
"\n",
"AVG[\"gemaal\"] = gpd.read_file(\n",
" r\"D:\\Users\\Bruijns\\Documents\\PR4750_20\\Data_preprocessed\\Waterschappen\\AmstelGooienVecht\\data dump 6 maart LHM AGV\\Gemaal.shp\"\n",
")\n",
"AVG[\"gemaal\"] = gpd.read_file(dump_path + \"/Gemaal.shp\")\n",
"AVG[\"gemaal\"] = AVG[\"gemaal\"].loc[AVG[\"gemaal\"].LHM == \"LHM\"]\n",
"\n",
"AVG[\"duikersifonhevel\"] = gpd.read_file(\n",
" r\"D:\\Users\\Bruijns\\Documents\\PR4750_20\\Data_preprocessed\\Waterschappen\\AmstelGooienVecht\\data dump 6 maart LHM AGV\\Duikersifonhevel.shp\"\n",
")\n",
"AVG[\"hydroobject\"] = gpd.read_file(\n",
" r\"D:\\Users\\Bruijns\\Documents\\PR4750_20\\Data_preprocessed\\Waterschappen\\AmstelGooienVecht\\data dump 6 maart LHM AGV\\LHM_hydrovakken.shp\"\n",
")\n",
"\n",
"AVG[\"peilgebied\"] = gpd.read_file(\n",
" r\"D:\\Users\\Bruijns\\Documents\\PR4750_20\\Data_preprocessed\\Waterschappen\\AmstelGooienVecht\\data dump 6 maart LHM AGV\\LHM_gebieden.shp\"\n",
")"
"AVG[\"duikersifonhevel\"] = gpd.read_file(dump_path + \"/DuikerSifonHevel.shp\")\n",
"AVG[\"hydroobject\"] = gpd.read_file(dump_path + \"/LHM_hydrovakken.shp\")\n",
"AVG[\"peilgebied\"] = gpd.read_file(dump_path + \"/LHM_gebieden.shp\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "9",
"id": "11",
"metadata": {},
"outputs": [],
"source": [
@@ -160,7 +183,7 @@
{
"cell_type": "code",
"execution_count": null,
"id": "10",
"id": "12",
"metadata": {},
"outputs": [],
"source": [
@@ -170,7 +193,7 @@
{
"cell_type": "code",
"execution_count": null,
"id": "11",
"id": "13",
"metadata": {},
"outputs": [],
"source": [
@@ -192,17 +215,15 @@
"AVG[\"gemaal\"].loc[AVG[\"gemaal\"].functiegemaal.str.contains(\"anvoergemaal|pmaling|an-|p-|pvoer\"), \"func_aanvoer\"] = True\n",
"AVG[\"gemaal\"].loc[AVG[\"gemaal\"].functiegemaal.str.contains(\"irculatie\"), \"func_circulatie\"] = True\n",
"AVG[\"gemaal\"].loc[\n",
" (AVG[\"gemaal\"].func_afvoer is False)\n",
" & (AVG[\"gemaal\"].func_aanvoer is False)\n",
" & (AVG[\"gemaal\"].func_circulatie is False),\n",
" ~AVG[\"gemaal\"].func_afvoer & ~AVG[\"gemaal\"].func_aanvoer & ~AVG[\"gemaal\"].func_circulatie,\n",
" \"func_afvoer\",\n",
"] = True # set to afvoergemaal is there the function is unknown"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "12",
"id": "14",
"metadata": {},
"outputs": [],
"source": [
@@ -219,7 +240,7 @@
"AVG[\"gemaal\"] = gpd.GeoDataFrame(AVG[\"gemaal\"]).to_crs(\"epsg:28992\")\n",
"\n",
"# afsluitmiddel\n",
"AVG[\"afsluitmiddel\"] = AVG[\"afsluitmiddel\"][[\"code\", \"geometry\"]]\n",
"AVG[\"afsluitmiddel\"] = AVG[\"afsluitmiddel\"][[\"code\", \"geometry\"]].copy()\n",
"AVG[\"afsluitmiddel\"].loc[:, \"nen3610id\"] = \"dummy_nen3610id_afsluitmiddel_\" + AVG[\"afsluitmiddel\"].index.astype(str)\n",
"AVG[\"afsluitmiddel\"][\"globalid\"] = \"dummy_globalid_afsluitmiddel_\" + AVG[\"afsluitmiddel\"].index.astype(str)\n",
"AVG[\"afsluitmiddel\"] = gpd.GeoDataFrame(AVG[\"afsluitmiddel\"]).set_crs(\"epsg:28992\")\n",
@@ -252,15 +273,14 @@
"AVG[\"peilgebied\"][\"nen3610id\"] = \"dummy_nen3610id_peilgebied_\" + AVG[\"peilgebied\"].index.astype(str)\n",
"AVG[\"peilgebied\"][\"globalid\"] = \"dummy_globalid_peilgebied_\" + AVG[\"peilgebied\"].index.astype(str)\n",
"\n",
"\n",
"AVG[\"peilgebied\"] = AVG[\"peilgebied\"][[\"code\", \"nen3610id\", \"globalid\", \"geometry\"]]\n",
"AVG[\"peilgebied\"] = gpd.GeoDataFrame(AVG[\"peilgebied\"]).to_crs(\"epsg:28992\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "13",
"id": "15",
"metadata": {},
"outputs": [],
"source": [
@@ -270,17 +290,9 @@
"AVG[\"hydroobject\"] = gpd.GeoDataFrame(AVG[\"hydroobject\"]).set_crs(\"epsg:28992\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "14",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "markdown",
"id": "15",
"id": "16",
"metadata": {},
"source": [
"# Control, store"
@@ -289,7 +301,7 @@
{
"cell_type": "code",
"execution_count": null,
"id": "16",
"id": "17",
"metadata": {},
"outputs": [],
"source": [
@@ -299,32 +311,20 @@
{
"cell_type": "code",
"execution_count": null,
"id": "17",
"id": "18",
"metadata": {},
"outputs": [],
"source": [
"# Check if the directory exists\n",
"if not os.path.exists(output_gpkg_path):\n",
" # If it doesn't exist, create it\n",
" os.makedirs(output_gpkg_path)\n",
"\n",
"store_data(waterschap=AVG, output_gpkg_path=output_gpkg_path + \"/AGV\")"
"store_data(waterschap=AVG, output_gpkg_path=str(output_gpkg_path))\n",
"cloud.upload_verwerkt(waterschap_long)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "18",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Routing",
"display_name": "default",
"language": "python",
"name": "routing"
"name": "python3"
},
"language_info": {
"codemirror_mode": {
@@ -336,7 +336,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.13"
"version": "3.12.7"
}
},
"nbformat": 4,