from copy import deepcopy
from tempfile import NamedTemporaryFile
import geopandas as gpd
@@ -284,11 +284,11 @@ Multi-Target Spatial Prediction using the Meuse Dataset
import matplotlib.pyplot as plt
from matplotlib import cm
= ms.predictors
predictor_files = ms.meuse training_pts_file
= Raster(predictor_files)
stack stack.names
Multi-Target Spatial Prediction using the Meuse Dataset
Pyspatialml implements pandas-style indexing for Raster
objects, using Raster.loc
to index by the name of the raster, and Raster.iloc
to select by index. This method also accepts slices. Label-based indexing is also provided directly by the __getattr_ magic method, i.e. Raster[name]
or for multiple layers Raster[(names)]
.
For example we can remove layers from Raster object using the Raster.drop
method, or by subsetting the raster:
'ffreq') stack.drop(
Raster Object Containing 11 Layers
@@ -314,7 +314,7 @@ Multi-Target Spatial Prediction using the Meuse Dataset
We can store matplotlib cmaps as an attribute within each layer in the Raster:
-= 'RdBu'
stack.chnl_dist.cmap = 'terrain'
stack.dem.cmap = 'Reds'
@@ -329,7 +329,7 @@ stack.dist.cmap Multi-Target Spatial Prediction using the Meuse Dataset
= 'coolwarm' stack.twi.cmap
Plot the predictors in the Raster object as a raster matrix:
-'seaborn-v0_8')
mpl.style.use(= stack.plot(figsize=(9, 7))
axs = axs.flatten()[10]
@@ -350,14 +350,14 @@ ax Multi-Target Spatial Prediction using the Meuse Dataset
Feature Engineering
-We want the prediction results to be depend on the spatial locations of the training data. So to include spatial information, coordinate grids can be generated and added to the Raster object:
-We want the prediction results to be dependent on the spatial locations of the training data. So to include spatial information, coordinate grids can be generated and added to the Raster object:
+= xy_coordinates(
xy_layer =stack.iloc[0],
layer=NamedTemporaryFile(suffix=".tif").name
file_path )
= xy_coordinates(
xy_layer =stack.iloc[0],
layer=NamedTemporaryFile(suffix=".tif").name
@@ -383,11 +383,11 @@ file_pathFeature Engineering
Append them to the Raster object:
-= stack.append([xy_layer, edms]) stack
Plot the new predictors:
-= stack.plot(figsize=(9, 7))
axs = axs.flatten()[10]
ax = ax.images
@@ -406,10 +406,10 @@ im Feature Engineering
The area that is filled by some of the grids is different. This doesn’t matter for the prediction because pixels in the Raster object that include some NaNs in some of the layers will be removed. However, the plots could potentially be given a cleaner look. We can use the Raster.intersect method to fix this:
-= stack.intersect() stack
= stack.plot(figsize=(9, 7))
axs = axs.flatten()[10]
ax = ax.images
@@ -430,7 +430,7 @@ im Feature Engineering
Read the Meuse Dataset
-
+
= gpd.read_file(training_pts_file)
training_pts training_pts.head()
@@ -546,7 +546,7 @@ Read the Meuse Data
Plot the training points:
-
+
from mpl_toolkits.axes_grid1 import make_axes_locatable
= plt.subplots(2, 3, figsize=(8.5, 7))
@@ -581,7 +581,7 @@ fig, axs Read the Meuse Data
Extract Raster Values at the Training Point Locations
Pixel values from a Raster object can be extracted using geometries within a geopandas.GeoDataFrame (points, lines, polygons) or by using labelled pixels from another raster with the same dimensions and crs.
By default the extracted values are returned as a geopandas.GeoDataFrame that contains the data and the coordinates of the pixels:
-
+
= stack.extract_vector(gdf=training_pts)
training_df
= training_df.index.get_level_values("geometry_idx")
@@ -591,7 +591,7 @@ training_df.index =True
)
right_index
-
+
= training_df.dropna()
training_df training_df.head()
@@ -783,13 +783,13 @@
Developing a Machine Learning Model
Here we are going to create a machine learning pipeline that correctly handles categorical predictors via one-hot encoding:
-
+
stack.names
dict_keys(['chnl_dist', 'dem', 'dist', 'ffreq', 'landimg2', 'landimg3', 'landimg4', 'mrvbf', 'rsp', 'slope', 'soil', 'twi', 'x_coordinates', 'y_coordinates', 'tl', 'tr', 'bl', 'br', 'c'])
-
+
from sklearn.pipeline import Pipeline
from sklearn.ensemble import ExtraTreesRegressor
from sklearn.preprocessing import OneHotEncoder
@@ -807,7 +807,7 @@ Develo
'regressor', et)]) (
Now we can separate our response and predictor variables and train the model:
-
+
= training_df.loc[:, stack.names]
X = training_df.loc[:, ['lead', 'cadmium', 'copper', 'zinc', 'om']]
y et.fit(X, y)
@@ -1236,7 +1236,7 @@ Develo
To evaluate the performance of the model, we will use 10-fold cross validation:
-
+
from sklearn.model_selection import cross_validate, KFold
= KFold(n_splits=10, shuffle=True, random_state=1234)
@@ -1251,13 +1251,13 @@ outer Develo
Feature Importances
-
+
= deepcopy(list(stack.names))
ohe_names 0], 'soil1')
ohe_names.insert(soil_idx[0], 'soil2')
ohe_names.insert(soil_idx[= np.array(ohe_names) ohe_names
-
+
'ggplot')
mpl.style.use(
= et.named_steps['regressor'].feature_importances_
@@ -1277,7 +1277,7 @@ fimp Feature Importances
Prediction on the Raster object
-
+
= stack.predict(et)
preds
preds.rename(for old, new in zip(preds.names, ['lead', 'cadmium', 'copper', 'zinc', 'om'])},
@@ -1296,7 +1296,7 @@ {old: new Prediction
Plot the results:
-
+
=(200, 200), title_fontsize=14, figsize=(10, 8))
preds.plot(out_shape plt.show()
diff --git a/examples/Example 1 - Multitarget Regression of Soil Properties.html b/examples/Example 1 - Multitarget Regression of Soil Properties.html
deleted file mode 100644
index 804fba6..0000000
--- a/examples/Example 1 - Multitarget Regression of Soil Properties.html
+++ /dev/null
@@ -1,1423 +0,0 @@
-
-
-
-
-
-
-
-
-
-Pyspatialml: machine learning for raster datasets - Spatial Modelling with Pyspatialml
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
\ No newline at end of file
diff --git a/examples/Example 1 - Multitarget Regression of Soil Properties_files/figure-html/cell-11-output-2.png b/examples/Example 1 - Multitarget Regression of Soil Properties_files/figure-html/cell-11-output-2.png
deleted file mode 100644
index cfdfdf1..0000000
Binary files a/examples/Example 1 - Multitarget Regression of Soil Properties_files/figure-html/cell-11-output-2.png and /dev/null differ
diff --git a/examples/Example 1 - Multitarget Regression of Soil Properties_files/figure-html/cell-13-output-2.png b/examples/Example 1 - Multitarget Regression of Soil Properties_files/figure-html/cell-13-output-2.png
deleted file mode 100644
index ca98b0d..0000000
Binary files a/examples/Example 1 - Multitarget Regression of Soil Properties_files/figure-html/cell-13-output-2.png and /dev/null differ
diff --git a/examples/Example 1 - Multitarget Regression of Soil Properties_files/figure-html/cell-15-output-1.png b/examples/Example 1 - Multitarget Regression of Soil Properties_files/figure-html/cell-15-output-1.png
deleted file mode 100644
index dcb4daa..0000000
Binary files a/examples/Example 1 - Multitarget Regression of Soil Properties_files/figure-html/cell-15-output-1.png and /dev/null differ
diff --git a/examples/Example 1 - Multitarget Regression of Soil Properties_files/figure-html/cell-23-output-1.png b/examples/Example 1 - Multitarget Regression of Soil Properties_files/figure-html/cell-23-output-1.png
deleted file mode 100644
index dee1998..0000000
Binary files a/examples/Example 1 - Multitarget Regression of Soil Properties_files/figure-html/cell-23-output-1.png and /dev/null differ
diff --git a/examples/Example 1 - Multitarget Regression of Soil Properties_files/figure-html/cell-25-output-2.png b/examples/Example 1 - Multitarget Regression of Soil Properties_files/figure-html/cell-25-output-2.png
deleted file mode 100644
index bc72034..0000000
Binary files a/examples/Example 1 - Multitarget Regression of Soil Properties_files/figure-html/cell-25-output-2.png and /dev/null differ
diff --git a/examples/Example 1 - Multitarget Regression of Soil Properties_files/figure-html/cell-7-output-2.png b/examples/Example 1 - Multitarget Regression of Soil Properties_files/figure-html/cell-7-output-2.png
deleted file mode 100644
index b405518..0000000
Binary files a/examples/Example 1 - Multitarget Regression of Soil Properties_files/figure-html/cell-7-output-2.png and /dev/null differ
diff --git a/examples/Example 1 - Multitarget Regression of Soil Properties_files/figure-html/cell-9-output-1.png b/examples/Example 1 - Multitarget Regression of Soil Properties_files/figure-html/cell-9-output-1.png
deleted file mode 100644
index 934a93c..0000000
Binary files a/examples/Example 1 - Multitarget Regression of Soil Properties_files/figure-html/cell-9-output-1.png and /dev/null differ
diff --git a/search.json b/search.json
index 162e916..9cf2acf 100644
--- a/search.json
+++ b/search.json
@@ -1,330 +1,232 @@
[
{
- "objectID": "examples/Example 1 - Multitarget Regression of Soil Properties.html",
- "href": "examples/Example 1 - Multitarget Regression of Soil Properties.html",
- "title": "Spatial Modelling with Pyspatialml",
- "section": "",
- "text": "Here we are using the meuse dataset which is included in the pyspatialml package as an example of performing a spatial model and prediction. We can access the datasets using the pyspatialml.datasets module:\n\nfrom copy import deepcopy\nfrom tempfile import NamedTemporaryFile\nimport geopandas as gpd\nimport numpy as np\nfrom pyspatialml import Raster\nfrom pyspatialml.preprocessing import xy_coordinates, distance_to_corners\nimport pyspatialml.datasets.meuse as ms\n\nimport matplotlib as mpl\nimport matplotlib.pyplot as plt\nfrom matplotlib import cm\n\n\npredictor_files = ms.predictors\ntraining_pts_file = ms.meuse\n\n\nstack = Raster(predictor_files)\nstack.names\n\ndict_keys(['chnl_dist', 'dem', 'dist', 'ffreq', 'landimg2', 'landimg3', 'landimg4', 'mrvbf', 'rsp', 'slope', 'soil', 'twi'])\n\n\nPyspatialml implements pandas-style indexing for Raster objects, using Raster.loc to index by the name of the raster, and Raster.iloc to select by index. This method also accepts slices. Label-based indexing is also provided directly by the __getattr_ magic method, i.e. 
Raster[name] or for multiple layers Raster[(names)].\nFor example we can remove layers from Raster object using the Raster.drop method, or by subsetting the raster:\n\nstack.drop('ffreq')\n\nRaster Object Containing 11 Layers\n attribute values\n0 names [chnl_dist, dem, dist, landimg2, landimg3, lan...\n1 files [/Users/stevenpawley/Documents/GitHub/Pyspatia...\n2 rows 104\n3 cols 78\n4 res (40.0, 40.0)\n5 nodatavals [-99999.0, -99999.0, -1.0, -1.0, -1.0, -1.0, -...\n\n\n\n\n\nWe can store matplotlib cmaps as an attribute within each layer in the Raster:\n\nstack.chnl_dist.cmap = 'RdBu'\nstack.dem.cmap = 'terrain'\nstack.dist.cmap = 'Reds'\nstack.landimg2.cmap = 'Greys'\nstack.landimg3.cmap = 'Greys'\nstack.landimg4.cmap = 'Greys'\nstack.landimg4.cmap = 'Greys'\nstack.mrvbf.cmap = 'jet'\nstack.rsp.cmap = 'gnuplot2'\nstack.slope.cmap = 'PuRd'\nstack.soil.cmap = cm.get_cmap('Set2', 3)\nstack.twi.cmap = 'coolwarm'\n\nPlot the predictors in the Raster object as a raster matrix:\n\nmpl.style.use('seaborn-ticks')\naxs = stack.plot(figsize=(9, 7))\nax = axs.flatten()[10]\nim = ax.images\nim[0].colorbar.set_ticks([1,2,3])\nax = axs.flatten()[8]\nax.tick_params(axis='x', labelrotation=65)\n\nplt.tight_layout()\nplt.show()\n\n/var/folders/hy/tgvjqg6502s0jfgtmt4pbng00000gn/T/ipykernel_47504/2543982748.py:1: MatplotlibDeprecationWarning: The seaborn styles shipped by Matplotlib are deprecated since 3.6, as they no longer correspond to the styles shipped by seaborn. However, they will remain available as 'seaborn-v0_8-<style>'. Alternatively, directly use the seaborn API instead.\n mpl.style.use('seaborn-ticks')"
- },
- {
- "objectID": "examples/Example 1 - Multitarget Regression of Soil Properties.html#multi-target-spatial-prediction-using-the-meuse-dataset",
- "href": "examples/Example 1 - Multitarget Regression of Soil Properties.html#multi-target-spatial-prediction-using-the-meuse-dataset",
- "title": "Spatial Modelling with Pyspatialml",
- "section": "",
- "text": "Here we are using the meuse dataset which is included in the pyspatialml package as an example of performing a spatial model and prediction. We can access the datasets using the pyspatialml.datasets module:\n\nfrom copy import deepcopy\nfrom tempfile import NamedTemporaryFile\nimport geopandas as gpd\nimport numpy as np\nfrom pyspatialml import Raster\nfrom pyspatialml.preprocessing import xy_coordinates, distance_to_corners\nimport pyspatialml.datasets.meuse as ms\n\nimport matplotlib as mpl\nimport matplotlib.pyplot as plt\nfrom matplotlib import cm\n\n\npredictor_files = ms.predictors\ntraining_pts_file = ms.meuse\n\n\nstack = Raster(predictor_files)\nstack.names\n\ndict_keys(['chnl_dist', 'dem', 'dist', 'ffreq', 'landimg2', 'landimg3', 'landimg4', 'mrvbf', 'rsp', 'slope', 'soil', 'twi'])\n\n\nPyspatialml implements pandas-style indexing for Raster objects, using Raster.loc to index by the name of the raster, and Raster.iloc to select by index. This method also accepts slices. Label-based indexing is also provided directly by the __getattr_ magic method, i.e. 
Raster[name] or for multiple layers Raster[(names)].\nFor example we can remove layers from Raster object using the Raster.drop method, or by subsetting the raster:\n\nstack.drop('ffreq')\n\nRaster Object Containing 11 Layers\n attribute values\n0 names [chnl_dist, dem, dist, landimg2, landimg3, lan...\n1 files [/Users/stevenpawley/Documents/GitHub/Pyspatia...\n2 rows 104\n3 cols 78\n4 res (40.0, 40.0)\n5 nodatavals [-99999.0, -99999.0, -1.0, -1.0, -1.0, -1.0, -...\n\n\n\n\n\nWe can store matplotlib cmaps as an attribute within each layer in the Raster:\n\nstack.chnl_dist.cmap = 'RdBu'\nstack.dem.cmap = 'terrain'\nstack.dist.cmap = 'Reds'\nstack.landimg2.cmap = 'Greys'\nstack.landimg3.cmap = 'Greys'\nstack.landimg4.cmap = 'Greys'\nstack.landimg4.cmap = 'Greys'\nstack.mrvbf.cmap = 'jet'\nstack.rsp.cmap = 'gnuplot2'\nstack.slope.cmap = 'PuRd'\nstack.soil.cmap = cm.get_cmap('Set2', 3)\nstack.twi.cmap = 'coolwarm'\n\nPlot the predictors in the Raster object as a raster matrix:\n\nmpl.style.use('seaborn-ticks')\naxs = stack.plot(figsize=(9, 7))\nax = axs.flatten()[10]\nim = ax.images\nim[0].colorbar.set_ticks([1,2,3])\nax = axs.flatten()[8]\nax.tick_params(axis='x', labelrotation=65)\n\nplt.tight_layout()\nplt.show()\n\n/var/folders/hy/tgvjqg6502s0jfgtmt4pbng00000gn/T/ipykernel_47504/2543982748.py:1: MatplotlibDeprecationWarning: The seaborn styles shipped by Matplotlib are deprecated since 3.6, as they no longer correspond to the styles shipped by seaborn. However, they will remain available as 'seaborn-v0_8-<style>'. Alternatively, directly use the seaborn API instead.\n mpl.style.use('seaborn-ticks')"
- },
- {
- "objectID": "examples/Example 1 - Multitarget Regression of Soil Properties.html#feature-engineering",
- "href": "examples/Example 1 - Multitarget Regression of Soil Properties.html#feature-engineering",
- "title": "Spatial Modelling with Pyspatialml",
- "section": "Feature Engineering",
- "text": "Feature Engineering\nWe want the prediction results to be dependent on the spatial locations of the training data. So to include spatial information, coordinate grids can be generated and added to the Raster object:\n\nxy_layer = xy_coordinates(\n layer=stack.iloc[0], \n file_path=NamedTemporaryFile(suffix=\".tif\").name\n)\n\n\nxy_layer = xy_coordinates(\n layer=stack.iloc[0], \n file_path=NamedTemporaryFile(suffix=\".tif\").name\n)\n\nedms = distance_to_corners(\n layer=stack.iloc[0], \n file_path=NamedTemporaryFile(suffix=\".tif\").name\n)\nedms.rename(\n {old: new for (old, new) in zip(edms.names, [\"tl\", \"tr\", \"bl\", \"br\", \"c\"])},\n in_place=True\n)\n\nedms.plot()\nplt.show()\n\n\n\n\n\n\n\n\nAppend them to the Raster object:\n\nstack = stack.append([xy_layer, edms])\n\nPlot the new predictors:\n\nmpl.style.use('seaborn-ticks')\naxs = stack.plot(figsize=(9, 7))\nax = axs.flatten()[10]\nim = ax.images\nim[0].colorbar.set_ticks([1,2,3])\nax = axs.flatten()[8]\nax.tick_params(axis='x', labelrotation=65)\n\nplt.tight_layout()\nplt.show()\n\n/var/folders/hy/tgvjqg6502s0jfgtmt4pbng00000gn/T/ipykernel_47504/2543982748.py:1: MatplotlibDeprecationWarning: The seaborn styles shipped by Matplotlib are deprecated since 3.6, as they no longer correspond to the styles shipped by seaborn. However, they will remain available as 'seaborn-v0_8-<style>'. Alternatively, directly use the seaborn API instead.\n mpl.style.use('seaborn-ticks')\n\n\n\n\n\n\n\n\n\nThe area that is filled by some of the grids is different. This doesn’t matter for the prediction because pixels in the Raster object that include some NaNs in some of the layers will be removed. However, the plots could potentially be given a cleaner look. 
We can use the Raster.intersect method to fix this:\n\nstack = stack.intersect()\n\n\nmpl.style.use('seaborn-ticks')\naxs = stack.plot(figsize=(9, 7))\nax = axs.flatten()[10]\nim = ax.images\nim[0].colorbar.set_ticks([1,2,3])\nax = axs.flatten()[8]\nax.tick_params(axis='x', labelrotation=65)\n\nplt.tight_layout()\nplt.show()\n\n/var/folders/hy/tgvjqg6502s0jfgtmt4pbng00000gn/T/ipykernel_47504/2543982748.py:1: MatplotlibDeprecationWarning: The seaborn styles shipped by Matplotlib are deprecated since 3.6, as they no longer correspond to the styles shipped by seaborn. However, they will remain available as 'seaborn-v0_8-<style>'. Alternatively, directly use the seaborn API instead.\n mpl.style.use('seaborn-ticks')"
- },
- {
- "objectID": "examples/Example 1 - Multitarget Regression of Soil Properties.html#read-the-meuse-dataset",
- "href": "examples/Example 1 - Multitarget Regression of Soil Properties.html#read-the-meuse-dataset",
- "title": "Spatial Modelling with Pyspatialml",
- "section": "Read the Meuse Dataset",
- "text": "Read the Meuse Dataset\n\ntraining_pts = gpd.read_file(training_pts_file)\ntraining_pts.head()\n\n\n\n\n\n\n\n\n\ncadmium\ncopper\nlead\nzinc\nelev\ndist\nom\nffreq\nsoil\nlime\nlanduse\ndist.m\ngeometry\n\n\n\n\n0\n11.7\n85.0\n299.0\n1022.0\n7.909\n0.001358\n13.6\n1\n1\n1\nAh\n50.0\nPOINT (181072.000 333611.000)\n\n\n1\n8.6\n81.0\n277.0\n1141.0\n6.983\n0.012224\n14.0\n1\n1\n1\nAh\n30.0\nPOINT (181025.000 333558.000)\n\n\n2\n6.5\n68.0\n199.0\n640.0\n7.800\n0.103029\n13.0\n1\n1\n1\nAh\n150.0\nPOINT (181165.000 333537.000)\n\n\n3\n2.6\n81.0\n116.0\n257.0\n7.655\n0.190094\n8.0\n1\n2\n0\nGa\n270.0\nPOINT (181298.000 333484.000)\n\n\n4\n2.8\n48.0\n117.0\n269.0\n7.480\n0.277090\n8.7\n1\n2\n0\nAh\n380.0\nPOINT (181307.000 333330.000)\n\n\n\n\n\n\n\n\nPlot the training points:\n\nfrom mpl_toolkits.axes_grid1 import make_axes_locatable\nmpl.style.use('ggplot')\n\nfig, axs = plt.subplots(2, 3, figsize=(8.5, 7))\n\nfor i, (ax, target) in enumerate(zip(axs.ravel(), ['cadmium', 'copper', 'lead', 'zinc', 'om'])):\n ax.set_title(target.title())\n divider = make_axes_locatable(ax)\n cax = divider.append_axes(\"right\", size=\"10%\", pad=0.05)\n training_pts.plot(column=target, legend=True, ax=ax, cax=cax, cmap='viridis')\n \n if i != 0:\n ax.set_yticklabels([])\n \n if i != 3:\n ax.set_xticklabels([])\n else:\n ax.tick_params(axis='x', labelrotation=65)\n \nfig.delaxes(axs.flatten()[i+1])\nplt.tight_layout()\nplt.show()"
- },
- {
- "objectID": "examples/Example 1 - Multitarget Regression of Soil Properties.html#extract-raster-values-at-the-training-point-locations",
- "href": "examples/Example 1 - Multitarget Regression of Soil Properties.html#extract-raster-values-at-the-training-point-locations",
- "title": "Spatial Modelling with Pyspatialml",
- "section": "Extract Raster Values at the Training Point Locations",
- "text": "Extract Raster Values at the Training Point Locations\nPixel values from a Raster object can be extracted using geometries within a geopandas.GeoDataFrame (points, lines, polygons) or by using labelled pixels from another raster with the same dimensions and crs.\nBy default the extracted values are returned as a geopandas.GeoDataFrame that contains the data and the coordinates of the pixels:\n\ntraining_df = stack.extract_vector(gdf=training_pts)\n\ntraining_df.index = training_df.index.get_level_values(\"geometry_idx\")\ntraining_df = training_df.merge(\n training_pts.loc[:, (\"lead\", \"cadmium\", \"copper\", \"zinc\", \"om\")], \n left_index=True, \n right_index=True\n) \n\n\ntraining_df = training_df.dropna()\ntraining_df.head()\n\n\n\n\n\n\n\n\n\nchnl_dist\ndem\ndist\nffreq\nlandimg2\nlandimg3\nlandimg4\nmrvbf\nrsp\nslope\n...\ntr\nbl\nbr\nc\ngeometry\nlead\ncadmium\ncopper\nzinc\nom\n\n\ngeometry_idx\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n0\n0.000000\n3214.0\n0.001358\n1.0\n97.0\n92.0\n192.0\n3.523824e-06\n0.000000\n1.423307\n...\n12.369317\n119.268608\n100.717430\n55.470715\nPOINT (181072.000 333611.000)\n299.0\n11.7\n85.0\n1022.0\n13.6\n\n\n1\n79.849854\n3402.0\n0.012224\n1.0\n160.0\n183.0\n183.0\n9.879866e-06\n0.082085\n1.286004\n...\n13.928389\n117.046997\n98.858482\n53.235325\nPOINT (181025.000 333558.000)\n277.0\n8.6\n81.0\n1141.0\n14.0\n\n\n2\n0.000000\n3277.0\n0.103029\n1.0\n178.0\n209.0\n179.0\n1.340742e-03\n0.000000\n0.674711\n...\n10.295630\n119.281181\n98.412399\n55.226807\nPOINT (181165.000 333537.000)\n199.0\n6.5\n68.0\n640.0\n13.0\n\n\n3\n184.743164\n3563.0\n0.190094\n1.0\n114.0\n135.0\n152.0\n6.547428e-07\n0.192325\n1.413479\n...\n8.485281\n120.208153\n97.185387\n56.035702\nPOINT (181298.000 333484.000)\n116.0\n2.6\n81.0\n257.0\n8.0\n\n\n4\n16.768555\n3406.0\n0.277090\n1.0\n133.0\n154.0\n151.0\n1.588824e-03\n0.016689\n0.531276\n...\n11.661903\n117.004272\n93.193344\n52.801514\nPOINT (181307.000 
333330.000)\n117.0\n2.8\n48.0\n269.0\n8.7\n\n\n\n\n5 rows × 25 columns"
- },
- {
- "objectID": "examples/Example 1 - Multitarget Regression of Soil Properties.html#developing-a-machine-learning-model",
- "href": "examples/Example 1 - Multitarget Regression of Soil Properties.html#developing-a-machine-learning-model",
- "title": "Spatial Modelling with Pyspatialml",
- "section": "Developing a Machine Learning Model",
- "text": "Developing a Machine Learning Model\nHere we are going to create a machine learning pipeline that correctly handles categorical predictors via one-hot encoding:\n\nstack.names\n\ndict_keys(['chnl_dist', 'dem', 'dist', 'ffreq', 'landimg2', 'landimg3', 'landimg4', 'mrvbf', 'rsp', 'slope', 'soil', 'twi', 'x_coordinates', 'y_coordinates', 'tl', 'tr', 'bl', 'br', 'c'])\n\n\n\nfrom sklearn.pipeline import Pipeline\nfrom sklearn.ensemble import ExtraTreesRegressor\nfrom sklearn.preprocessing import OneHotEncoder\nfrom sklearn.compose import ColumnTransformer\n\nsoil_idx = [i for i, name in enumerate(stack.names) if name == 'soil']\n\ntrans = ColumnTransformer([\n ('ohe', OneHotEncoder(categories='auto', handle_unknown='ignore'), soil_idx)\n ], remainder='passthrough')\n\net = ExtraTreesRegressor(n_estimators=500, n_jobs=-1, random_state=1234)\net = Pipeline([\n ('preproc', trans),\n ('regressor', et)])\n\nNow we can separate our response and predictor variables and train the model:\n\nX = training_df.loc[:, stack.names]\ny = training_df.loc[:, ['lead', 'cadmium', 'copper', 'zinc', 'om']]\net.fit(X, y)\n\nPipeline(steps=[('preproc',\n ColumnTransformer(remainder='passthrough',\n transformers=[('ohe',\n OneHotEncoder(handle_unknown='ignore'),\n [10])])),\n ('regressor',\n ExtraTreesRegressor(n_estimators=500, n_jobs=-1,\n random_state=1234))])In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. 
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.PipelinePipeline(steps=[('preproc',\n ColumnTransformer(remainder='passthrough',\n transformers=[('ohe',\n OneHotEncoder(handle_unknown='ignore'),\n [10])])),\n ('regressor',\n ExtraTreesRegressor(n_estimators=500, n_jobs=-1,\n random_state=1234))])preproc: ColumnTransformerColumnTransformer(remainder='passthrough',\n transformers=[('ohe', OneHotEncoder(handle_unknown='ignore'),\n [10])])ohe[10]OneHotEncoderOneHotEncoder(handle_unknown='ignore')remainder['chnl_dist', 'dem', 'dist', 'ffreq', 'landimg2', 'landimg3', 'landimg4', 'mrvbf', 'rsp', 'slope', 'twi', 'x_coordinates', 'y_coordinates', 'tl', 'tr', 'bl', 'br', 'c']passthroughpassthroughExtraTreesRegressorExtraTreesRegressor(n_estimators=500, n_jobs=-1, random_state=1234)\n\n\nTo evaluate the performance of the model, we will use 10-fold cross validation:\n\nfrom sklearn.model_selection import cross_validate, KFold\n\nouter = KFold(n_splits=10, shuffle=True, random_state=1234)\nscores = cross_validate(et, X, y, scoring='neg_mean_squared_error', cv=10, n_jobs=1)\nrmse = np.sqrt(-scores['test_score']).mean()\n\nprint(\"Our RMSE score is {}\".format(rmse))\n\nOur RMSE score is 105.19227221271413"
- },
- {
- "objectID": "examples/Example 1 - Multitarget Regression of Soil Properties.html#feature-importances",
- "href": "examples/Example 1 - Multitarget Regression of Soil Properties.html#feature-importances",
- "title": "Spatial Modelling with Pyspatialml",
- "section": "Feature Importances",
- "text": "Feature Importances\n\nohe_names = deepcopy(list(stack.names))\nohe_names.insert(soil_idx[0], 'soil1')\nohe_names.insert(soil_idx[0], 'soil2')\nohe_names = np.array(ohe_names)\n\n\nmpl.style.use('ggplot')\n\nfimp = et.named_steps['regressor'].feature_importances_\n\nfig, ax = plt.subplots(figsize=(4, 6))\nax.barh(y=ohe_names[fimp.argsort()], width=fimp[fimp.argsort()])\nax.set_xlabel('Feature Importance Score')\nplt.show()"
- },
- {
- "objectID": "examples/Example 1 - Multitarget Regression of Soil Properties.html#prediction-on-the-raster-object",
- "href": "examples/Example 1 - Multitarget Regression of Soil Properties.html#prediction-on-the-raster-object",
- "title": "Spatial Modelling with Pyspatialml",
- "section": "Prediction on the Raster object",
- "text": "Prediction on the Raster object\n\npreds = stack.predict(et)\npreds.rename(\n {old: new for old, new in zip(preds.names, ['lead', 'cadmium', 'copper', 'zinc', 'om'])},\n in_place=True\n)\npreds.lead.cmap = 'rainbow'\npreds.cadmium.cmap = 'rainbow'\npreds.copper.cmap = 'rainbow'\npreds.zinc.cmap = 'rainbow'\npreds.om.cmap = 'rainbow'\n\n/Users/stevenpawley/mambaforge/envs/spatial/lib/python3.10/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but OneHotEncoder was fitted with feature names\n warnings.warn(\n/Users/stevenpawley/mambaforge/envs/spatial/lib/python3.10/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but OneHotEncoder was fitted with feature names\n warnings.warn(\n\n\nPlot the results:\n\nmpl.style.use('seaborn-ticks')\npreds.plot(out_shape=(200, 200), title_fontsize=14, figsize=(10, 8))\nplt.show()\n\n/var/folders/hy/tgvjqg6502s0jfgtmt4pbng00000gn/T/ipykernel_47504/3208474620.py:1: MatplotlibDeprecationWarning: The seaborn styles shipped by Matplotlib are deprecated since 3.6, as they no longer correspond to the styles shipped by seaborn. However, they will remain available as 'seaborn-v0_8-<style>'. Alternatively, directly use the seaborn API instead.\n mpl.style.use('seaborn-ticks')"
- },
- {
- "objectID": "docs/usage.html",
- "href": "docs/usage.html",
- "title": "Usage",
- "section": "",
- "text": "The main approach to working with raster datasets in Pyspatialml is through the Raster class. The Raster object takes a list of GDAL-supported raster datasets and references them as part of a single Raster object, which can be used to perform operations on the raster datasets as a whole. The Raster object is a thin wrapper around the rasterio library, which is a Python library for reading and writing raster datasets. The individual bands within the datasets are represented internally as RasterLayer objects. This allows for retaining metadata about each raster dataset and adding or removing raster datasets from the stack without making physical changes to the disk.\nNote that in order to initiate a Raster object, the underlying raster datasets must be spatially aligned in terms of their extent, resolution, and coordinate reference system - Raster objects do not perform any resampling or reprojection of the underlying datasets. Functions within the preprocessing module can be used to align raster datasets before creating a Raster object.\n\n\nThe most common approach of initiating a Raster object is from an existing raster dataset, or a list of raster datasets. Alternatively, a Raster object can also be initiated from a 3D numpy array:\n\nRaster(src=[raster1.tif, raster2.tif, raster3.tif]) creates a Raster object from existing file-based GDAL-supported datasets, or a single raster dataset. The file-based datasets can contain single or multiple bands.\nRaster(src=new_numpy_array, crs=crs, transform=transform) creates a Raster object from a 3D numpy array (band, row, column). The crs and transform arguments are optional but are required to provide coordinate reference system information to the Raster object. 
The crs argument has to be represented by rasterio crs.CRS object, and the transform parameter requires an affine.Affine object.\n\nRasters can also be initated directly from a rasterio.Band object(s), or from a list of RasterLayer objects (see below).\n\n\n\nGenerally, Pyspatialml intends users to work with the Raster object. However, internally, the Raster object is composed of RasterLayer objects, which represent individual bands of a raster dataset. RasterLayers are based on a rasterio.band object with some additional attributes and methods. However, unlike the rasterio.Band.ds.read method which reads all bands within a multi-band dataset, the RasterLayer read method always refers to a single band.\nMethods contained within RasterLayer objects are specifically designed to be applied to individual bands of a raster. These methods include operations such as sieve-clump, distance to non-NaN pixels, and arithmetic operations on individual layers.",
- "crumbs": [
- "Usage"
- ]
- },
- {
- "objectID": "docs/usage.html#the-raster-class",
- "href": "docs/usage.html#the-raster-class",
- "title": "Usage",
+ "objectID": "index.html",
+ "href": "index.html",
+ "title": "Overview",
"section": "",
- "text": "The main approach to working with raster datasets in Pyspatialml is through the Raster class. The Raster object takes a list of GDAL-supported raster datasets and references them as part of a single Raster object, which can be used to perform operations on the raster datasets as a whole. The Raster object is a thin wrapper around the rasterio library, which is a Python library for reading and writing raster datasets. The individual bands within the datasets are represented internally as RasterLayer objects. This allows for retaining metadata about each raster dataset and adding or removing raster datasets from the stack without making physical changes to the disk.\nNote that in order to initiate a Raster object, the underlying raster datasets must be spatially aligned in terms of their extent, resolution, and coordinate reference system - Raster objects do not perform any resampling or reprojection of the underlying datasets. Functions within the preprocessing module can be used to align raster datasets before creating a Raster object.\n\n\nThe most common approach of initiating a Raster object is from an existing raster dataset, or a list of raster datasets. Alternatively, a Raster object can also be initiated from a 3D numpy array:\n\nRaster(src=[raster1.tif, raster2.tif, raster3.tif]) creates a Raster object from existing file-based GDAL-supported datasets, or a single raster dataset. The file-based datasets can contain single or multiple bands.\nRaster(src=new_numpy_array, crs=crs, transform=transform) creates a Raster object from a 3D numpy array (band, row, column). The crs and transform arguments are optional but are required to provide coordinate reference system information to the Raster object. 
The crs argument has to be represented by rasterio crs.CRS object, and the transform parameter requires an affine.Affine object.\n\nRasters can also be initated directly from a rasterio.Band object(s), or from a list of RasterLayer objects (see below).\n\n\n\nGenerally, Pyspatialml intends users to work with the Raster object. However, internally, the Raster object is composed of RasterLayer objects, which represent individual bands of a raster dataset. RasterLayers are based on a rasterio.band object with some additional attributes and methods. However, unlike the rasterio.Band.ds.read method which reads all bands within a multi-band dataset, the RasterLayer read method always refers to a single band.\nMethods contained within RasterLayer objects are specifically designed to be applied to individual bands of a raster. These methods include operations such as sieve-clump, distance to non-NaN pixels, and arithmetic operations on individual layers.",
- "crumbs": [
- "Usage"
- ]
+ "text": "Pyspatialml is a Python package for applying scikit-learn machine learning models to raster-based datasets. It is inspired by the famous raster package in the R statistical programming language which has been extensively used for applying statistical and machine learning models to geospatial raster datasets.\nPyspatialml includes functions and classes for working with multiple raster datasets and applying typical machine learning workflows including raster data manipulation, feature engineering on raster datasets, extraction of training data, and application of the predict or predict_proba methods of scikit-learn estimator objects to a stack of raster datasets.\nPyspatialml is built upon the rasterio Python package which performs all of the heavy lifting and is designed to work with the geopandas package for related raster-vector data geoprocessing operations."
},
{
- "objectID": "docs/usage.html#principles-of-working-with-rasters",
- "href": "docs/usage.html#principles-of-working-with-rasters",
- "title": "Usage",
- "section": "Principles of working with Rasters",
- "text": "Principles of working with Rasters\nMethods that are applied to Raster objects are generally designed to be applied to the entire stack of raster datasets. For example, the crop method will crop all raster datasets in the stack to a common extent, and the mask method will apply a mask to all raster datasets in the stack. These methods always return a new Raster object, and do not modify the original Raster object by default. Subsetting of individual bands uses the same principles as the pandas library, where the loc method is used to subset bands based on their names, and the iloc method is used to subset bands based on their index. Also similarly to pandas, subsetting a single band will return the object itself, in this case, a RasterLayer object, while subsetting multiple bands will return a new Raster object.\nMethods that apply to individual RasterLayers are mostly related to extracting or summarizing metadata from the individual bands. For other methods that users may want to apply to individual bands, it is recommended to work with rasterio directly.",
- "crumbs": [
- "Usage"
- ]
+ "objectID": "index.html#purpose",
+ "href": "index.html#purpose",
+ "title": "Overview",
+ "section": "Purpose",
+ "text": "Purpose\nA supervised machine-learning workflow as applied to spatial raster data typically involves several steps:\n\nUsing vector features or labelled pixels to extract training data from a stack of raster-based predictors (e.g. spectral bands, terrain derivatives, or climate grids). The training data represent locations where some property/state/concentration is already established, and might comprise point locations of arsenic concentrations, or labelled pixels with integer-encoded values that correspond to known landcover types.\nDeveloping a machine learning classification or regression model on the training data. Pyspatialml is designed to use scikit-learn compatible APIs for this purpose.\nApplying the fitted machine learning model to make predictions on all of the pixels in the stack of raster data.\n\nPyspatialml is designed to make it easy to develop spatial prediction models on stacks of 2D raster datasets that are held on disk. Unlike using python’s numpy module directly where raster datasets need to be held in memory, the majority of functions within pyspatialml work with raster datasets that are stored on disk and allow processing operations to be performed on datasets that are too large to be loaded into memory.\nPyspatialml is designed to make it easy to work with typical raster data stacks consisting of multiple 2D grids such as different spectral bands, maps etc. However, its purpose is not to work with multidimensional datasets, i.e. those that have more than 3 dimensions such as spacetime cubes of multiband data. The xarray package can provide a structure for this type of data."
},
{
- "objectID": "docs/transformers.html",
- "href": "docs/transformers.html",
- "title": "Transformers",
+ "objectID": "docs/quickstart.html",
+ "href": "docs/quickstart.html",
+ "title": "Quick start",
"section": "",
- "text": "The transformers module contains classes that are used for spatial feature engineering.",
+ "text": "We are going to use a set of Landsat 7 bands contained within the nc example data:\n\nfrom pyspatialml import Raster\nimport pyspatialml.datasets.nc as nc\nimport matplotlib.pyplot as plt\n\npredictors = [nc.band1, nc.band2, nc.band3, nc.band4, nc.band5, nc.band7]\n\nThese raster datasets are aligned in terms of their extent and coordinate reference systems. We can ‘stack’ these into a Raster class so that we can perform machine learning related operations on the set of rasters:\n\nstack = Raster(predictors)\n\nWhen a Raster object is created, the names of each layer are automatically created based on syntactically-correct versions of the file basenames:\n\nstack.names\n\ndict_keys(['lsat7_2000_10', 'lsat7_2000_20', 'lsat7_2000_30', 'lsat7_2000_40', 'lsat7_2000_50', 'lsat7_2000_70'])\n\n\nColor ramps and matplotlib.colors.Normalize objects can be assigned to each RasterLayer in the object using the cmap and norm attributes for convenience in plotting:\n\nstack.lsat7_2000_10.cmap = \"Blues\"\nstack.lsat7_2000_20.cmap = \"Greens\"\nstack.lsat7_2000_30.cmap = \"Reds\"\nstack.lsat7_2000_40.cmap = \"RdPu\"\nstack.lsat7_2000_50.cmap = \"autumn\"\nstack.lsat7_2000_70.cmap = \"hot\"\n\nstack.plot(\n title_fontsize=8,\n label_fontsize=6,\n legend_fontsize=6,\n names=[\"B1\", \"B2\", \"B3\", \"B4\", \"B5\", \"B7\"],\n fig_kwds={\"figsize\": (8, 4)},\n subplots_kwds={\"wspace\": 0.3}\n)\nplt.show()",
"crumbs": [
"Guide",
"Geoprocessing",
- "Transformers"
+ "Quick start"
]
},
{
- "objectID": "docs/transformers.html#spatial-lag-transformer",
- "href": "docs/transformers.html#spatial-lag-transformer",
- "title": "Transformers",
- "section": "Spatial Lag Transformer",
- "text": "Spatial Lag Transformer\nA transformer to create spatial lag variables by using a weighted mean/mode of the values of the K-neighboring observations. The weighted mean/mode of the surrounding observations are appended as a new feature to the right-most column in the training data. The measure parameter should be set to ‘mode’ for classification, and ‘mean’ for regression.\nKNNTransformer(\n n_neighbors=7,\n weights=\"distance\",\n measure=\"mean\",\n radius=1.0,\n algorithm=\"auto\",\n leaf_size=30,\n metric=\"minkowski\",\n p=2,\n normalize=True,\n metric_params=None,\n kernel_params=None,\n n_jobs=1\n)",
+ "objectID": "docs/quickstart.html#initiating-a-raster-object",
+ "href": "docs/quickstart.html#initiating-a-raster-object",
+ "title": "Quick start",
+ "section": "",
+ "text": "We are going to use a set of Landsat 7 bands contained within the nc example data:\n\nfrom pyspatialml import Raster\nimport pyspatialml.datasets.nc as nc\nimport matplotlib.pyplot as plt\n\npredictors = [nc.band1, nc.band2, nc.band3, nc.band4, nc.band5, nc.band7]\n\nThese raster datasets are aligned in terms of their extent and coordinate reference systems. We can ‘stack’ these into a Raster class so that we can perform machine learning related operations on the set of rasters:\n\nstack = Raster(predictors)\n\nWhen a Raster object is created, the names of each layer are automatically created based on syntactically-correct versions of the file basenames:\n\nstack.names\n\ndict_keys(['lsat7_2000_10', 'lsat7_2000_20', 'lsat7_2000_30', 'lsat7_2000_40', 'lsat7_2000_50', 'lsat7_2000_70'])\n\n\nColor ramps and matplotlib.colors.Normalize objects can be assigned to each RasterLayer in the object using the cmap and norm attributes for convenience in plotting:\n\nstack.lsat7_2000_10.cmap = \"Blues\"\nstack.lsat7_2000_20.cmap = \"Greens\"\nstack.lsat7_2000_30.cmap = \"Reds\"\nstack.lsat7_2000_40.cmap = \"RdPu\"\nstack.lsat7_2000_50.cmap = \"autumn\"\nstack.lsat7_2000_70.cmap = \"hot\"\n\nstack.plot(\n title_fontsize=8,\n label_fontsize=6,\n legend_fontsize=6,\n names=[\"B1\", \"B2\", \"B3\", \"B4\", \"B5\", \"B7\"],\n fig_kwds={\"figsize\": (8, 4)},\n subplots_kwds={\"wspace\": 0.3}\n)\nplt.show()",
"crumbs": [
"Guide",
"Geoprocessing",
- "Transformers"
+ "Quick start"
]
},
{
- "objectID": "docs/transformers.html#geodisttransformer",
- "href": "docs/transformers.html#geodisttransformer",
- "title": "Transformers",
- "section": "GeoDistTransformer",
- "text": "GeoDistTransformer\nA common spatial feature engineering task is to create new features that describe the proximity to some reference locations. The GeoDistTransformer can be used to add these features as part of a machine learning pipeline.\nGeoDistTransformer(refs, log=False)\nWhere refs are an array of coordinates of reference locations in (m, n-dimensional) order, such as {n_locations, x_coordinates, y_coordinates, …} for as many dimensions as required. For example to calculate distances to a single x,y,z location:\nrefs = [-57.345, -110.134, 1012]\nAnd to calculate distances to three x,y reference locations:\nrefs = [\n [-57.345, -110.134],\n [-56.345, -109.123],\n [-58.534, -112.123]\n]\nThe supplied array has to have at least x,y coordinates with a (1, 2) shape for a single location.",
+ "objectID": "docs/quickstart.html#subsetting-and-indexing",
+ "href": "docs/quickstart.html#subsetting-and-indexing",
+ "title": "Quick start",
+ "section": "Subsetting and Indexing",
+ "text": "Subsetting and Indexing\nIndexing of Raster objects is provided by several methods:\nThe Raster[keys] method enables key-based indexing using a name of a RasterLayer, or a list of names. Direct subsetting of a Raster object instance returns a RasterLayer if only a single label is used, otherwise it always returns a new Raster object containing only the selected layers.\nThe Raster.iloc[int, list, tuple, slice] method allows a Raster object instance to be subset using integer-based indexing or slicing. The iloc method returns a RasterLayer object if only a single index is used, otherwise it always returns a new Raster object containing only the selected layers.\nSubsetting of a Raster object instance can also occur by using attribute names in the form of Raster.name_of_layer. Because only a single RasterLayer can be subset at one time using this approach, a RasterLayer object is always returned.\nExamples of methods to subset a Raster object:\n\n# subset based on position\nsingle_layer = stack.iloc[0]\n\n# subset using a slice\nnew_raster_obj = stack.iloc[0:3]\n\n# subset using labels\nsingle_layer = stack['lsat7_2000_10']\nsingle_layer = stack.lsat7_2000_10\n\n# list or tuple of keys\nnew_raster_obj = stack[('lsat7_2000_10', 'lsat7_2000_20')]\n\nIterate through RasterLayers individually:\n\nfor name, layer in stack.items():\n print(name, layer)\n\nlsat7_2000_10 <pyspatialml.rasterlayer.RasterLayer object at 0x17fb6b650>\nlsat7_2000_20 <pyspatialml.rasterlayer.RasterLayer object at 0x11e5d21d0>\nlsat7_2000_30 <pyspatialml.rasterlayer.RasterLayer object at 0x16d168bd0>\nlsat7_2000_40 <pyspatialml.rasterlayer.RasterLayer object at 0x17f80cc90>\nlsat7_2000_50 <pyspatialml.rasterlayer.RasterLayer object at 0x17fb6a110>\nlsat7_2000_70 <pyspatialml.rasterlayer.RasterLayer object at 0x17fd92510>\n\n\nReplace a RasterLayer with another:\n\nstack.iloc[0] = Raster(nc.band7).iloc[0]\n\nstack.iloc[0].plot()\nplt.show()",
"crumbs": [
"Guide",
"Geoprocessing",
- "Transformers"
+ "Quick start"
]
},
{
- "objectID": "docs/sampling.html",
- "href": "docs/sampling.html",
- "title": "Random Sampling",
- "section": "",
- "text": "For many spatial models, it is common to take a random sample of the predictors to represent a single class (i.e. an environmental background or pseudo-absences in a binary classification model). The sample function is supplied in the sampling module for this purpose:\n\nfrom pyspatialml import Raster\nimport pyspatialml.datasets.nc as nc\nimport matplotlib.pyplot as plt\n\npredictors = [nc.band1, nc.band2, nc.band3, nc.band4, nc.band5, nc.band7]\nstack = Raster(predictors)\n\n# extract training data using a random sample\ndf_rand = stack.sample(size=1000, random_state=1)\ndf_rand.plot()",
+ "objectID": "docs/quickstart.html#appending-and-dropping-layers",
+ "href": "docs/quickstart.html#appending-and-dropping-layers",
+ "title": "Quick start",
+ "section": "Appending and Dropping Layers",
+ "text": "Appending and Dropping Layers\nAppend layers from another Raster to the stack. Duplicate names are automatically given a suffix.\n\nstack.append(Raster(nc.band7), in_place=True)\nstack.names\n\ndict_keys(['lsat7_2000_10', 'lsat7_2000_20', 'lsat7_2000_30', 'lsat7_2000_40', 'lsat7_2000_50', 'lsat7_2000_70_1', 'lsat7_2000_70_2'])\n\n\nRename RasterLayers using a dict of old_name : new_name pairs:\n\nstack.names\nstack.rename({'lsat7_2000_30': 'new_name'}, in_place=True)\nstack.names\nstack.new_name\nstack['new_name']\n\n<pyspatialml.rasterlayer.RasterLayer at 0x16d168bd0>\n\n\nDrop a RasterLayer:\n\nstack.names\nstack.drop(labels='lsat7_2000_70_1', in_place=True)\nstack.names\n\ndict_keys(['lsat7_2000_10', 'lsat7_2000_20', 'new_name', 'lsat7_2000_40', 'lsat7_2000_50', 'lsat7_2000_70_2'])",
"crumbs": [
"Guide",
"Geoprocessing",
- "Random Sampling"
+ "Quick start"
]
},
{
- "objectID": "docs/sampling.html#random-uniform-sampling",
- "href": "docs/sampling.html#random-uniform-sampling",
- "title": "Random Sampling",
- "section": "",
- "text": "For many spatial models, it is common to take a random sample of the predictors to represent a single class (i.e. an environmental background or pseudo-absences in a binary classification model). The sample function is supplied in the sampling module for this purpose:\n\nfrom pyspatialml import Raster\nimport pyspatialml.datasets.nc as nc\nimport matplotlib.pyplot as plt\n\npredictors = [nc.band1, nc.band2, nc.band3, nc.band4, nc.band5, nc.band7]\nstack = Raster(predictors)\n\n# extract training data using a random sample\ndf_rand = stack.sample(size=1000, random_state=1)\ndf_rand.plot()",
+ "objectID": "docs/quickstart.html#integration-with-pandas",
+ "href": "docs/quickstart.html#integration-with-pandas",
+ "title": "Quick start",
+ "section": "Integration with Pandas",
+ "text": "Integration with Pandas\nData from a Raster object can be converted into a pandas.DataFrame, with each pixel represented by a row, and columns reflecting the x, y coordinates and the values of each RasterLayer in the Raster object:\n\nimport pandas as pd\n\ndf = stack.to_pandas(max_pixels=50000, resampling='nearest')\ndf.head()\n\n\n\n\n\n\n\n\n\nx\ny\nlsat7_2000_10\nlsat7_2000_20\nnew_name\nlsat7_2000_40\nlsat7_2000_50\nlsat7_2000_70_2\n\n\n\n\n0\n630534.000000\n228114.0\nNaN\nNaN\nNaN\nNaN\nNaN\nNaN\n\n\n1\n630562.558402\n228114.0\nNaN\nNaN\nNaN\nNaN\nNaN\nNaN\n\n\n2\n630591.116803\n228114.0\nNaN\nNaN\nNaN\nNaN\nNaN\nNaN\n\n\n3\n630619.675205\n228114.0\nNaN\nNaN\nNaN\nNaN\nNaN\nNaN\n\n\n4\n630648.233607\n228114.0\nNaN\nNaN\nNaN\nNaN\nNaN\nNaN\n\n\n\n\n\n\n\n\nThe original raster is up-sampled based on max_pixels and the resampling method, which supports all of the resampling methods available in the underlying rasterio library for decimated reads.",
"crumbs": [
"Guide",
"Geoprocessing",
- "Random Sampling"
+ "Quick start"
]
},
{
- "objectID": "docs/sampling.html#stratified-random-sampling",
- "href": "docs/sampling.html#stratified-random-sampling",
- "title": "Random Sampling",
- "section": "Stratified Random Sampling",
- "text": "Stratified Random Sampling\nThe sample function also enables stratified random sampling based on passing a categorical raster dataset to the strata argument. The categorical raster should spatially overlap with the dataset to be sampled, but it does not need to be of the same grid resolution. This raster should be passed as a opened rasterio dataset:\n\nstrata = Raster(nc.strata)\ndf_strata = stack.sample(size=5, strata=strata, random_state=1)\ndf_strata = df_strata.dropna()\ndf_strata\n\n\n\n\n\n\n\n\n\nlsat7_2000_10\nlsat7_2000_20\nlsat7_2000_30\nlsat7_2000_40\nlsat7_2000_50\nlsat7_2000_70\ngeometry\n\n\n\n\n0\n96.0\n78.0\n88.0\n49.0\n71.0\n63.0\nPOINT (641093.250 225135.750)\n\n\n1\n113.0\n103.0\n122.0\n66.0\n136.0\n110.0\nPOINT (640979.250 222342.750)\n\n\n3\n82.0\n66.0\n67.0\n64.0\n76.0\n52.0\nPOINT (640095.750 225848.250)\n\n\n4\n99.0\n88.0\n95.0\n56.0\n98.0\n78.0\nPOINT (637559.250 226788.750)\n\n\n5\n81.0\n69.0\n76.0\n73.0\n118.0\n72.0\nPOINT (635621.250 218324.250)\n\n\n10\n91.0\n78.0\n81.0\n77.0\n97.0\n73.0\nPOINT (634709.250 221943.750)\n\n\n11\n72.0\n61.0\n51.0\n104.0\n91.0\n47.0\nPOINT (639269.250 220005.750)\n\n\n12\n86.0\n75.0\n78.0\n73.0\n87.0\n60.0\nPOINT (639326.250 224964.750)\n\n\n13\n71.0\n53.0\n48.0\n59.0\n78.0\n46.0\nPOINT (635222.250 218951.250)\n\n\n15\n76.0\n59.0\n63.0\n65.0\n114.0\n64.0\nPOINT (633027.750 218580.750)\n\n\n17\n75.0\n61.0\n55.0\n70.0\n74.0\n43.0\nPOINT (633369.750 219435.750)\n\n\n18\n78.0\n66.0\n69.0\n69.0\n110.0\n72.0\nPOINT (633198.750 225506.250)\n\n\n19\n68.0\n52.0\n40.0\n79.0\n58.0\n30.0\nPOINT (637986.750 222998.250)\n\n\n20\n70.0\n55.0\n52.0\n62.0\n79.0\n47.0\nPOINT (635649.750 217440.750)\n\n\n22\n71.0\n53.0\n48.0\n64.0\n77.0\n42.0\nPOINT (635564.250 222713.250)\n\n\n23\n72.0\n53.0\n51.0\n58.0\n82.0\n51.0\nPOINT (633056.250 218324.250)\n\n\n26\n81.0\n78.0\n79.0\n34.0\n41.0\n28.0\nPOINT (639297.750 223625.250)\n\n\n27\n73.0\n57.0\n51.0\n16.0\n14.0\n10.0\nPOINT (635364.750 
224736.750)\n\n\n28\n73.0\n57.0\n52.0\n55.0\n57.0\n40.0\nPOINT (635535.750 223311.750)\n\n\n30\n138.0\n120.0\n132.0\n65.0\n129.0\n126.0\nPOINT (634196.250 226190.250)\n\n\n31\n72.0\n60.0\n47.0\n69.0\n82.0\n46.0\nPOINT (639810.750 219749.250)\n\n\n32\n132.0\n122.0\n140.0\n73.0\n171.0\n176.0\nPOINT (640352.250 218238.750)\n\n\n33\n170.0\n157.0\n176.0\n80.0\n182.0\n183.0\nPOINT (639924.750 219692.250)\n\n\n34\n115.0\n98.0\n106.0\n60.0\n110.0\n102.0\nPOINT (639953.250 219578.250)",
+ "objectID": "docs/quickstart.html#saving-a-raster-to-file",
+ "href": "docs/quickstart.html#saving-a-raster-to-file",
+ "title": "Quick start",
+ "section": "Saving a Raster to File",
+ "text": "Saving a Raster to File\nSave a Raster:\n\nimport tempfile\n\ntmp_tif = tempfile.NamedTemporaryFile().name + '.tif'\nnewstack = stack.write(file_path=tmp_tif, nodata=-9999)\nnewstack.new_name.read()\nnewstack = None",
"crumbs": [
"Guide",
"Geoprocessing",
- "Random Sampling"
+ "Quick start"
]
},
{
- "objectID": "docs/plotting.html",
- "href": "docs/plotting.html",
- "title": "Plotting",
+ "objectID": "docs/landcover.html",
+ "href": "docs/landcover.html",
+ "title": "Landcover classification",
"section": "",
- "text": "Both Raster and RasterLayer objects include basic plotting methods. The plot method for a RasterLayer object produces a single raster plot using the matplotlib.pyplot.imshow method.\nFor convenience, plot settings such as color ramps and stretches can also be set for each RasterLayer using the RasterLayer.cmap that support matplotlib cmap’s, and the RasterLayer.norm attribute to associate a matplotlib.colors.Normalize stretch with each RasterLayer:\nTo plot a single RasterLayer:\n\nfrom pyspatialml import Raster\nfrom pyspatialml.datasets import nc\nimport matplotlib.pyplot as plt\n\nstack = Raster([nc.band1, nc.band2, nc.band3, nc.band4, nc.band5, nc.band7])\n\n# set RasterLayer color table\nstack.lsat7_2000_10.cmap = \"plasma\"\n\n# plot a single layer using an existing axis\nfig, ax = plt.subplots()\nstack.lsat7_2000_10.plot(ax=ax)\nplt.show()\n\n\n\n\n\n\n\n\nFor RasterLayers that represent categorical data types, e.g. land cover, then the RasterLayer.categorical=True attribute will cause the cmap to be converted to a discrete scale.\nThe default plot method for a Raster object produces a raster-matrix plot of the individual RasterLayers. By default this plot preserves the plotting attributes of the individual rasters:\nPlot all RasterLayers in a Raster object:\n\nstack.lsat7_2000_10.cmap = \"Blues\"\nstack.lsat7_2000_20.cmap = \"Greens\"\nstack.lsat7_2000_30.cmap = \"Reds\"\nstack.lsat7_2000_40.cmap = \"RdPu\"\nstack.lsat7_2000_50.cmap = \"autumn\"\nstack.lsat7_2000_70.cmap = \"hot\"\n\nstack.plot(\n title_fontsize=8,\n label_fontsize=6,\n legend_fontsize=6,\n names=[\"B1\", \"B2\", \"B3\", \"B4\", \"B5\", \"B7\"],\n fig_kwds={\"figsize\": (8, 4)},\n subplots_kwds={\"wspace\": 0.3}\n)\nplt.show()\n\n\n\n\n\n\n\n\nThe Raster.plot method also provides cmap and norm arguments that can be used to override the settings of the individual RasterLayers. 
Additional settings can be passed to control plot layout using the figure_kwds, legend_kwds and subplots_kwds arguments.",
+ "text": "Landcover classification is a common task in remote sensing. This example demonstrates how to extract training data from a raster and vector data, train a classifier, and predict landcover classes on a raster.",
"crumbs": [
- "Guide",
- "Geoprocessing",
- "Plotting"
+ "Tutorials",
+ "Landcover",
+ "Landcover classification"
]
},
{
- "objectID": "docs/installation.html",
- "href": "docs/installation.html",
- "title": "Installation",
- "section": "",
- "text": "Pyspatialml is available on PyPI and can be installed in the usual manner with:\n\npip install Pyspatialml\n\nThe development version, which is more up-to-date with changes to the package especially during these earlier stages of development, can be installed directly via:\n\npip install git+https://github.com/stevenpawley/Pyspatialml",
+ "objectID": "docs/landcover.html#data",
+ "href": "docs/landcover.html#data",
+ "title": "Landcover classification",
+ "section": "Data",
+ "text": "Data\nThe data used in this example is from the Landsat 7 ETM+ sensor, and represents an extract of data derived from the GRASS GIS North Carolina example dataset. The data consists of 6 bands (1, 2, 3, 4, 5, 7) and labelled pixels. The labelled pixels are used as training data for the classifier. The data is stored in the pyspatialml.datasets module.",
"crumbs": [
- "Installation"
+ "Tutorials",
+ "Landcover",
+ "Landcover classification"
]
},
{
- "objectID": "reference/vector.html",
- "href": "reference/vector.html",
- "title": "vector",
- "section": "",
- "text": "vector\n\n\n\n\n\nName\nDescription\n\n\n\n\nfilter_points\nFilter points in geodataframe using a minimum distance buffer.\n\n\nget_random_point_in_polygon\nGenerates random shapely Point geometry objects within a single\n\n\n\n\n\nvector.filter_points(gdf, min_dist=0, remove='first')\nFilter points in geodataframe using a minimum distance buffer.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ngdf\nGeopandas GeoDataFrame\nContaining point geometries.\nrequired\n\n\nmin_dist\n(int or float, optional(default=0))\nMinimum distance by which to filter out closely spaced points.\n0\n\n\nremove\n(str, optional(default=first))\nOptionally choose to remove ‘first’ occurrences or ‘last’ occurrences.\n'first'\n\n\n\n\n\n\n\n\n\nType\nDescription\n\n\n\n\n2d array-like\nNumpy array filtered coordinates\n\n\n\n\n\n\n\nvector.get_random_point_in_polygon(poly)\nGenerates random shapely Point geometry objects within a single shapely Polygon object.\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\npoly\nShapely Polygon object\n\nrequired\n\n\n\n\n\n\n\n\n\nType\nDescription\n\n\n\n\nShapely Point object",
+ "objectID": "docs/landcover.html#extraction-training-data",
+ "href": "docs/landcover.html#extraction-training-data",
+ "title": "Landcover classification",
+ "section": "Extraction Training Data",
+ "text": "Extraction Training Data\nLoad some training data in the form of polygons, points and labelled pixels in geopandas.GeoDataFrame objects. We will also generate some line geometries by converting the polygon boundaries into linestrings. All of these geometry types can be used to spatially query pixel values in a Raster object, however each GeoDataFrame must contain only one type of geometry (i.e. either shapely points, polygons or linestrings).\n\nfrom pyspatialml import Raster\nfrom pyspatialml.datasets import nc\nfrom copy import deepcopy\nimport os\nimport numpy as np\nimport tempfile\nimport geopandas\nimport rasterio.plot\nimport matplotlib.pyplot as plt\n\ntraining_py = geopandas.read_file(nc.polygons)\ntraining_pt = geopandas.read_file(nc.points)\ntraining_px = rasterio.open(nc.labelled_pixels)\ntraining_lines = deepcopy(training_py)\ntraining_lines['geometry'] = training_lines.geometry.boundary\n\nShow training data points and a single raster band using numpy and matplotlib:\n\npredictors = [nc.band1, nc.band2, nc.band3, nc.band4, nc.band5, nc.band7]\nstack = Raster(predictors)\n\nfig, ax = plt.subplots(figsize=(9, 9))\nstack.lsat7_2000_70.plot(ax=ax)\n\ntraining_py.plot(column=\"label\", ax=ax, legend=True)\nplt.show()\n\n\n\n\n\n\n\n\nPixel values in the Raster object can be spatially queried using the extract_vector and extract_raster methods. In addition, the extract_xy method can be used to query pixel values using a 2d array of x and y coordinates.\nThe extract_vector method accepts a Geopandas GeoDataFrame as the gdf argument. For GeoDataFrames containing shapely point geometries, the closest pixel to each point is sampled. For shapely polygon geometries, all pixels whose centres are inside the polygon are sampled. For shapely linestring geometries, every pixel touched by the line is sampled. For all geometry types, pixel values are queries for each geometry separately. 
This means that overlapping polygons or points that fall within the same pixel will cause the same pixel to be sampled multiple times.\nBy default, the extract functions return a Geopandas GeoDataFrame of point geometries and the DataFrame containing the extracted pixels, with the column names set by the names of the raster datasets in the Raster object. The user can also use the return_array=True argument, which instead of returning a DataFrame will return three masked numpy arrays (ids, X, xy) containing the geodataframe index positions, extracted pixel values, and the spatial coordinates of the sampled pixels. These arrays are masked arrays.\nThe extract_raster method can also be used to spatially query pixel values from a Raster object using another raster containing labelled pixels. This raster has to be spatially aligned with the Raster object. The values of the labelled pixels are returned along with the queried pixel values.\n\n# Extract data from rasters at the training point locations:\ndf_points = stack.extract_vector(training_pt)\ndf_polygons = stack.extract_vector(training_py)\ndf_lines = stack.extract_vector(training_lines)\n\nFor any vector features, a GeoDataFrame is returned containing the extracted pixel values. A pandas.MultiIndex is used to relate the pixels back to the original geometries, with the pixel_idx index referring to the index of each pixel, and the geometry_idx referring to the index of the original geometry in the supplied GeoDataFrame. The pixel values themselves are represented as shapely.geometry.Point objects. These will need to be joined back with the columns of the vector features to get the labelled classes. 
Here we will join the extracted pixels using the “id” column and the GeoDataFrame index of the vector features:\n\n# Join the extracted values with other columns from the training data\ndf_points[\"id\"] = training_pt[\"id\"].values\ndf_points = df_points.dropna()\ndf_points.head()\n\ndf_polygons = df_polygons.merge(\n right=training_py.loc[:, [\"label\", \"id\"]], \n left_on=\"geometry_idx\", \n right_on=\"index\",\n right_index=True\n)\n\nIf the training data is from labelled pixels in a raster, then the extracted data will contain a “value” column that contains the pixel labels:\n\ndf_raster = stack.extract_raster(training_px)",
"crumbs": [
- "Reference",
- "Vector tools",
- "vector"
+ "Tutorials",
+ "Landcover",
+ "Landcover classification"
]
},
{
- "objectID": "reference/vector.html#functions",
- "href": "reference/vector.html#functions",
- "title": "vector",
- "section": "",
- "text": "Name\nDescription\n\n\n\n\nfilter_points\nFilter points in geodataframe using a minimum distance buffer.\n\n\nget_random_point_in_polygon\nGenerates random shapely Point geometry objects within a single\n\n\n\n\n\nvector.filter_points(gdf, min_dist=0, remove='first')\nFilter points in geodataframe using a minimum distance buffer.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ngdf\nGeopandas GeoDataFrame\nContaining point geometries.\nrequired\n\n\nmin_dist\n(int or float, optional(default=0))\nMinimum distance by which to filter out closely spaced points.\n0\n\n\nremove\n(str, optional(default=first))\nOptionally choose to remove ‘first’ occurrences or ‘last’ occurrences.\n'first'\n\n\n\n\n\n\n\n\n\nType\nDescription\n\n\n\n\n2d array-like\nNumpy array filtered coordinates\n\n\n\n\n\n\n\nvector.get_random_point_in_polygon(poly)\nGenerates random shapely Point geometry objects within a single shapely Polygon object.\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\npoly\nShapely Polygon object\n\nrequired\n\n\n\n\n\n\n\n\n\nType\nDescription\n\n\n\n\nShapely Point object",
+ "objectID": "docs/landcover.html#model-training",
+ "href": "docs/landcover.html#model-training",
+ "title": "Landcover classification",
+ "section": "Model Training",
+ "text": "Model Training\nNext we can train a logistic regression classifier:\n\nfrom sklearn.linear_model import LogisticRegressionCV\nfrom sklearn.preprocessing import StandardScaler\nfrom sklearn.pipeline import Pipeline\nfrom sklearn.model_selection import cross_validate\n\n# define the classifier with standardization of the input features in a\n# pipeline\nlr = Pipeline(\n [('scaling', StandardScaler()),\n ('classifier', LogisticRegressionCV(n_jobs=-1))])\n\n# remove NaNs from training data\ndf_polygons = df_polygons.dropna()\n\n# fit the classifier\nX = df_polygons.drop(columns=[\"id\", \"label\", \"geometry\"]).values\ny = df_polygons[\"id\"].values\nlr.fit(X, y)\n\nPipeline(steps=[('scaling', StandardScaler()),\n ('classifier', LogisticRegressionCV(n_jobs=-1))])In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org. Pipeline?Documentation for PipelineiFittedPipeline(steps=[('scaling', StandardScaler()),\n ('classifier', LogisticRegressionCV(n_jobs=-1))]) StandardScaler?Documentation for StandardScalerStandardScaler() LogisticRegressionCV?Documentation for LogisticRegressionCVLogisticRegressionCV(n_jobs=-1) \n\n\nAfter defining a classifier, a typical step consists of performing a cross-validation to evaluate the performance of the model. Scikit-learn provides the cross_validate function for this purpose. In comparison to non-spatial data, spatial data can be spatially correlated, which potentially can mean that geographically proximal samples may not represent truly independent samples if they are within the autocorrelation range of some of the predictors. 
This will lead to overly optimistic performance measures if samples in the training dataset / cross-validation partition are strongly spatially correlated with samples in the test dataset / cross-validation partition.\nIn this case, performing cross-validation using groups is useful, because these groups can represent spatial clusters of training samples, and samples from the same group will never occur in both the training and test partitions of a cross-validation. Here we can use the polygon indices as the groups, i.e. pixels within the same polygon will not be split into training and test partitions:\n\nscores = cross_validate(\n estimator=lr,\n X=X,\n y=y,\n groups=df_polygons.index.droplevel(\"pixel_idx\"),\n scoring=\"accuracy\",\n cv=3,\n n_jobs=1,\n)\nnp.round(scores['test_score'].mean(), 2)\n\n0.75",
"crumbs": [
- "Reference",
- "Vector tools",
- "vector"
+ "Tutorials",
+ "Landcover",
+ "Landcover classification"
]
},
{
- "objectID": "reference/RasterLayer.html",
- "href": "reference/RasterLayer.html",
- "title": "RasterLayer",
- "section": "",
- "text": "RasterLayer(self, band)\nRepresents a single raster band derived from a single or multi-band raster dataset\nSimple wrapper around a rasterio.Band object with additional methods. Used because the Rasterio.Band.ds.read method reads all bands from a multi-band dataset, whereas the RasterLayer read method only reads a single band.\nMethods encapsulated in RasterLayer objects represent those that typically would only be applied to a single-band of a raster, i.e. sieve-clump, distance to non-NaN pixels, or arithmetic operations on individual layers.\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\n\n\n\n\nbidx\nint\nThe band index of the RasterLayer within the file dataset.\n\n\ndtype\nstr\nThe data type of the RasterLayer.\n\n\nds\nrasterio.rasterio.band\nThe underlying rasterio.band object.\n\n\nname\nstr\nA syntactically valid name for the RasterLayer.\n\n\nfile\nstr\nThe file path to the dataset.\n\n\nnodata\nany number\nThe number that is used to represent nodata pixels in the RasterLayer.\n\n\ndriver\nstr\nThe name of the GDAL format driver.\n\n\nmeta\ndict\nA python dict storing the RasterLayer metadata.\n\n\ntransform\naffine.Affine object\nThe affine transform parameters.\n\n\ncount\nint\nNumber of layers; always equal to 1.\n\n\nshape\ntuple\nShape of RasterLayer in (rows, columns)\n\n\nwidth, height\nint\nThe width (cols) and height (rows) of the dataset.\n\n\nbounds\nBoundingBox named tuple\nA named tuple with left, bottom, right and top coordinates of the dataset.\n\n\ncmap\nstr\nThe name of matplotlib map, or a custom matplotlib.cm.LinearSegmentedColormap or ListedColormap object.\n\n\nnorm\nmatplotlib.matplotlib.colors.matplotlib.colors.Normalize(opt)\nA matplotlib.colors.Normalize to apply to the RasterLayer. 
This overides the norm attribute of the RasterLayer.\n\n\n\n\n\n\n\n\n\nName\nDescription\n\n\n\n\nmax\nMaximum value.\n\n\nmean\nMean value\n\n\nmedian\nMedian value\n\n\nmin\nMinimum value.\n\n\nplot\nPlot a RasterLayer using matplotlib.pyplot.imshow\n\n\nread\nRead method for a single RasterLayer.\n\n\nstddev\nStandard deviation\n\n\nwrite\nWrite method for a single RasterLayer.\n\n\n\n\n\nRasterLayer.max(max_pixels=10000)\nMaximum value.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\nmax_pixels\nint\nNumber of pixels used to inform statistical estimate.\n10000\n\n\n\n\n\n\n\n\n\nType\nDescription\n\n\n\n\nnumpy.numpy.float32\nThe maximum value of the object’s pixels.\n\n\n\n\n\n\n\nRasterLayer.mean(max_pixels=10000)\nMean value\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\nmax_pixels\nint\nNumber of pixels used to inform statistical estimate.\n10000\n\n\n\n\n\n\n\n\n\nType\nDescription\n\n\n\n\nnumpy.numpy.float32\nThe mean value of the object’s pixels.\n\n\n\n\n\n\n\nRasterLayer.median(max_pixels=10000)\nMedian value\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\nmax_pixels\nint\nNumber of pixels used to inform statistical estimate.\n10000\n\n\n\n\n\n\n\n\n\nType\nDescription\n\n\n\n\nnumpy.numpy.float32\nThe medium value of the object’s pixels.\n\n\n\n\n\n\n\nRasterLayer.min(max_pixels=10000)\nMinimum value.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\nmax_pixels\nint\nNumber of pixels used to inform statistical estimate.\n10000\n\n\n\n\n\n\n\n\n\nType\nDescription\n\n\n\n\nnumpy.numpy.float32\nThe minimum value of the object\n\n\n\n\n\n\n\nRasterLayer.plot(cmap=None, norm=None, ax=None, cax=None, figsize=None, out_shape=(500, 500), categorical=None, legend=False, vmin=None, vmax=None, fig_kwds=None, legend_kwds=None)\nPlot a RasterLayer using matplotlib.pyplot.imshow\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncmap\nstr (default None)\nThe name of a colormap 
recognized by matplotlib. Overrides the cmap attribute of the RasterLayer.\nNone\n\n\nnorm\nmatplotlib.matplotlib.colors.matplotlib.colors.Normalize(opt)\nA matplotlib.colors.Normalize to apply to the RasterLayer. This overrides the norm attribute of the RasterLayer.\nNone\n\n\nax\nmatplotlib.pyplot.Artist (optional\naxes instance on which to draw to plot.\nNone)\n\n\ncax\nmatplotlib.pyplot.Artist (optional\naxes on which to draw the legend.\nNone)\n\n\nfigsize\ntuple of integers (optional\nSize of the matplotlib.figure.Figure. If the ax argument is given explicitly, figsize is ignored.\nNone)\n\n\nout_shape\ntuple\nNumber of rows, cols to read from the raster datasets for plotting.\n(500, 500)\n\n\ncategorical\nbool (optional\nif True then the raster values will be considered to represent discrete values, otherwise they are considered to represent continuous values. This overrides the RasterLayer ‘categorical’ attribute. Setting the argument categorical to True is ignored if the RasterLayer.categorical is already True.\nFalse)\n\n\nlegend\nbool (optional\nWhether to plot the legend.\nFalse)\n\n\nvmin\nscale (optional\nvmin and vmax define the data range that the colormap covers. By default, the colormap covers the complete value range of the supplied data. vmin, vmax are ignored if the norm parameter is used.\nNone)\n\n\nxmax\nscale (optional\nvmin and vmax define the data range that the colormap covers. By default, the colormap covers the complete value range of the supplied data. vmin, vmax are ignored if the norm parameter is used.\nNone)\n\n\nfig_kwds\ndict (optional\nAdditional arguments to pass to the matplotlib.pyplot.figure call when creating the figure object. 
Ignored if ax is passed to the plot function.\nNone)\n\n\nlegend_kwds\ndict (optional\nKeyword arguments to pass to matplotlib.pyplot.colorbar().\nNone)\n\n\n\n\n\n\n\n\n\nType\nDescription\n\n\n\n\nmatplotlib axes instance\n\n\n\n\n\n\n\n\nRasterLayer.read(**kwargs)\nRead method for a single RasterLayer.\nReads the pixel values from a RasterLayer into a ndarray that always will have two dimensions in the order of (rows, columns).\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\n**kwargs\nnamed arguments that can be passed to the the\nrasterio.DatasetReader.read method.\n{}\n\n\n\n\n\n\n\nRasterLayer.stddev(max_pixels=10000)\nStandard deviation\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\nmax_pixels\nint\nNumber of pixels used to inform statistical estimate.\n10000\n\n\n\n\n\n\n\n\n\nType\nDescription\n\n\n\n\nnumpy.numpy.float32\nThe standard deviation of the object’s pixels.\n\n\n\n\n\n\n\nRasterLayer.write(file_path, driver='GTiff', dtype=None, nodata=None, **kwargs)\nWrite method for a single RasterLayer.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\nfile_path\nstr(opt)\nFile path to save the dataset.\nrequired\n\n\ndriver\nstr\nGDAL-compatible driver used for the file format.\n'GTiff'\n\n\ndtype\nstr(opt)\nNumpy dtype used for the file. If omitted then the RasterLayer’s dtype is used.\nNone\n\n\nnodata\nany number (opt)\nA value used to represent the nodata pixels. If omitted then the RasterLayer’s nodata value is used (if assigned already).\nNone\n\n\nkwargs\nopt\nOptional named arguments to pass to the format drivers. For example can be compress=\"deflate\" to add compression.\n{}\n\n\n\n\n\n\n\n\n\nType\nDescription\n\n\n\n\npyspatialml.pyspatialml.RasterLayer",
+ "objectID": "docs/landcover.html#raster-prediction",
+ "href": "docs/landcover.html#raster-prediction",
+ "title": "Landcover classification",
+ "section": "Raster Prediction",
+ "text": "Raster Prediction\nPrediction on the Raster object is performed using the predict method. The estimator is the only required argument. If the file_path argument is not specified then the result is automatically written to a temporary file. The predict method returns a rasterio.io.DatasetReader object which is open.\n\n# prediction\nresult = stack.predict(estimator=lr, dtype='int16', nodata=0)\nresult_probs = stack.predict_proba(estimator=lr)\n\n# plot classification result\nresult.iloc[0].cmap = \"Dark2\"\nresult.iloc[0].categorical = True\n\nresult.plot()\nplt.show()\n\n\n\n\n\n\n\n\nThe predict_proba method can be used to output class probabilities as a multi-band raster (a band for each class probability). In the latter case, indexes can also be supplied if you only want to output the probabilities for a particular class, or list of classes, by supplying the indices of those classes:\n\nresult_probs.plot()\nplt.show()\n\n/Users/stevenpawley/Library/Caches/pypoetry/virtualenvs/pyspatialml-NqZ1tMUm-py3.11/lib/python3.11/site-packages/matplotlib/image.py:499: RuntimeWarning: overflow encountered in divide\n A_scaled /= ((a_max - a_min) / frac)",
"crumbs": [
- "Reference",
- "Raster datasets",
- "RasterLayer"
+ "Tutorials",
+ "Landcover",
+ "Landcover classification"
]
},
{
- "objectID": "reference/RasterLayer.html#attributes",
- "href": "reference/RasterLayer.html#attributes",
- "title": "RasterLayer",
+ "objectID": "docs/multitarget-regression-soil-properties.html",
+ "href": "docs/multitarget-regression-soil-properties.html",
+ "title": "Multi-Target Spatial Prediction using the Meuse Dataset",
"section": "",
- "text": "Name\nType\nDescription\n\n\n\n\nbidx\nint\nThe band index of the RasterLayer within the file dataset.\n\n\ndtype\nstr\nThe data type of the RasterLayer.\n\n\nds\nrasterio.rasterio.band\nThe underlying rasterio.band object.\n\n\nname\nstr\nA syntactically valid name for the RasterLayer.\n\n\nfile\nstr\nThe file path to the dataset.\n\n\nnodata\nany number\nThe number that is used to represent nodata pixels in the RasterLayer.\n\n\ndriver\nstr\nThe name of the GDAL format driver.\n\n\nmeta\ndict\nA python dict storing the RasterLayer metadata.\n\n\ntransform\naffine.Affine object\nThe affine transform parameters.\n\n\ncount\nint\nNumber of layers; always equal to 1.\n\n\nshape\ntuple\nShape of RasterLayer in (rows, columns)\n\n\nwidth, height\nint\nThe width (cols) and height (rows) of the dataset.\n\n\nbounds\nBoundingBox named tuple\nA named tuple with left, bottom, right and top coordinates of the dataset.\n\n\ncmap\nstr\nThe name of matplotlib map, or a custom matplotlib.cm.LinearSegmentedColormap or ListedColormap object.\n\n\nnorm\nmatplotlib.matplotlib.colors.matplotlib.colors.Normalize(opt)\nA matplotlib.colors.Normalize to apply to the RasterLayer. This overides the norm attribute of the RasterLayer.",
+ "text": "Here we are using the meuse dataset which is included in the pyspatialml package as an example of performing a spatial model and prediction. We can access the datasets using the pyspatialml.datasets module:\nfrom copy import deepcopy\nfrom tempfile import NamedTemporaryFile\nimport geopandas as gpd\nimport numpy as np\nfrom pyspatialml import Raster\nfrom pyspatialml.preprocessing import xy_coordinates, distance_to_corners\nimport pyspatialml.datasets.meuse as ms\n\nimport matplotlib as mpl\nimport matplotlib.pyplot as plt\nfrom matplotlib import cm\npredictor_files = ms.predictors\ntraining_pts_file = ms.meuse\nstack = Raster(predictor_files)\nstack.names\n\ndict_keys(['chnl_dist', 'dem', 'dist', 'ffreq', 'landimg2', 'landimg3', 'landimg4', 'mrvbf', 'rsp', 'slope', 'soil', 'twi'])\nPyspatialml implements pandas-style indexing for Raster objects, using Raster.loc to index by the name of the raster, and Raster.iloc to select by index. This method also accepts slices. Label-based indexing is also provided directly by the __getitem__ magic method, i.e. 
Raster[name] or for multiple layers Raster[(names)].\nFor example we can remove layers from Raster object using the Raster.drop method, or by subsetting the raster:\nstack.drop('ffreq')\n\nRaster Object Containing 11 Layers\n attribute values\n0 names [chnl_dist, dem, dist, landimg2, landimg3, lan...\n1 files [/Users/stevenpawley/GitHub/Pyspatialml/pyspat...\n2 rows 104\n3 cols 78\n4 res (40.0, 40.0)\n5 nodatavals [-99999.0, -99999.0, -1.0, -1.0, -1.0, -1.0, -...\nWe can store matplotlib cmaps as an attribute within each layer in the Raster:\nstack.chnl_dist.cmap = 'RdBu'\nstack.dem.cmap = 'terrain'\nstack.dist.cmap = 'Reds'\nstack.landimg2.cmap = 'Greys'\nstack.landimg3.cmap = 'Greys'\nstack.landimg4.cmap = 'Greys'\nstack.landimg4.cmap = 'Greys'\nstack.mrvbf.cmap = 'jet'\nstack.rsp.cmap = 'gnuplot2'\nstack.slope.cmap = 'PuRd'\nstack.soil.cmap = 'Set2'\nstack.twi.cmap = 'coolwarm'\nPlot the predictors in the Raster object as a raster matrix:\nmpl.style.use('seaborn-v0_8')\naxs = stack.plot(figsize=(9, 7))\nax = axs.flatten()[10]\nim = ax.images\nim[0].colorbar.set_ticks([1,2,3])\nax = axs.flatten()[8]\nax.tick_params(axis='x', labelrotation=65)\n\nplt.tight_layout()\nplt.show()",
"crumbs": [
- "Reference",
- "Raster datasets",
- "RasterLayer"
+ "Tutorials",
+ "Multitarget regression",
+ "Multi-Target Spatial Prediction using the Meuse Dataset"
]
},
{
- "objectID": "reference/RasterLayer.html#methods",
- "href": "reference/RasterLayer.html#methods",
- "title": "RasterLayer",
- "section": "",
- "text": "Name\nDescription\n\n\n\n\nmax\nMaximum value.\n\n\nmean\nMean value\n\n\nmedian\nMedian value\n\n\nmin\nMinimum value.\n\n\nplot\nPlot a RasterLayer using matplotlib.pyplot.imshow\n\n\nread\nRead method for a single RasterLayer.\n\n\nstddev\nStandard deviation\n\n\nwrite\nWrite method for a single RasterLayer.\n\n\n\n\n\nRasterLayer.max(max_pixels=10000)\nMaximum value.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\nmax_pixels\nint\nNumber of pixels used to inform statistical estimate.\n10000\n\n\n\n\n\n\n\n\n\nType\nDescription\n\n\n\n\nnumpy.numpy.float32\nThe maximum value of the object’s pixels.\n\n\n\n\n\n\n\nRasterLayer.mean(max_pixels=10000)\nMean value\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\nmax_pixels\nint\nNumber of pixels used to inform statistical estimate.\n10000\n\n\n\n\n\n\n\n\n\nType\nDescription\n\n\n\n\nnumpy.numpy.float32\nThe mean value of the object’s pixels.\n\n\n\n\n\n\n\nRasterLayer.median(max_pixels=10000)\nMedian value\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\nmax_pixels\nint\nNumber of pixels used to inform statistical estimate.\n10000\n\n\n\n\n\n\n\n\n\nType\nDescription\n\n\n\n\nnumpy.numpy.float32\nThe medium value of the object’s pixels.\n\n\n\n\n\n\n\nRasterLayer.min(max_pixels=10000)\nMinimum value.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\nmax_pixels\nint\nNumber of pixels used to inform statistical estimate.\n10000\n\n\n\n\n\n\n\n\n\nType\nDescription\n\n\n\n\nnumpy.numpy.float32\nThe minimum value of the object\n\n\n\n\n\n\n\nRasterLayer.plot(cmap=None, norm=None, ax=None, cax=None, figsize=None, out_shape=(500, 500), categorical=None, legend=False, vmin=None, vmax=None, fig_kwds=None, legend_kwds=None)\nPlot a RasterLayer using matplotlib.pyplot.imshow\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncmap\nstr (default None)\nThe name of a colormap recognized by matplotlib. 
Overrides the cmap attribute of the RasterLayer.\nNone\n\n\nnorm\nmatplotlib.matplotlib.colors.matplotlib.colors.Normalize(opt)\nA matplotlib.colors.Normalize to apply to the RasterLayer. This overrides the norm attribute of the RasterLayer.\nNone\n\n\nax\nmatplotlib.pyplot.Artist (optional\naxes instance on which to draw to plot.\nNone)\n\n\ncax\nmatplotlib.pyplot.Artist (optional\naxes on which to draw the legend.\nNone)\n\n\nfigsize\ntuple of integers (optional\nSize of the matplotlib.figure.Figure. If the ax argument is given explicitly, figsize is ignored.\nNone)\n\n\nout_shape\ntuple\nNumber of rows, cols to read from the raster datasets for plotting.\n(500, 500)\n\n\ncategorical\nbool (optional\nif True then the raster values will be considered to represent discrete values, otherwise they are considered to represent continuous values. This overrides the RasterLayer ‘categorical’ attribute. Setting the argument categorical to True is ignored if the RasterLayer.categorical is already True.\nFalse)\n\n\nlegend\nbool (optional\nWhether to plot the legend.\nFalse)\n\n\nvmin\nscale (optional\nvmin and vmax define the data range that the colormap covers. By default, the colormap covers the complete value range of the supplied data. vmin, vmax are ignored if the norm parameter is used.\nNone)\n\n\nxmax\nscale (optional\nvmin and vmax define the data range that the colormap covers. By default, the colormap covers the complete value range of the supplied data. vmin, vmax are ignored if the norm parameter is used.\nNone)\n\n\nfig_kwds\ndict (optional\nAdditional arguments to pass to the matplotlib.pyplot.figure call when creating the figure object. 
Ignored if ax is passed to the plot function.\nNone)\n\n\nlegend_kwds\ndict (optional\nKeyword arguments to pass to matplotlib.pyplot.colorbar().\nNone)\n\n\n\n\n\n\n\n\n\nType\nDescription\n\n\n\n\nmatplotlib axes instance\n\n\n\n\n\n\n\n\nRasterLayer.read(**kwargs)\nRead method for a single RasterLayer.\nReads the pixel values from a RasterLayer into a ndarray that always will have two dimensions in the order of (rows, columns).\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\n**kwargs\nnamed arguments that can be passed to the the\nrasterio.DatasetReader.read method.\n{}\n\n\n\n\n\n\n\nRasterLayer.stddev(max_pixels=10000)\nStandard deviation\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\nmax_pixels\nint\nNumber of pixels used to inform statistical estimate.\n10000\n\n\n\n\n\n\n\n\n\nType\nDescription\n\n\n\n\nnumpy.numpy.float32\nThe standard deviation of the object’s pixels.\n\n\n\n\n\n\n\nRasterLayer.write(file_path, driver='GTiff', dtype=None, nodata=None, **kwargs)\nWrite method for a single RasterLayer.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\nfile_path\nstr(opt)\nFile path to save the dataset.\nrequired\n\n\ndriver\nstr\nGDAL-compatible driver used for the file format.\n'GTiff'\n\n\ndtype\nstr(opt)\nNumpy dtype used for the file. If omitted then the RasterLayer’s dtype is used.\nNone\n\n\nnodata\nany number (opt)\nA value used to represent the nodata pixels. If omitted then the RasterLayer’s nodata value is used (if assigned already).\nNone\n\n\nkwargs\nopt\nOptional named arguments to pass to the format drivers. For example can be compress=\"deflate\" to add compression.\n{}\n\n\n\n\n\n\n\n\n\nType\nDescription\n\n\n\n\npyspatialml.pyspatialml.RasterLayer",
+ "objectID": "docs/multitarget-regression-soil-properties.html#feature-engineering",
+ "href": "docs/multitarget-regression-soil-properties.html#feature-engineering",
+ "title": "Multi-Target Spatial Prediction using the Meuse Dataset",
+ "section": "Feature Engineering",
+ "text": "Feature Engineering\nWe want the prediction results to be dependent on the spatial locations of the training data. So to include spatial information, coordinate grids can be generated and added to the Raster object:\n\nxy_layer = xy_coordinates(\n layer=stack.iloc[0], \n file_path=NamedTemporaryFile(suffix=\".tif\").name\n)\n\n\nxy_layer = xy_coordinates(\n layer=stack.iloc[0], \n file_path=NamedTemporaryFile(suffix=\".tif\").name\n)\n\nedms = distance_to_corners(\n layer=stack.iloc[0], \n file_path=NamedTemporaryFile(suffix=\".tif\").name\n)\nedms.rename(\n {old: new for (old, new) in zip(edms.names, [\"tl\", \"tr\", \"bl\", \"br\", \"c\"])},\n in_place=True\n)\n\nedms.plot()\nplt.show()\n\n\n\n\n\n\n\n\nAppend them to the Raster object:\n\nstack = stack.append([xy_layer, edms])\n\nPlot the new predictors:\n\naxs = stack.plot(figsize=(9, 7))\nax = axs.flatten()[10]\nim = ax.images\nim[0].colorbar.set_ticks([1,2,3])\nax = axs.flatten()[8]\nax.tick_params(axis='x', labelrotation=65)\n\nplt.tight_layout()\nplt.show()\n\n\n\n\n\n\n\n\nThe area that is filled by some of the grids is different. This doesn’t matter for the prediction because pixels in the Raster object that include some NaNs in some of the layers will be removed. However, the plots could potentially be given a cleaner look. We can use the Raster.intersect method to fix this:\n\nstack = stack.intersect()\n\n\naxs = stack.plot(figsize=(9, 7))\nax = axs.flatten()[10]\nim = ax.images\nim[0].colorbar.set_ticks([1,2,3])\nax = axs.flatten()[8]\nax.tick_params(axis='x', labelrotation=65)\n\nplt.tight_layout()\nplt.show()",
"crumbs": [
- "Reference",
- "Raster datasets",
- "RasterLayer"
+ "Tutorials",
+ "Multitarget regression",
+ "Multi-Target Spatial Prediction using the Meuse Dataset"
]
},
{
- "objectID": "reference/preprocessing.html",
- "href": "reference/preprocessing.html",
- "title": "preprocessing",
- "section": "",
- "text": "preprocessing\n\n\n\n\n\nName\nDescription\n\n\n\n\ndistance_to_corners\nGenerate buffer distances to corner and centre coordinates of raster\n\n\ndistance_to_samples\nGenerate buffer distances to x,y coordinates.\n\n\none_hot_encode\nOne-hot encoding of a RasterLayer.\n\n\nrotated_coordinates\nGenerate 2d arrays with n_angles rotated coordinates.\n\n\nxy_coordinates\nFill 2d arrays with their x,y indices.\n\n\n\n\n\npreprocessing.distance_to_corners(layer, file_path, driver='GTiff')\nGenerate buffer distances to corner and centre coordinates of raster extent.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\nlayer\npyspatialml.RasterLayer, or rasterio.DatasetReader\n\nrequired\n\n\nfile_path\nstr\nFile path to save to the resulting Raster object\nrequired\n\n\ndriver\n(str, optional.optional.Default is GTiff)\nGDAL driver to use to save raster.\n'GTiff'\n\n\n\n\n\n\n\n\n\nType\nDescription\n\n\n\n\npyspatialml.Raster object\n\n\n\n\n\n\n\n\npreprocessing.distance_to_samples(layer, file_path, rows, cols, driver='GTiff')\nGenerate buffer distances to x,y coordinates.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\nlayer\npyspatialml.RasterLayer, or rasterio.DatasetReader\nRasterLayer to use as a template.\nrequired\n\n\nfile_path\nstr\nFile path to save to the resulting Raster object.\nrequired\n\n\nrows\n1d numpy array\narray of row indexes.\nrequired\n\n\ncols\n1d numpy array\narray of column indexes.\nrequired\n\n\ndriver\nstr\nGDAL driver to use to save raster.\n'GTiff'\n\n\n\n\n\n\n\n\n\nType\nDescription\n\n\n\n\npyspatialml.Raster object\n\n\n\n\n\n\n\n\npreprocessing.one_hot_encode(layer, file_path, categories=None, driver='GTiff')\nOne-hot encoding of a RasterLayer.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\nlayer\npyspatialml.pyspatialml.RasterLayer\nContaining categories to perform one-hot encoding on.\nrequired\n\n\nfile_path\nstr\nFile path to save one-hot encoded 
raster.\nrequired\n\n\ncategories\n(list, ndarray)\nOptional list of categories to extract. Default performs one-hot encoding on all categorical values in the input layer.\nNone\n\n\ndriver\n(str, options.options.Default is GTiff)\nGDAL-compatible driver.\n'GTiff'\n\n\n\n\n\n\n\n\n\n\n\n\n\nType\nDescription\n\n\n\n\npyspatialml.pyspatialml.Raster\nEach categorical value is encoded as a layer with a Raster object.\n\n\n\n\n\n\n\npreprocessing.rotated_coordinates(layer, file_path, n_angles=8, driver='GTiff')\nGenerate 2d arrays with n_angles rotated coordinates.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\nlayer\npyspatialml.RasterLayer, or rasterio.DatasetReader\nRasterLayer to use as a template.\nrequired\n\n\nn_angles\n(int, optional.optional.Default is 8)\nNumber of angles to rotate coordinate system by.\n8\n\n\ndriver\n(str, optional.optional.Default is GTiff)\nGDAL driver to use to save raster.\n'GTiff'\n\n\n\n\n\n\n\n\n\nType\nDescription\n\n\n\n\npyspatialml.pyspatialml.Raster\n\n\n\n\n\n\n\n\npreprocessing.xy_coordinates(layer, file_path, driver='GTiff')\nFill 2d arrays with their x,y indices.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\nlayer\npyspatialml.RasterLayer, or rasterio.DatasetReader\nRasterLayer to use as a template.\nrequired\n\n\nfile_path\nstr\nFile path to save to the resulting Raster object.s\nrequired\n\n\ndriver\n(str, options.options.Default is GTiff)\nGDAL driver to use to save raster.\n'GTiff'\n\n\n\n\n\n\n\n\n\nType\nDescription\n\n\n\n\npyspatialml.Raster object",
+ "objectID": "docs/multitarget-regression-soil-properties.html#read-the-meuse-dataset",
+ "href": "docs/multitarget-regression-soil-properties.html#read-the-meuse-dataset",
+ "title": "Multi-Target Spatial Prediction using the Meuse Dataset",
+ "section": "Read the Meuse Dataset",
+ "text": "Read the Meuse Dataset\n\ntraining_pts = gpd.read_file(training_pts_file)\ntraining_pts.head()\n\n\n\n\n\n\n\n\n\ncadmium\ncopper\nlead\nzinc\nelev\ndist\nom\nffreq\nsoil\nlime\nlanduse\ndist.m\ngeometry\n\n\n\n\n0\n11.7\n85.0\n299.0\n1022.0\n7.909\n0.001358\n13.6\n1\n1\n1\nAh\n50.0\nPOINT (181072.000 333611.000)\n\n\n1\n8.6\n81.0\n277.0\n1141.0\n6.983\n0.012224\n14.0\n1\n1\n1\nAh\n30.0\nPOINT (181025.000 333558.000)\n\n\n2\n6.5\n68.0\n199.0\n640.0\n7.800\n0.103029\n13.0\n1\n1\n1\nAh\n150.0\nPOINT (181165.000 333537.000)\n\n\n3\n2.6\n81.0\n116.0\n257.0\n7.655\n0.190094\n8.0\n1\n2\n0\nGa\n270.0\nPOINT (181298.000 333484.000)\n\n\n4\n2.8\n48.0\n117.0\n269.0\n7.480\n0.277090\n8.7\n1\n2\n0\nAh\n380.0\nPOINT (181307.000 333330.000)\n\n\n\n\n\n\n\n\nPlot the training points:\n\nfrom mpl_toolkits.axes_grid1 import make_axes_locatable\n\nfig, axs = plt.subplots(2, 3, figsize=(8.5, 7))\n\nfor i, (ax, target) in enumerate(zip(axs.ravel(), ['cadmium', 'copper', 'lead', 'zinc', 'om'])):\n ax.set_title(target.title())\n divider = make_axes_locatable(ax)\n cax = divider.append_axes(\"right\", size=\"10%\", pad=0.05)\n training_pts.plot(column=target, legend=True, ax=ax, cax=cax, cmap='viridis')\n \n if i != 0:\n ax.set_yticklabels([])\n \n if i != 3:\n ax.set_xticklabels([])\n else:\n ax.tick_params(axis='x', labelrotation=65)\n \nfig.delaxes(axs.flatten()[i+1])\nplt.tight_layout()\nplt.show()",
"crumbs": [
- "Reference",
- "Preprocessing",
- "preprocessing"
+ "Tutorials",
+ "Multitarget regression",
+ "Multi-Target Spatial Prediction using the Meuse Dataset"
]
},
{
- "objectID": "reference/preprocessing.html#functions",
- "href": "reference/preprocessing.html#functions",
- "title": "preprocessing",
- "section": "",
- "text": "Name\nDescription\n\n\n\n\ndistance_to_corners\nGenerate buffer distances to corner and centre coordinates of raster\n\n\ndistance_to_samples\nGenerate buffer distances to x,y coordinates.\n\n\none_hot_encode\nOne-hot encoding of a RasterLayer.\n\n\nrotated_coordinates\nGenerate 2d arrays with n_angles rotated coordinates.\n\n\nxy_coordinates\nFill 2d arrays with their x,y indices.\n\n\n\n\n\npreprocessing.distance_to_corners(layer, file_path, driver='GTiff')\nGenerate buffer distances to corner and centre coordinates of raster extent.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\nlayer\npyspatialml.RasterLayer, or rasterio.DatasetReader\n\nrequired\n\n\nfile_path\nstr\nFile path to save to the resulting Raster object\nrequired\n\n\ndriver\n(str, optional.optional.Default is GTiff)\nGDAL driver to use to save raster.\n'GTiff'\n\n\n\n\n\n\n\n\n\nType\nDescription\n\n\n\n\npyspatialml.Raster object\n\n\n\n\n\n\n\n\npreprocessing.distance_to_samples(layer, file_path, rows, cols, driver='GTiff')\nGenerate buffer distances to x,y coordinates.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\nlayer\npyspatialml.RasterLayer, or rasterio.DatasetReader\nRasterLayer to use as a template.\nrequired\n\n\nfile_path\nstr\nFile path to save to the resulting Raster object.\nrequired\n\n\nrows\n1d numpy array\narray of row indexes.\nrequired\n\n\ncols\n1d numpy array\narray of column indexes.\nrequired\n\n\ndriver\nstr\nGDAL driver to use to save raster.\n'GTiff'\n\n\n\n\n\n\n\n\n\nType\nDescription\n\n\n\n\npyspatialml.Raster object\n\n\n\n\n\n\n\n\npreprocessing.one_hot_encode(layer, file_path, categories=None, driver='GTiff')\nOne-hot encoding of a RasterLayer.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\nlayer\npyspatialml.pyspatialml.RasterLayer\nContaining categories to perform one-hot encoding on.\nrequired\n\n\nfile_path\nstr\nFile path to save one-hot encoded raster.\nrequired\n\n\ncategories\n(list, 
ndarray)\nOptional list of categories to extract. Default performs one-hot encoding on all categorical values in the input layer.\nNone\n\n\ndriver\n(str, options.options.Default is GTiff)\nGDAL-compatible driver.\n'GTiff'\n\n\n\n\n\n\n\n\n\n\n\n\n\nType\nDescription\n\n\n\n\npyspatialml.pyspatialml.Raster\nEach categorical value is encoded as a layer with a Raster object.\n\n\n\n\n\n\n\npreprocessing.rotated_coordinates(layer, file_path, n_angles=8, driver='GTiff')\nGenerate 2d arrays with n_angles rotated coordinates.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\nlayer\npyspatialml.RasterLayer, or rasterio.DatasetReader\nRasterLayer to use as a template.\nrequired\n\n\nn_angles\n(int, optional.optional.Default is 8)\nNumber of angles to rotate coordinate system by.\n8\n\n\ndriver\n(str, optional.optional.Default is GTiff)\nGDAL driver to use to save raster.\n'GTiff'\n\n\n\n\n\n\n\n\n\nType\nDescription\n\n\n\n\npyspatialml.pyspatialml.Raster\n\n\n\n\n\n\n\n\npreprocessing.xy_coordinates(layer, file_path, driver='GTiff')\nFill 2d arrays with their x,y indices.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\nlayer\npyspatialml.RasterLayer, or rasterio.DatasetReader\nRasterLayer to use as a template.\nrequired\n\n\nfile_path\nstr\nFile path to save to the resulting Raster object.s\nrequired\n\n\ndriver\n(str, options.options.Default is GTiff)\nGDAL driver to use to save raster.\n'GTiff'\n\n\n\n\n\n\n\n\n\nType\nDescription\n\n\n\n\npyspatialml.Raster object",
+ "objectID": "docs/multitarget-regression-soil-properties.html#extract-raster-values-at-the-training-point-locations",
+ "href": "docs/multitarget-regression-soil-properties.html#extract-raster-values-at-the-training-point-locations",
+ "title": "Multi-Target Spatial Prediction using the Meuse Dataset",
+ "section": "Extract Raster Values at the Training Point Locations",
+ "text": "Extract Raster Values at the Training Point Locations\nPixel values from a Raster object can be extracted using geometries within a geopandas.GeoDataFrame (points, lines, polygons) or by using labelled pixels from another raster with the same dimensions and crs.\nBy default the extracted values are returned as a geopandas.GeoDataFrame that contains the data and the coordinates of the pixels:\n\ntraining_df = stack.extract_vector(gdf=training_pts)\n\ntraining_df.index = training_df.index.get_level_values(\"geometry_idx\")\ntraining_df = training_df.merge(\n training_pts.loc[:, (\"lead\", \"cadmium\", \"copper\", \"zinc\", \"om\")], \n left_index=True, \n right_index=True\n) \n\n\ntraining_df = training_df.dropna()\ntraining_df.head()\n\n\n\n\n\n\n\n\n\nchnl_dist\ndem\ndist\nffreq\nlandimg2\nlandimg3\nlandimg4\nmrvbf\nrsp\nslope\n...\ntr\nbl\nbr\nc\ngeometry\nlead\ncadmium\ncopper\nzinc\nom\n\n\ngeometry_idx\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n0\n0.000000\n3214.0\n0.001358\n1.0\n97.0\n92.0\n192.0\n3.523824e-06\n0.000000\n1.423307\n...\n12.369317\n119.268608\n100.717430\n55.470715\nPOINT (181072.000 333611.000)\n299.0\n11.7\n85.0\n1022.0\n13.6\n\n\n1\n79.849854\n3402.0\n0.012224\n1.0\n160.0\n183.0\n183.0\n9.879866e-06\n0.082085\n1.286004\n...\n13.928389\n117.046997\n98.858482\n53.235325\nPOINT (181025.000 333558.000)\n277.0\n8.6\n81.0\n1141.0\n14.0\n\n\n2\n0.000000\n3277.0\n0.103029\n1.0\n178.0\n209.0\n179.0\n1.340742e-03\n0.000000\n0.674711\n...\n10.295630\n119.281181\n98.412399\n55.226807\nPOINT (181165.000 333537.000)\n199.0\n6.5\n68.0\n640.0\n13.0\n\n\n3\n184.743164\n3563.0\n0.190094\n1.0\n114.0\n135.0\n152.0\n6.547428e-07\n0.192325\n1.413479\n...\n8.485281\n120.208153\n97.185387\n56.035702\nPOINT (181298.000 333484.000)\n116.0\n2.6\n81.0\n257.0\n8.0\n\n\n4\n16.768555\n3406.0\n0.277090\n1.0\n133.0\n154.0\n151.0\n1.588824e-03\n0.016689\n0.531276\n...\n11.661903\n117.004272\n93.193344\n52.801514\nPOINT (181307.000 
333330.000)\n117.0\n2.8\n48.0\n269.0\n8.7\n\n\n\n\n5 rows × 25 columns",
"crumbs": [
- "Reference",
- "Preprocessing",
- "preprocessing"
+ "Tutorials",
+ "Multitarget regression",
+ "Multi-Target Spatial Prediction using the Meuse Dataset"
]
},
{
- "objectID": "reference/index.html",
- "href": "reference/index.html",
- "title": "Function reference",
- "section": "",
- "text": "Raster is a class for reading and writing raster datasets\n\n\n\nRaster\nCreates a collection of file-based GDAL-supported raster\n\n\nRasterLayer\nRepresents a single raster band derived from a single or\n\n\n\n\n\n\nTools for working with vector datasets\n\n\n\nvector\n\n\n\n\n\n\n\nPreprocessing tools for raster datasets\n\n\n\npreprocessing"
- },
- {
- "objectID": "reference/index.html#raster-datasets",
- "href": "reference/index.html#raster-datasets",
- "title": "Function reference",
- "section": "",
- "text": "Raster is a class for reading and writing raster datasets\n\n\n\nRaster\nCreates a collection of file-based GDAL-supported raster\n\n\nRasterLayer\nRepresents a single raster band derived from a single or"
- },
- {
- "objectID": "reference/index.html#vector-tools",
- "href": "reference/index.html#vector-tools",
- "title": "Function reference",
- "section": "",
- "text": "Tools for working with vector datasets\n\n\n\nvector"
- },
- {
- "objectID": "reference/index.html#preprocessing",
- "href": "reference/index.html#preprocessing",
- "title": "Function reference",
- "section": "",
- "text": "Preprocessing tools for raster datasets\n\n\n\npreprocessing"
- },
- {
- "objectID": "reference/Raster.html",
- "href": "reference/Raster.html",
- "title": "Raster",
- "section": "",
- "text": "Raster(self, src, crs=None, transform=None, nodata=None, file_path=None, driver=None, tempdir=tempfile.tempdir, in_memory=False)\nCreates a collection of file-based GDAL-supported raster datasets that share a common coordinate reference system and geometry.\nRaster objects encapsulate RasterLayer objects, which represent single band raster datasets that can physically be represented by either separate single-band raster files, multi-band raster files, or any combination of individual bands from multi-band raster and single-band raster datasets.\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\n\n\n\n\nfiles\nlist\nA list of the raster dataset files that are used in the Raster. This does not have to be the same length as the number of RasterLayers because some files may have multiple bands.\n\n\nmeta\ndict\nA dict containing the raster metadata. The dict contains the following keys/values: crs : the crs object transform : the Affine.affine transform object width : width of the Raster in pixels height : height of the Raster in pixels count : number of RasterLayers within the Raster dtype : the numpy datatype that represents lowest common denominator of the different dtypes for all of the layers in the Raster.\n\n\nnames\nlist\nA list of the RasterLayer names.\n\n\nblock_shape\ntuple\nThe default block_shape in (rows, cols) for reading windows of data in the Raster for out-of-memory processing.\n\n\n\n\n\n\n\n\n\nName\nDescription\n\n\n\n\naggregate\nAggregates a raster to (usually) a coarser grid cell size.\n\n\nalter\nApply a fitted scikit-learn transformer to a Raster object.\n\n\nappend\nMethod to add new RasterLayers to a Raster object.\n\n\napply\nApply user-supplied function to a Raster object.\n\n\nblock_shapes\nGenerator for windows for optimal reading and writing based\n\n\nclose\nClose all of the RasterLayer objects in the Raster.\n\n\ncopy\nCreates a shallow copy of a Raster object\n\n\ncrop\nCrops a Raster object by the supplied 
bounds.\n\n\ndrop\nDrop individual RasterLayers from a Raster object\n\n\nextract_raster\nSample a Raster object by an aligned raster of labelled pixels.\n\n\nextract_vector\nSample a Raster/RasterLayer using a geopandas GeoDataframe\n\n\nextract_xy\nSamples pixel values using an array of xy locations.\n\n\nhead\nReturn the first 10 rows from the Raster as a ndarray\n\n\nintersect\nPerform a intersect operation on the Raster object.\n\n\nmask\nMask a Raster object based on the outline of shapes in a\n\n\npredict\nApply prediction of a scikit learn model to a Raster.\n\n\npredict_proba\nApply class probability prediction of a scikit learn model to a Raster.\n\n\nread\nReads data from the Raster object into a numpy array.\n\n\nrename\nRename a RasterLayer within the Raster object.\n\n\nsample\nGenerates a random sample of according to size, and samples\n\n\nscale\nStandardize (centre and scale) a Raster object by\n\n\nset_block_shape\nSet the block shape of the raster, i.e. the height and width\n\n\ntail\nReturn the last 10 rows from the Raster as a ndarray\n\n\nto_crs\nReprojects a Raster object to a different crs.\n\n\nto_pandas\nRaster to pandas DataFrame.\n\n\nwrite\nWrite the Raster object to a file.\n\n\n\n\n\nRaster.aggregate(out_shape, resampling='nearest', file_path=None, in_memory=False, driver='GTiff', dtype=None, nodata=None, **kwargs)\nAggregates a raster to (usually) a coarser grid cell size.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\nout_shape\ntuple\nNew shape in (rows, cols).\nrequired\n\n\nresampling\nstr (default ‘nearest’)\nResampling method to use when applying decimated reads when out_shape is specified. Supported methods are: ‘average’, ‘bilinear’, ‘cubic’, ‘cubic_spline’, ‘gauss’, ‘lanczos’, ‘max’, ‘med’, ‘min’, ‘mode’, ‘q1’, ‘q3’.\n'nearest'\n\n\nfile_path\nstr (optional\nFile path to save to cropped raster. 
If not supplied then the aggregated raster is saved to a temporary file.\nNone)\n\n\nin_memory\nbool\nWhether to initiated the Raster from an array and store the data in-memory using Rasterio’s in-memory files.\nis False\n\n\ndriver\nstr (default ‘GTiff’)\nNamed of GDAL-supported driver for file export.\n'GTiff'\n\n\ndtype\nstr (optional\nCoerce RasterLayers to the specified dtype. If not specified then the new intersected Raster is created using the dtype of the existing Raster dataset, which uses a dtype that can accommodate the data types of all of the individual RasterLayers.\nNone)\n\n\nnodata\nany number (optional\nNodata value for new dataset. If not specified then a nodata value is set based on the minimum permissible value of the Raster’s dtype. Note that this does not change the pixel nodata values of the raster, it only changes the metadata of what value represents a nodata pixel.\nNone)\n\n\nkwargs\nopt\nOptional named arguments to pass to the format drivers. For example can be compress=\"deflate\" to add compression.\n{}\n\n\n\n\n\n\n\n\n\n\n\n\n\nType\nDescription\n\n\n\n\npyspatialml.raster.Raster\nRaster object aggregated to a new pixel size.\n\n\n\n\n\n\n\nRaster.alter(transformer, file_path=None, in_memory=False, driver='GTiff', dtype=None, nodata=None, progress=False)\nApply a fitted scikit-learn transformer to a Raster object.\nCan be used to transform a raster using methods such as StandardScaler, RobustScaler etc.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ntransformer\na sklearn.preprocessing.Transformer object\n\nrequired\n\n\nfile_path\nstr (optional\nPath to a GeoTiff raster for the prediction results. 
If not specified then the output is written to a temporary file.\nNone)\n\n\nin_memory\nbool\nWhether to initiated the Raster from an array and store the data in-memory using Rasterio’s in-memory files.\nis False\n\n\ndriver\nstr (default ‘GTiff’)\nNamed of GDAL-supported driver for file export.\n'GTiff'\n\n\ndtype\nstr (optional\nOptionally specify a GDAL compatible data type when saving to file. If not specified, a data type is set based on the data type of the prediction.\nNone)\n\n\nnodata\nany number (optional\nNodata value for file export. If not specified then the nodata value is derived from the minimum permissible value for the given data type.\nNone)\n\n\nprogress\nbool (default False)\nShow progress bar for operation.\nFalse\n\n\n\n\n\n\n\n\n\nType\nDescription\n\n\n\n\nPyspatialml.Raster object with transformed data.\n\n\n\n\n\n\n\n\nRaster.append(other, in_place=False)\nMethod to add new RasterLayers to a Raster object.\nNote that this modifies the Raster object in-place by default.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\nother\nRaster object, or list of Raster objects\nObject to append to the Raster.\nrequired\n\n\nin_place\nbool (default False)\nWhether to change the Raster object in-place or leave original and return a new Raster object.\nFalse\n\n\n\n\n\n\n\n\n\nType\nDescription\n\n\n\n\npyspatialml.raster.Raster\nReturned only if in_place is False\n\n\n\n\n\n\n\nRaster.apply(function, file_path=None, in_memory=False, driver='GTiff', dtype=None, nodata=None, progress=False, function_args={}, **kwargs)\nApply user-supplied function to a Raster object.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\nfunction\nfunction\nFunction that takes an numpy array as a single argument.\nrequired\n\n\nfile_path\nstr (optional\nOptional path to save calculated Raster object. 
If not specified then a tempfile is used.\nNone)\n\n\nin_memory\nbool\nWhether to initiated the Raster from an array and store the data in-memory using Rasterio’s in-memory files.\nis False\n\n\ndriver\nstr (default ‘GTiff’)\nNamed of GDAL-supported driver for file export.\n'GTiff'\n\n\ndtype\nstr (optional\nCoerce RasterLayers to the specified dtype. If not specified then the new Raster is created using the dtype of the calculation result.\nNone)\n\n\nnodata\nany number (optional\nNodata value for new dataset. If not specified then a nodata value is set based on the minimum permissible value of the Raster’s data type. Note that this changes the values of the pixels that represent nodata pixels.\nNone)\n\n\nprogress\nbool (default False)\nOptionally show progress of transform operations.\nFalse\n\n\nfunction_args\ndict(optional)\nOptionally pass arguments to the function as a dict or keyword arguments.\n{}\n\n\nkwargs\nopt\nOptional named arguments to pass to the format drivers. For example can be compress=\"deflate\" to add compression.\n{}\n\n\n\n\n\n\n\n\n\nType\nDescription\n\n\n\n\npyspatialml.raster.Raster\nRaster containing the calculated result.\n\n\n\n\n\n\n\nRaster.block_shapes(rows, cols)\nGenerator for windows for optimal reading and writing based on the raster format Windows and returns as a tuple with xoff, yoff, width, height.\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\nrows\nint\nHeight of window in rows.\nrequired\n\n\ncols\nint\nWidth of window in columns.\nrequired\n\n\n\n\n\n\n\nRaster.close()\nClose all of the RasterLayer objects in the Raster.\nNote that this will cause any rasters based on temporary files to be removed. 
This is intended as a method of clearing temporary files that may have accumulated during an analysis session.\n\n\n\nRaster.copy(subset=None)\nCreates a shallow copy of a Raster object\nNote that shallow in the context of a Raster object means that an immutable copy of the object is made, however the on-disk and in-memory file locations remain the same.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\nsubset\nopt\nA list of layer names to subset while copying.\nNone\n\n\n\n\n\n\n\n\n\nType\nDescription\n\n\n\n\npyspatialml.raster.Raster\n\n\n\n\n\n\n\n\nRaster.crop(bounds, file_path=None, in_memory=False, driver='GTiff', dtype=None, nodata=None, **kwargs)\nCrops a Raster object by the supplied bounds.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\nbounds\ntuple\nA tuple containing the bounding box to clip by in the form of (xmin, ymin, xmax, ymax).\nrequired\n\n\nfile_path\nstr (optional\nFile path to save to cropped raster. If not supplied then the cropped raster is saved to a temporary file.\nNone)\n\n\nin_memory\nbool\nWhether to initiated the Raster from an array and store the data in-memory using Rasterio’s in-memory files.\nis False\n\n\ndriver\nstr (default ‘GTiff’). Default is ‘GTiff’\nNamed of GDAL-supported driver for file export.\n'GTiff'\n\n\ndtype\nstr (optional\nCoerce RasterLayers to the specified dtype. If not specified then the new intersected Raster is created using the dtype of theexisting Raster dataset, which uses a dtype that can accommodate the data types of all of the individual RasterLayers.\nNone)\n\n\nnodata\nany number (optional\nNodata value for new dataset. If not specified then a nodata value is set based on the minimum permissible value of the Raster’s data type. Note that this does not change the pixel nodata values of the raster, it only changes the metadata of what value represents a nodata pixel.\nNone)\n\n\nkwargs\nopt\nOptional named arguments to pass to the format drivers. 
For example can be compress=\"deflate\" to add compression.\n{}\n\n\n\n\n\n\n\n\n\nType\nDescription\n\n\n\n\npyspatialml.raster.Raster\nRaster cropped to new extent.\n\n\n\n\n\n\n\nRaster.drop(labels, in_place=False)\nDrop individual RasterLayers from a Raster object\nNote that this modifies the Raster object in-place by default.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\nlabels\nsingle label or list-like\nIndex (int) or layer name to drop. Can be a single integer or label, or a list of integers or labels.\nrequired\n\n\nin_place\nbool (default False)\nWhether to change the Raster object in-place or leave original and return a new Raster object.\nFalse\n\n\n\n\n\n\n\n\n\nType\nDescription\n\n\n\n\npyspatialml.pyspatialml.Raster\nReturned only if in_place is True\n\n\n\n\n\n\n\nRaster.extract_raster(src, progress=False)\nSample a Raster object by an aligned raster of labelled pixels.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\nsrc\n\nSingle band raster containing labelled pixels as an open rasterio DatasetReader object.\nrequired\n\n\nprogress\nbool(opt)\nShow a progress bar for extraction.\nFalse\n\n\n\n\n\n\n\n\n\n\n\n\n\nType\nDescription\n\n\n\n\ngeopandas.geopandas.GeoDataFrame\nGeodataframe containing extracted data as point features if return_array=False\n\n\n\n\n\n\n\nRaster.extract_vector(gdf, progress=False)\nSample a Raster/RasterLayer using a geopandas GeoDataframe containing points, lines or polygon features.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ngdf\n\nContaining either point, line or polygon geometries. Overlapping geometries will cause the same pixels to be sampled.\nrequired\n\n\nprogress\nbool(opt)\nShow a progress bar for extraction.\nFalse\n\n\n\n\n\n\n\n\n\n\n\n\n\nType\nDescription\n\n\n\n\ngeopandas.geopandas.GeoDataframe\nContaining extracted data as point geometries (one point per pixel). 
The resulting GeoDataFrame is indexed using a named pandas.MultiIndex, with pixel_idx index referring to the index of each pixel that was sampled, and the geometry_idx index referring to the index of the each geometry in the supplied gdf. This makes it possible to keep track of how sampled pixel relates to the original geometries, i.e. multiple pixels being extracted within the area of a single polygon that can be referred to using the geometry_idx. The extracted data can subsequently be joined with the attribute table of the supplied gdf using: training_py = geopandas.read_file(nc.polygons) df = self.stack.extract_vector(gdf=training_py) df = df.dropna() df = df.merge( right=training_py.loc[:, (“id”, “label”)], left_on=“polygon_idx”, right_on=“id”, right_index=True )\n\n\n\n\n\n\n\nRaster.extract_xy(xys, return_array=False, progress=False)\nSamples pixel values using an array of xy locations.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\nxys\n2d array-like\nx and y coordinates from which to sample the raster (n_samples, xys).\nrequired\n\n\nreturn_array\nbool(opt)\nBy default the extracted pixel values are returned as a geopandas.GeoDataFrame. If return_array=True then the extracted pixel values are returned as a tuple of numpy.ndarrays.\nFalse\n\n\nprogress\nbool(opt)\nShow a progress bar for extraction.\nFalse\n\n\n\n\n\n\n\n\n\n\n\n\n\nType\nDescription\n\n\n\n\ngeopandas.geopandas.GeoDataframe\nContaining extracted data as point geometries if return_array=False.\n\n\nnumpy.numpy.ndarray\n2d masked array containing sampled raster values (sample, bands) at the x,y locations.\n\n\n\n\n\n\n\nRaster.head()\nReturn the first 10 rows from the Raster as a ndarray\n\n\n\nRaster.intersect(file_path=None, in_memory=False, driver='GTiff', dtype=None, nodata=None, **kwargs)\nPerform a intersect operation on the Raster object.\nComputes the geometric intersection of the RasterLayers with the Raster object. 
This will cause nodata values in any of the rasters to be propagated through all of the output rasters.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\nfile_path\nstr (optional\nFile path to save to resulting Raster. If not supplied then the resulting Raster is saved to a temporary file.\nNone)\n\n\nin_memory\nbool\nWhether to initiated the Raster from an array and store the data in-memory using Rasterio’s in-memory files.\nis False\n\n\ndriver\nstr (default ‘GTiff’)\nNamed of GDAL-supported driver for file export.\n'GTiff'\n\n\ndtype\nstr (optional\nCoerce RasterLayers to the specified dtype. If not specified then the new intersected Raster is created using the dtype of the existing Raster dataset, which uses a dtype that can accommodate the data types of all of the individual RasterLayers.\nNone)\n\n\nnodata\nany number (optional\nNodata value for new dataset. If not specified then a nodata value is set based on the minimum permissible value of the Raster’s data type. Note that this changes the values of the pixels that represent nodata to the new value.\nNone)\n\n\nkwargs\nopt\nOptional named arguments to pass to the format drivers. For example can be compress=\"deflate\" to add compression.\n{}\n\n\n\n\n\n\n\n\n\n\n\n\n\nType\nDescription\n\n\n\n\npyspatialml.raster.Raster\nRaster with layers that are masked based on a union of all masks in the suite of RasterLayers.\n\n\n\n\n\n\n\nRaster.mask(shapes, invert=False, crop=True, pad=False, file_path=None, in_memory=False, driver='GTiff', dtype=None, nodata=None, **kwargs)\nMask a Raster object based on the outline of shapes in a geopandas.GeoDataFrame\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\nshapes\ngeopandas.geopandas.GeoDataFrame\nGeoDataFrame containing masking features.\nrequired\n\n\ninvert\nbool (default False)\nIf False then pixels outside shapes will be masked. 
If True then pixels inside shape will be masked.\nFalse\n\n\ncrop\nbool (default True)\nCrop the raster to the extent of the shapes.\nTrue\n\n\npad\nbool (default False)\nIf True, the features will be padded in each direction by one half of a pixel prior to cropping raster.\nFalse\n\n\nfile_path\nstr (optional\nFile path to save to resulting Raster. If not supplied then the resulting Raster is saved to a temporary file.\nNone)\n\n\nin_memory\nbool\nWhether to initiated the Raster from an array and store the data in-memory using Rasterio’s in-memory files.\nis False\n\n\ndriver\nstr (default ‘GTiff’)\nNamed of GDAL-supported driver for file export.\n'GTiff'\n\n\ndtype\nstr (optional\nCoerce RasterLayers to the specified dtype. If not specified then the cropped Raster is created using the existing dtype, which usesa dtype that can accommodate the data types of all of the individual RasterLayers.\nNone)\n\n\nnodata\nany number (optional\nNodata value for cropped dataset. If not specified then a nodata value is set based on the minimum permissible value of the Raster’s data type. Note that this changes the values of the pixels to the new nodata value, and changes the metadata of the raster.\nNone)\n\n\nkwargs\nopt\nOptional named arguments to pass to the format drivers. For example can be compress=\"deflate\" to add compression.\n{}\n\n\n\n\n\n\n\n\n\nType\nDescription\n\n\n\n\npyspatialml.pyspatialml.Raster\nRaster with masked layers.\n\n\n\n\n\n\n\nRaster.predict(estimator, file_path=None, in_memory=False, driver='GTiff', dtype=None, nodata=None, progress=False, constants=None, **kwargs)\nApply prediction of a scikit learn model to a Raster.\nThe model can represent any scikit learn model or compatible api with a fit and predict method. These can consist of classification or regression models. 
Multi-class classifications and multi-target regressions are also supported.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\nestimator\nestimator object implementing ‘fit’\nThe object to use to fit the data.\nrequired\n\n\nfile_path\nstr (optional\nPath to a GeoTiff raster for the prediction results. If not specified then the output is written to a temporary file.\nNone)\n\n\nin_memory\nbool\nWhether to initiated the Raster from an array and store the data in-memory using Rasterio’s in-memory files.\nis False\n\n\ndriver\nstr (default ‘GTiff’)\nNamed of GDAL-supported driver for file export\n'GTiff'\n\n\ndtype\nstr (optional\nOptionally specify a GDAL compatible data type when saving to file. If not specified, np.float32 is assumed.\nNone)\n\n\nnodata\nany number (optional\nNodata value for file export. If not specified then the nodata value is derived from the minimum permissible value for the given data type.\nNone)\n\n\nprogress\nbool (default False)\nShow progress bar for prediction.\nFalse\n\n\nconstants\n\nConstant features to add to the Raster object with each value in a list or 1d ndarray representing an additional feature. If a list-like object of values os passed, then each numeric value will be appended as constant features to the last columns in the data. It is therefore important that all features including constant features are present in the same order as what was used to train the model. If a dict is passed, then the keys of the dict must refer to the names of raster layers in the Raster object. In this case, the values of the dict will replace the values of the raster layers in the Raster object.\nNone\n\n\nkwargs\nopt\nOptional named arguments to pass to the format drivers. For example can be compress=\"deflate\" to add compression.\n{}\n\n\n\n\n\n\n\n\n\n\n\n\n\nType\nDescription\n\n\n\n\npyspatialml.raster.Raster\nRaster object containing prediction results as a RasterLayers. 
For classification and regression models, the Raster will contain a single RasterLayer, unless the model is multi-class or multi-target. Layers are named automatically as pred_raw_n with n = 1, 2, 3 ..n.\n\n\n\n\n\n\n\nRaster.predict_proba(estimator, file_path=None, in_memory=False, indexes=None, driver='GTiff', dtype=None, nodata=None, constants=None, progress=False, **kwargs)\nApply class probability prediction of a scikit learn model to a Raster.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\nestimator\nestimator object implementing ‘fit’\nThe object to use to fit the data.\nrequired\n\n\nfile_path\nstr (optional\nPath to a GeoTiff raster for the prediction results. If not specified then the output is written to a temporary file.\nNone)\n\n\nin_memory\nbool\nWhether to initiated the Raster from an array and store the data in-memory using Rasterio’s in-memory files.\nis False\n\n\nindexes\nlist of integers (optional\nList of class indices to export. In some circumstances, only a subset of the class probability estimations are desired, for instance when performing a binary classification only the probabilities for the positive class may be desired.\nNone)\n\n\ndriver\nstr (default ‘GTiff’)\nNamed of GDAL-supported driver for file export.\n'GTiff'\n\n\ndtype\nstr (optional\nOptionally specify a GDAL compatible data type when saving to file. If not specified, a data type is set based on the data type of the prediction.\nNone)\n\n\nnodata\nany number (optional\nNodata value for file export. If not specified then the nodata value is derived from the minimum permissible value for the given data type.\nNone)\n\n\nprogress\nbool (default False)\nShow progress bar for prediction.\nFalse\n\n\nconstants\n\nConstant features to add to the Raster object with each value in a list or 1d ndarray representing an additional feature. 
If a list-like object of values os passed, then each numeric value will be appended as constant features to the last columns in the data. It is therefore important that all features including constant features are present in the same order as what was used to train the model. If a dict is passed, then the keys of the dict must refer to the names of raster layers in the Raster object. In this case, the values of the dict will replace the values of the raster layers in the Raster object.\nNone\n\n\nkwargs\nopt\nOptional named arguments to pass to the format drivers. For example can be compress=\"deflate\" to add compression.\n{}\n\n\n\n\n\n\n\n\n\n\n\n\n\nType\nDescription\n\n\n\n\npyspatialml.raster.Raster\nRaster containing predicted class probabilities. Each predicted class is represented by a RasterLayer object. The RasterLayers are named prob_n for 1,2,3..n, with n based on the index position of the classes, not the number of the class itself. For example, a classification model predicting classes with integer values of 1, 3, and 5 would result in three RasterLayers named ‘prob_1’, ‘prob_2’ and ‘prob_3’.\n\n\n\n\n\n\n\nRaster.read(masked=False, window=None, out_shape=None, resampling='nearest', as_df=False, **kwargs)\nReads data from the Raster object into a numpy array.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\nmasked\nbool (default False)\nRead data into a masked array.\nFalse\n\n\nwindow\nrasterio.window.Window object (optional\nTuple of col_off, row_off, width, height of a window of data to read a chunk of data into a ndarray.\nNone)\n\n\nout_shape\ntuple (optional\nShape of shape of array (rows, cols) to read data into using decimated reads.\nNone)\n\n\nresampling\nstr (default ‘nearest’)\nResampling method to use when applying decimated reads when out_shape is specified. 
Supported methods are: ‘average’, ‘bilinear’, ‘cubic’, ‘cubic_spline’, ‘gauss’, ‘lanczos’, ‘max’, ‘med’, ‘min’, ‘mode’, ‘q1’, ‘q3’.\n'nearest'\n\n\nas_df\nbool (default False)\nWhether to return the data as a pandas.DataFrame with columns named by the RasterLayer names.\nFalse\n\n\n**kwargs\ndict\nOther arguments to pass to rasterio.DatasetReader.read method\n{}\n\n\n\n\n\n\n\n\n\n\n\n\n\nType\nDescription\n\n\n\n\nndarray\nRaster values in 3d ndarray with the dimensions in order of (band, row, and column).\n\n\n\n\n\n\n\nRaster.rename(names, in_place=False)\nRename a RasterLayer within the Raster object.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\nnames\ndict\ndict of old_name : new_name\nrequired\n\n\nin_place\nbool (default False)\nWhether to change names of the Raster object in-place or leave original and return a new Raster object.\nFalse\n\n\n\n\n\n\n\n\n\n\n\n\n\nType\nDescription\n\n\n\n\npyspatialml.pyspatialml.Raster\nReturned only if in_place is False\n\n\n\n\n\n\n\nRaster.sample(size, strata=None, return_array=False, random_state=None)\nGenerates a random sample of according to size, and samples the pixel values.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\nsize\nint\nNumber of random samples or number of samples per strata if a strata object is supplied.\nrequired\n\n\nstrata\npyspatialml Raster object (opt)\nWhether to use stratified instead of random sampling. 
Strata can be supplied using another pyspatialml.Raster object.\nNone\n\n\nreturn_array\nbool(opt)\nOptionally return extracted data as separate X and xy masked numpy arrays.\nFalse\n\n\nrandom_state\nint(opt)\ninteger to use within random.seed.\nNone\n\n\n\n\n\n\n\n\n\n\n\n\n\nType\nDescription\n\n\n\n\npandas.pandas.DataFrame\nDataFrame containing values of names of RasterLayers in the Raster if return_array is False.\n\n\ntuple\nA tuple containing two elements if return_array is True: - numpy.ndarray Numpy array of extracted raster values, typically 2d. - numpy.ndarray 2D numpy array of xy coordinates of extracted values.\n\n\n\n\n\n\n\nRaster.scale(centre=True, scale=True, file_path=None, in_memory=False, driver='GTiff', dtype=None, nodata=None, progress=False)\nStandardize (centre and scale) a Raster object by subtracting the mean and dividing by the standard deviation for each layer in the object.\nThe mean and standard deviation statistics are calculated for each layer separately.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncentre\nbool\nWhether to subtract the mean from each layer.\nis True\n\n\nscale\nbool\nWhether to divide each layer by the standard deviation of the layer.\nis True\n\n\nfile_path\nstr (optional\nPath to a GeoTiff raster for the prediction results. If not specified then the output is written to a temporary file.\nNone)\n\n\nin_memory\nbool\nWhether to initiated the Raster from an array and store the data in-memory using Rasterio’s in-memory files.\nis False\n\n\ndriver\nstr (default ‘GTiff’)\nNamed of GDAL-supported driver for file export.\n'GTiff'\n\n\ndtype\nstr (optional\nOptionally specify a GDAL compatible data type when saving to file. If not specified, a data type is set based on the data type of the prediction.\nNone)\n\n\nnodata\nany number (optional\nNodata value for file export. 
If not specified then the nodata value is derived from the minimum permissible value for the given data type.\nNone)\n\n\nprogress\nbool (default False)\nShow progress bar for operation.\nFalse\n\n\n\n\n\n\n\n\n\nType\nDescription\n\n\n\n\nPyspatialml.Raster object with rescaled data.\n\n\n\n\n\n\n\n\nRaster.set_block_shape(value)\nSet the block shape of the raster, i.e. the height and width of windows to read in chunks for the predict, predict_proba, apply, and other supported-methods.\nNote block shape can also be set with myraster.block_shape = (500, 500)\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\nvalue\ntuple\nA tuple of (height, width) for the block window\nrequired\n\n\n\n\n\n\n\nRaster.tail()\nReturn the last 10 rows from the Raster as a ndarray\n\n\n\nRaster.to_crs(crs, resampling='nearest', file_path=None, in_memory=False, driver='GTiff', nodata=None, n_jobs=1, warp_mem_lim=0, progress=False, **kwargs)\nReprojects a Raster object to a different crs.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncrs\nrasterio.transform.CRS object, or dict\nExample: CRS({‘init’: ‘EPSG:4326’})\nrequired\n\n\nresampling\nstr (default ‘nearest’)\nResampling method to use. One of the following: nearest, bilinear, cubic, cubic_spline, lanczos, average, mode, max (GDAL >= 2.2), min (GDAL >= 2.2), med (GDAL >= 2.2), q1 (GDAL >= 2.2), q3 (GDAL >= 2.2)\n'nearest'\n\n\nfile_path\nstr (optional\nOptional path to save reprojected Raster object. If not specified then a tempfile is used.\nNone)\n\n\nin_memory\nbool\nWhether to initiated the Raster from an array and store the data in-memory using Rasterio’s in-memory files.\nis False\n\n\ndriver\nstr (default ‘GTiff’)\nNamed of GDAL-supported driver for file export.\n'GTiff'\n\n\nnodata\nany number (optional\nNodata value for new dataset. 
If not specified then the existing nodata value of the Raster object is used, which can accommodate the dtypes of the individual layers in the Raster.\nNone)\n\n\nn_jobs\nint (default 1)\nThe number of warp worker threads.\n1\n\n\nwarp_mem_lim\nint (default 0)\nThe warp operation memory limit in MB. Larger values allow the warp operation to be carried out in fewer chunks. The amount of memory required to warp a 3-band uint8 2000 row x 2000 col raster to a destination of the same size is approximately 56 MB. The default (0) means 64 MB with GDAL 2.2.\n0\n\n\nprogress\nbool (default False)\nOptionally show progress of transform operations.\nFalse\n\n\nkwargs\nopt\nOptional named arguments to pass to the format drivers. For example can be compress=\"deflate\" to add compression.\n{}\n\n\n\n\n\n\n\n\n\nType\nDescription\n\n\n\n\npyspatialml.raster.Raster\nRaster following reprojection.\n\n\n\n\n\n\n\nRaster.to_pandas(max_pixels=None, resampling='nearest')\nRaster to pandas DataFrame.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\nmax_pixels\n\nMaximum number of pixels to sample. By default all pixels are used.\nNone\n\n\nresampling\nstr (default ‘nearest’)\nResampling method to use when applying decimated reads when out_shape is specified. 
Supported methods are: ‘average’, ‘bilinear’, ‘cubic’, ‘cubic_spline’, ‘gauss’, ‘lanczos’, ‘max’, ‘med’, ‘min’, ‘mode’, ‘q1’, ‘q3’.\n'nearest'\n\n\n\n\n\n\n\n\n\n\n\n\n\nType\nDescription\n\n\n\n\npandas.pandas.DataFrame\nDataFrame containing values of names of RasterLayers in the Raster as columns, and pixel values as rows.\n\n\n\n\n\n\n\nRaster.write(file_path, driver='GTiff', dtype=None, nodata=None, **kwargs)\nWrite the Raster object to a file.\nOverrides the write RasterBase class method, which is a partial function of the rasterio.DatasetReader.write method.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\nfile_path\nstr\nFile path used to save the Raster object.\nrequired\n\n\ndriver\nstr (default is ‘GTiff’).\nName of GDAL driver used to save Raster data.\n'GTiff'\n\n\ndtype\nstr (opt\nOptionally specify a numpy compatible data type when saving to file. If not specified, a data type is selected based on the data types of RasterLayers in the Raster object.\nNone)\n\n\nnodata\nany number (opt\nOptionally assign a new nodata value when saving to file. If not specified a nodata value based on the minimum permissible value for the data types of RasterLayers in the Raster object is used. Note that this does not change the pixel nodata values of the raster, it only changes the metadata of what value represents a nodata pixel.\nNone)\n\n\nkwargs\nopt\nOptional named arguments to pass to the format drivers. For example can be compress=\"deflate\" to add compression.\n{}\n\n\n\n\n\n\n\n\n\nType\nDescription\n\n\n\n\npyspatialml.raster.Raster\nNew Raster object from saved file.",
+ "objectID": "docs/multitarget-regression-soil-properties.html#developing-a-machine-learning-model",
+ "href": "docs/multitarget-regression-soil-properties.html#developing-a-machine-learning-model",
+ "title": "Multi-Target Spatial Prediction using the Meuse Dataset",
+ "section": "Developing a Machine Learning Model",
+ "text": "Developing a Machine Learning Model\nHere we are going to create a machine learning pipeline that correctly handles categorical predictors via one-hot encoding:\n\nstack.names\n\ndict_keys(['chnl_dist', 'dem', 'dist', 'ffreq', 'landimg2', 'landimg3', 'landimg4', 'mrvbf', 'rsp', 'slope', 'soil', 'twi', 'x_coordinates', 'y_coordinates', 'tl', 'tr', 'bl', 'br', 'c'])\n\n\n\nfrom sklearn.pipeline import Pipeline\nfrom sklearn.ensemble import ExtraTreesRegressor\nfrom sklearn.preprocessing import OneHotEncoder\nfrom sklearn.compose import ColumnTransformer\n\nsoil_idx = [i for i, name in enumerate(stack.names) if name == 'soil']\n\ntrans = ColumnTransformer([\n ('ohe', OneHotEncoder(categories='auto', handle_unknown='ignore'), soil_idx)\n ], remainder='passthrough')\n\net = ExtraTreesRegressor(n_estimators=500, n_jobs=-1, random_state=1234)\net = Pipeline([\n ('preproc', trans),\n ('regressor', et)])\n\nNow we can separate our response and predictor variables and train the model:\n\nX = training_df.loc[:, stack.names]\ny = training_df.loc[:, ['lead', 'cadmium', 'copper', 'zinc', 'om']]\net.fit(X, y)\n\nPipeline(steps=[('preproc',\n ColumnTransformer(remainder='passthrough',\n transformers=[('ohe',\n OneHotEncoder(handle_unknown='ignore'),\n [10])])),\n ('regressor',\n ExtraTreesRegressor(n_estimators=500, n_jobs=-1,\n random_state=1234))])In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org. 
Pipeline?Documentation for PipelineiFittedPipeline(steps=[('preproc',\n ColumnTransformer(remainder='passthrough',\n transformers=[('ohe',\n OneHotEncoder(handle_unknown='ignore'),\n [10])])),\n ('regressor',\n ExtraTreesRegressor(n_estimators=500, n_jobs=-1,\n random_state=1234))]) preproc: ColumnTransformer?Documentation for preproc: ColumnTransformerColumnTransformer(remainder='passthrough',\n transformers=[('ohe', OneHotEncoder(handle_unknown='ignore'),\n [10])]) ohe[10] OneHotEncoder?Documentation for OneHotEncoderOneHotEncoder(handle_unknown='ignore') remainder['chnl_dist', 'dem', 'dist', 'ffreq', 'landimg2', 'landimg3', 'landimg4', 'mrvbf', 'rsp', 'slope', 'twi', 'x_coordinates', 'y_coordinates', 'tl', 'tr', 'bl', 'br', 'c'] passthroughpassthrough ExtraTreesRegressor?Documentation for ExtraTreesRegressorExtraTreesRegressor(n_estimators=500, n_jobs=-1, random_state=1234) \n\n\nTo evaluate the performance of the model, we will use 10-fold cross validation:\n\nfrom sklearn.model_selection import cross_validate, KFold\n\nouter = KFold(n_splits=10, shuffle=True, random_state=1234)\nscores = cross_validate(et, X, y, scoring='neg_mean_squared_error', cv=10, n_jobs=1)\nrmse = np.sqrt(-scores['test_score']).mean()\n\nprint(\"Our RMSE score is {}\".format(rmse))\n\nOur RMSE score is 105.19227221271413",
"crumbs": [
- "Reference",
- "Raster datasets",
- "Raster"
+ "Tutorials",
+ "Multitarget regression",
+ "Multi-Target Spatial Prediction using the Meuse Dataset"
]
},
{
- "objectID": "reference/Raster.html#attributes",
- "href": "reference/Raster.html#attributes",
- "title": "Raster",
- "section": "",
- "text": "Name\nType\nDescription\n\n\n\n\nfiles\nlist\nA list of the raster dataset files that are used in the Raster. This does not have to be the same length as the number of RasterLayers because some files may have multiple bands.\n\n\nmeta\ndict\nA dict containing the raster metadata. The dict contains the following keys/values: crs : the crs object transform : the Affine.affine transform object width : width of the Raster in pixels height : height of the Raster in pixels count : number of RasterLayers within the Raster dtype : the numpy datatype that represents lowest common denominator of the different dtypes for all of the layers in the Raster.\n\n\nnames\nlist\nA list of the RasterLayer names.\n\n\nblock_shape\ntuple\nThe default block_shape in (rows, cols) for reading windows of data in the Raster for out-of-memory processing.",
+ "objectID": "docs/multitarget-regression-soil-properties.html#feature-importances",
+ "href": "docs/multitarget-regression-soil-properties.html#feature-importances",
+ "title": "Multi-Target Spatial Prediction using the Meuse Dataset",
+ "section": "Feature Importances",
+ "text": "Feature Importances\n\nohe_names = deepcopy(list(stack.names))\nohe_names.insert(soil_idx[0], 'soil1')\nohe_names.insert(soil_idx[0], 'soil2')\nohe_names = np.array(ohe_names)\n\n\nmpl.style.use('ggplot')\n\nfimp = et.named_steps['regressor'].feature_importances_\n\nfig, ax = plt.subplots(figsize=(4, 6))\nax.barh(y=ohe_names[fimp.argsort()], width=fimp[fimp.argsort()])\nax.set_xlabel('Feature Importance Score')\nplt.show()",
"crumbs": [
- "Reference",
- "Raster datasets",
- "Raster"
+ "Tutorials",
+ "Multitarget regression",
+ "Multi-Target Spatial Prediction using the Meuse Dataset"
]
},
{
- "objectID": "reference/Raster.html#methods",
- "href": "reference/Raster.html#methods",
- "title": "Raster",
- "section": "",
- "text": "Name\nDescription\n\n\n\n\naggregate\nAggregates a raster to (usually) a coarser grid cell size.\n\n\nalter\nApply a fitted scikit-learn transformer to a Raster object.\n\n\nappend\nMethod to add new RasterLayers to a Raster object.\n\n\napply\nApply user-supplied function to a Raster object.\n\n\nblock_shapes\nGenerator for windows for optimal reading and writing based\n\n\nclose\nClose all of the RasterLayer objects in the Raster.\n\n\ncopy\nCreates a shallow copy of a Raster object\n\n\ncrop\nCrops a Raster object by the supplied bounds.\n\n\ndrop\nDrop individual RasterLayers from a Raster object\n\n\nextract_raster\nSample a Raster object by an aligned raster of labelled pixels.\n\n\nextract_vector\nSample a Raster/RasterLayer using a geopandas GeoDataframe\n\n\nextract_xy\nSamples pixel values using an array of xy locations.\n\n\nhead\nReturn the first 10 rows from the Raster as a ndarray\n\n\nintersect\nPerform a intersect operation on the Raster object.\n\n\nmask\nMask a Raster object based on the outline of shapes in a\n\n\npredict\nApply prediction of a scikit learn model to a Raster.\n\n\npredict_proba\nApply class probability prediction of a scikit learn model to a Raster.\n\n\nread\nReads data from the Raster object into a numpy array.\n\n\nrename\nRename a RasterLayer within the Raster object.\n\n\nsample\nGenerates a random sample of according to size, and samples\n\n\nscale\nStandardize (centre and scale) a Raster object by\n\n\nset_block_shape\nSet the block shape of the raster, i.e. 
the height and width\n\n\ntail\nReturn the last 10 rows from the Raster as a ndarray\n\n\nto_crs\nReprojects a Raster object to a different crs.\n\n\nto_pandas\nRaster to pandas DataFrame.\n\n\nwrite\nWrite the Raster object to a file.\n\n\n\n\n\nRaster.aggregate(out_shape, resampling='nearest', file_path=None, in_memory=False, driver='GTiff', dtype=None, nodata=None, **kwargs)\nAggregates a raster to (usually) a coarser grid cell size.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\nout_shape\ntuple\nNew shape in (rows, cols).\nrequired\n\n\nresampling\nstr (default ‘nearest’)\nResampling method to use when applying decimated reads when out_shape is specified. Supported methods are: ‘average’, ‘bilinear’, ‘cubic’, ‘cubic_spline’, ‘gauss’, ‘lanczos’, ‘max’, ‘med’, ‘min’, ‘mode’, ‘q1’, ‘q3’.\n'nearest'\n\n\nfile_path\nstr (optional\nFile path to save to cropped raster. If not supplied then the aggregated raster is saved to a temporary file.\nNone)\n\n\nin_memory\nbool\nWhether to initiated the Raster from an array and store the data in-memory using Rasterio’s in-memory files.\nis False\n\n\ndriver\nstr (default ‘GTiff’)\nNamed of GDAL-supported driver for file export.\n'GTiff'\n\n\ndtype\nstr (optional\nCoerce RasterLayers to the specified dtype. If not specified then the new intersected Raster is created using the dtype of the existing Raster dataset, which uses a dtype that can accommodate the data types of all of the individual RasterLayers.\nNone)\n\n\nnodata\nany number (optional\nNodata value for new dataset. If not specified then a nodata value is set based on the minimum permissible value of the Raster’s dtype. Note that this does not change the pixel nodata values of the raster, it only changes the metadata of what value represents a nodata pixel.\nNone)\n\n\nkwargs\nopt\nOptional named arguments to pass to the format drivers. 
For example can be compress=\"deflate\" to add compression.\n{}\n\n\n\n\n\n\n\n\n\n\n\n\n\nType\nDescription\n\n\n\n\npyspatialml.raster.Raster\nRaster object aggregated to a new pixel size.\n\n\n\n\n\n\n\nRaster.alter(transformer, file_path=None, in_memory=False, driver='GTiff', dtype=None, nodata=None, progress=False)\nApply a fitted scikit-learn transformer to a Raster object.\nCan be used to transform a raster using methods such as StandardScaler, RobustScaler etc.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ntransformer\na sklearn.preprocessing.Transformer object\n\nrequired\n\n\nfile_path\nstr (optional\nPath to a GeoTiff raster for the prediction results. If not specified then the output is written to a temporary file.\nNone)\n\n\nin_memory\nbool\nWhether to initiated the Raster from an array and store the data in-memory using Rasterio’s in-memory files.\nis False\n\n\ndriver\nstr (default ‘GTiff’)\nNamed of GDAL-supported driver for file export.\n'GTiff'\n\n\ndtype\nstr (optional\nOptionally specify a GDAL compatible data type when saving to file. If not specified, a data type is set based on the data type of the prediction.\nNone)\n\n\nnodata\nany number (optional\nNodata value for file export. 
If not specified then the nodata value is derived from the minimum permissible value for the given data type.\nNone)\n\n\nprogress\nbool (default False)\nShow progress bar for operation.\nFalse\n\n\n\n\n\n\n\n\n\nType\nDescription\n\n\n\n\nPyspatialml.Raster object with transformed data.\n\n\n\n\n\n\n\n\nRaster.append(other, in_place=False)\nMethod to add new RasterLayers to a Raster object.\nNote that this modifies the Raster object in-place by default.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\nother\nRaster object, or list of Raster objects\nObject to append to the Raster.\nrequired\n\n\nin_place\nbool (default False)\nWhether to change the Raster object in-place or leave original and return a new Raster object.\nFalse\n\n\n\n\n\n\n\n\n\nType\nDescription\n\n\n\n\npyspatialml.raster.Raster\nReturned only if in_place is False\n\n\n\n\n\n\n\nRaster.apply(function, file_path=None, in_memory=False, driver='GTiff', dtype=None, nodata=None, progress=False, function_args={}, **kwargs)\nApply user-supplied function to a Raster object.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\nfunction\nfunction\nFunction that takes an numpy array as a single argument.\nrequired\n\n\nfile_path\nstr (optional\nOptional path to save calculated Raster object. If not specified then a tempfile is used.\nNone)\n\n\nin_memory\nbool\nWhether to initiated the Raster from an array and store the data in-memory using Rasterio’s in-memory files.\nis False\n\n\ndriver\nstr (default ‘GTiff’)\nNamed of GDAL-supported driver for file export.\n'GTiff'\n\n\ndtype\nstr (optional\nCoerce RasterLayers to the specified dtype. If not specified then the new Raster is created using the dtype of the calculation result.\nNone)\n\n\nnodata\nany number (optional\nNodata value for new dataset. If not specified then a nodata value is set based on the minimum permissible value of the Raster’s data type. 
Note that this changes the values of the pixels that represent nodata pixels.\nNone)\n\n\nprogress\nbool (default False)\nOptionally show progress of transform operations.\nFalse\n\n\nfunction_args\ndict(optional)\nOptionally pass arguments to the function as a dict or keyword arguments.\n{}\n\n\nkwargs\nopt\nOptional named arguments to pass to the format drivers. For example can be compress=\"deflate\" to add compression.\n{}\n\n\n\n\n\n\n\n\n\nType\nDescription\n\n\n\n\npyspatialml.raster.Raster\nRaster containing the calculated result.\n\n\n\n\n\n\n\nRaster.block_shapes(rows, cols)\nGenerator for windows for optimal reading and writing based on the raster format Windows and returns as a tuple with xoff, yoff, width, height.\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\nrows\nint\nHeight of window in rows.\nrequired\n\n\ncols\nint\nWidth of window in columns.\nrequired\n\n\n\n\n\n\n\nRaster.close()\nClose all of the RasterLayer objects in the Raster.\nNote that this will cause any rasters based on temporary files to be removed. 
This is intended as a method of clearing temporary files that may have accumulated during an analysis session.\n\n\n\nRaster.copy(subset=None)\nCreates a shallow copy of a Raster object\nNote that shallow in the context of a Raster object means that an immutable copy of the object is made, however the on-disk and in-memory file locations remain the same.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\nsubset\nopt\nA list of layer names to subset while copying.\nNone\n\n\n\n\n\n\n\n\n\nType\nDescription\n\n\n\n\npyspatialml.raster.Raster\n\n\n\n\n\n\n\n\nRaster.crop(bounds, file_path=None, in_memory=False, driver='GTiff', dtype=None, nodata=None, **kwargs)\nCrops a Raster object by the supplied bounds.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\nbounds\ntuple\nA tuple containing the bounding box to clip by in the form of (xmin, ymin, xmax, ymax).\nrequired\n\n\nfile_path\nstr (optional\nFile path to save to cropped raster. If not supplied then the cropped raster is saved to a temporary file.\nNone)\n\n\nin_memory\nbool\nWhether to initiated the Raster from an array and store the data in-memory using Rasterio’s in-memory files.\nis False\n\n\ndriver\nstr (default ‘GTiff’). Default is ‘GTiff’\nNamed of GDAL-supported driver for file export.\n'GTiff'\n\n\ndtype\nstr (optional\nCoerce RasterLayers to the specified dtype. If not specified then the new intersected Raster is created using the dtype of theexisting Raster dataset, which uses a dtype that can accommodate the data types of all of the individual RasterLayers.\nNone)\n\n\nnodata\nany number (optional\nNodata value for new dataset. If not specified then a nodata value is set based on the minimum permissible value of the Raster’s data type. Note that this does not change the pixel nodata values of the raster, it only changes the metadata of what value represents a nodata pixel.\nNone)\n\n\nkwargs\nopt\nOptional named arguments to pass to the format drivers. 
For example can be compress=\"deflate\" to add compression.\n{}\n\n\n\n\n\n\n\n\n\nType\nDescription\n\n\n\n\npyspatialml.raster.Raster\nRaster cropped to new extent.\n\n\n\n\n\n\n\nRaster.drop(labels, in_place=False)\nDrop individual RasterLayers from a Raster object\nNote that this modifies the Raster object in-place by default.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\nlabels\nsingle label or list-like\nIndex (int) or layer name to drop. Can be a single integer or label, or a list of integers or labels.\nrequired\n\n\nin_place\nbool (default False)\nWhether to change the Raster object in-place or leave original and return a new Raster object.\nFalse\n\n\n\n\n\n\n\n\n\nType\nDescription\n\n\n\n\npyspatialml.pyspatialml.Raster\nReturned only if in_place is True\n\n\n\n\n\n\n\nRaster.extract_raster(src, progress=False)\nSample a Raster object by an aligned raster of labelled pixels.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\nsrc\n\nSingle band raster containing labelled pixels as an open rasterio DatasetReader object.\nrequired\n\n\nprogress\nbool(opt)\nShow a progress bar for extraction.\nFalse\n\n\n\n\n\n\n\n\n\n\n\n\n\nType\nDescription\n\n\n\n\ngeopandas.geopandas.GeoDataFrame\nGeodataframe containing extracted data as point features if return_array=False\n\n\n\n\n\n\n\nRaster.extract_vector(gdf, progress=False)\nSample a Raster/RasterLayer using a geopandas GeoDataframe containing points, lines or polygon features.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ngdf\n\nContaining either point, line or polygon geometries. Overlapping geometries will cause the same pixels to be sampled.\nrequired\n\n\nprogress\nbool(opt)\nShow a progress bar for extraction.\nFalse\n\n\n\n\n\n\n\n\n\n\n\n\n\nType\nDescription\n\n\n\n\ngeopandas.geopandas.GeoDataframe\nContaining extracted data as point geometries (one point per pixel). 
The resulting GeoDataFrame is indexed using a named pandas.MultiIndex, with pixel_idx index referring to the index of each pixel that was sampled, and the geometry_idx index referring to the index of the each geometry in the supplied gdf. This makes it possible to keep track of how sampled pixel relates to the original geometries, i.e. multiple pixels being extracted within the area of a single polygon that can be referred to using the geometry_idx. The extracted data can subsequently be joined with the attribute table of the supplied gdf using: training_py = geopandas.read_file(nc.polygons) df = self.stack.extract_vector(gdf=training_py) df = df.dropna() df = df.merge( right=training_py.loc[:, (“id”, “label”)], left_on=“polygon_idx”, right_on=“id”, right_index=True )\n\n\n\n\n\n\n\nRaster.extract_xy(xys, return_array=False, progress=False)\nSamples pixel values using an array of xy locations.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\nxys\n2d array-like\nx and y coordinates from which to sample the raster (n_samples, xys).\nrequired\n\n\nreturn_array\nbool(opt)\nBy default the extracted pixel values are returned as a geopandas.GeoDataFrame. If return_array=True then the extracted pixel values are returned as a tuple of numpy.ndarrays.\nFalse\n\n\nprogress\nbool(opt)\nShow a progress bar for extraction.\nFalse\n\n\n\n\n\n\n\n\n\n\n\n\n\nType\nDescription\n\n\n\n\ngeopandas.geopandas.GeoDataframe\nContaining extracted data as point geometries if return_array=False.\n\n\nnumpy.numpy.ndarray\n2d masked array containing sampled raster values (sample, bands) at the x,y locations.\n\n\n\n\n\n\n\nRaster.head()\nReturn the first 10 rows from the Raster as a ndarray\n\n\n\nRaster.intersect(file_path=None, in_memory=False, driver='GTiff', dtype=None, nodata=None, **kwargs)\nPerform a intersect operation on the Raster object.\nComputes the geometric intersection of the RasterLayers with the Raster object. 
This will cause nodata values in any of the rasters to be propagated through all of the output rasters.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\nfile_path\nstr (optional\nFile path to save to resulting Raster. If not supplied then the resulting Raster is saved to a temporary file.\nNone)\n\n\nin_memory\nbool\nWhether to initiated the Raster from an array and store the data in-memory using Rasterio’s in-memory files.\nis False\n\n\ndriver\nstr (default ‘GTiff’)\nNamed of GDAL-supported driver for file export.\n'GTiff'\n\n\ndtype\nstr (optional\nCoerce RasterLayers to the specified dtype. If not specified then the new intersected Raster is created using the dtype of the existing Raster dataset, which uses a dtype that can accommodate the data types of all of the individual RasterLayers.\nNone)\n\n\nnodata\nany number (optional\nNodata value for new dataset. If not specified then a nodata value is set based on the minimum permissible value of the Raster’s data type. Note that this changes the values of the pixels that represent nodata to the new value.\nNone)\n\n\nkwargs\nopt\nOptional named arguments to pass to the format drivers. For example can be compress=\"deflate\" to add compression.\n{}\n\n\n\n\n\n\n\n\n\n\n\n\n\nType\nDescription\n\n\n\n\npyspatialml.raster.Raster\nRaster with layers that are masked based on a union of all masks in the suite of RasterLayers.\n\n\n\n\n\n\n\nRaster.mask(shapes, invert=False, crop=True, pad=False, file_path=None, in_memory=False, driver='GTiff', dtype=None, nodata=None, **kwargs)\nMask a Raster object based on the outline of shapes in a geopandas.GeoDataFrame\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\nshapes\ngeopandas.geopandas.GeoDataFrame\nGeoDataFrame containing masking features.\nrequired\n\n\ninvert\nbool (default False)\nIf False then pixels outside shapes will be masked. 
If True then pixels inside shape will be masked.\nFalse\n\n\ncrop\nbool (default True)\nCrop the raster to the extent of the shapes.\nTrue\n\n\npad\nbool (default False)\nIf True, the features will be padded in each direction by one half of a pixel prior to cropping raster.\nFalse\n\n\nfile_path\nstr (optional\nFile path to save to resulting Raster. If not supplied then the resulting Raster is saved to a temporary file.\nNone)\n\n\nin_memory\nbool\nWhether to initiated the Raster from an array and store the data in-memory using Rasterio’s in-memory files.\nis False\n\n\ndriver\nstr (default ‘GTiff’)\nNamed of GDAL-supported driver for file export.\n'GTiff'\n\n\ndtype\nstr (optional\nCoerce RasterLayers to the specified dtype. If not specified then the cropped Raster is created using the existing dtype, which usesa dtype that can accommodate the data types of all of the individual RasterLayers.\nNone)\n\n\nnodata\nany number (optional\nNodata value for cropped dataset. If not specified then a nodata value is set based on the minimum permissible value of the Raster’s data type. Note that this changes the values of the pixels to the new nodata value, and changes the metadata of the raster.\nNone)\n\n\nkwargs\nopt\nOptional named arguments to pass to the format drivers. For example can be compress=\"deflate\" to add compression.\n{}\n\n\n\n\n\n\n\n\n\nType\nDescription\n\n\n\n\npyspatialml.pyspatialml.Raster\nRaster with masked layers.\n\n\n\n\n\n\n\nRaster.predict(estimator, file_path=None, in_memory=False, driver='GTiff', dtype=None, nodata=None, progress=False, constants=None, **kwargs)\nApply prediction of a scikit learn model to a Raster.\nThe model can represent any scikit learn model or compatible api with a fit and predict method. These can consist of classification or regression models. 
Multi-class classifications and multi-target regressions are also supported.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\nestimator\nestimator object implementing ‘fit’\nThe object to use to fit the data.\nrequired\n\n\nfile_path\nstr (optional\nPath to a GeoTiff raster for the prediction results. If not specified then the output is written to a temporary file.\nNone)\n\n\nin_memory\nbool\nWhether to initiated the Raster from an array and store the data in-memory using Rasterio’s in-memory files.\nis False\n\n\ndriver\nstr (default ‘GTiff’)\nNamed of GDAL-supported driver for file export\n'GTiff'\n\n\ndtype\nstr (optional\nOptionally specify a GDAL compatible data type when saving to file. If not specified, np.float32 is assumed.\nNone)\n\n\nnodata\nany number (optional\nNodata value for file export. If not specified then the nodata value is derived from the minimum permissible value for the given data type.\nNone)\n\n\nprogress\nbool (default False)\nShow progress bar for prediction.\nFalse\n\n\nconstants\n\nConstant features to add to the Raster object with each value in a list or 1d ndarray representing an additional feature. If a list-like object of values os passed, then each numeric value will be appended as constant features to the last columns in the data. It is therefore important that all features including constant features are present in the same order as what was used to train the model. If a dict is passed, then the keys of the dict must refer to the names of raster layers in the Raster object. In this case, the values of the dict will replace the values of the raster layers in the Raster object.\nNone\n\n\nkwargs\nopt\nOptional named arguments to pass to the format drivers. For example can be compress=\"deflate\" to add compression.\n{}\n\n\n\n\n\n\n\n\n\n\n\n\n\nType\nDescription\n\n\n\n\npyspatialml.raster.Raster\nRaster object containing prediction results as a RasterLayers. 
For classification and regression models, the Raster will contain a single RasterLayer, unless the model is multi-class or multi-target. Layers are named automatically as pred_raw_n with n = 1, 2, 3 ..n.\n\n\n\n\n\n\n\nRaster.predict_proba(estimator, file_path=None, in_memory=False, indexes=None, driver='GTiff', dtype=None, nodata=None, constants=None, progress=False, **kwargs)\nApply class probability prediction of a scikit learn model to a Raster.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\nestimator\nestimator object implementing ‘fit’\nThe object to use to fit the data.\nrequired\n\n\nfile_path\nstr (optional\nPath to a GeoTiff raster for the prediction results. If not specified then the output is written to a temporary file.\nNone)\n\n\nin_memory\nbool\nWhether to initiated the Raster from an array and store the data in-memory using Rasterio’s in-memory files.\nis False\n\n\nindexes\nlist of integers (optional\nList of class indices to export. In some circumstances, only a subset of the class probability estimations are desired, for instance when performing a binary classification only the probabilities for the positive class may be desired.\nNone)\n\n\ndriver\nstr (default ‘GTiff’)\nNamed of GDAL-supported driver for file export.\n'GTiff'\n\n\ndtype\nstr (optional\nOptionally specify a GDAL compatible data type when saving to file. If not specified, a data type is set based on the data type of the prediction.\nNone)\n\n\nnodata\nany number (optional\nNodata value for file export. If not specified then the nodata value is derived from the minimum permissible value for the given data type.\nNone)\n\n\nprogress\nbool (default False)\nShow progress bar for prediction.\nFalse\n\n\nconstants\n\nConstant features to add to the Raster object with each value in a list or 1d ndarray representing an additional feature. 
If a list-like object of values os passed, then each numeric value will be appended as constant features to the last columns in the data. It is therefore important that all features including constant features are present in the same order as what was used to train the model. If a dict is passed, then the keys of the dict must refer to the names of raster layers in the Raster object. In this case, the values of the dict will replace the values of the raster layers in the Raster object.\nNone\n\n\nkwargs\nopt\nOptional named arguments to pass to the format drivers. For example can be compress=\"deflate\" to add compression.\n{}\n\n\n\n\n\n\n\n\n\n\n\n\n\nType\nDescription\n\n\n\n\npyspatialml.raster.Raster\nRaster containing predicted class probabilities. Each predicted class is represented by a RasterLayer object. The RasterLayers are named prob_n for 1,2,3..n, with n based on the index position of the classes, not the number of the class itself. For example, a classification model predicting classes with integer values of 1, 3, and 5 would result in three RasterLayers named ‘prob_1’, ‘prob_2’ and ‘prob_3’.\n\n\n\n\n\n\n\nRaster.read(masked=False, window=None, out_shape=None, resampling='nearest', as_df=False, **kwargs)\nReads data from the Raster object into a numpy array.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\nmasked\nbool (default False)\nRead data into a masked array.\nFalse\n\n\nwindow\nrasterio.window.Window object (optional\nTuple of col_off, row_off, width, height of a window of data to read a chunk of data into a ndarray.\nNone)\n\n\nout_shape\ntuple (optional\nShape of shape of array (rows, cols) to read data into using decimated reads.\nNone)\n\n\nresampling\nstr (default ‘nearest’)\nResampling method to use when applying decimated reads when out_shape is specified. 
Supported methods are: ‘average’, ‘bilinear’, ‘cubic’, ‘cubic_spline’, ‘gauss’, ‘lanczos’, ‘max’, ‘med’, ‘min’, ‘mode’, ‘q1’, ‘q3’.\n'nearest'\n\n\nas_df\nbool (default False)\nWhether to return the data as a pandas.DataFrame with columns named by the RasterLayer names.\nFalse\n\n\n**kwargs\ndict\nOther arguments to pass to rasterio.DatasetReader.read method\n{}\n\n\n\n\n\n\n\n\n\n\n\n\n\nType\nDescription\n\n\n\n\nndarray\nRaster values in 3d ndarray with the dimensions in order of (band, row, and column).\n\n\n\n\n\n\n\nRaster.rename(names, in_place=False)\nRename a RasterLayer within the Raster object.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\nnames\ndict\ndict of old_name : new_name\nrequired\n\n\nin_place\nbool (default False)\nWhether to change names of the Raster object in-place or leave original and return a new Raster object.\nFalse\n\n\n\n\n\n\n\n\n\n\n\n\n\nType\nDescription\n\n\n\n\npyspatialml.pyspatialml.Raster\nReturned only if in_place is False\n\n\n\n\n\n\n\nRaster.sample(size, strata=None, return_array=False, random_state=None)\nGenerates a random sample of according to size, and samples the pixel values.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\nsize\nint\nNumber of random samples or number of samples per strata if a strata object is supplied.\nrequired\n\n\nstrata\npyspatialml Raster object (opt)\nWhether to use stratified instead of random sampling. 
Strata can be supplied using another pyspatialml.Raster object.\nNone\n\n\nreturn_array\nbool(opt)\nOptionally return extracted data as separate X and xy masked numpy arrays.\nFalse\n\n\nrandom_state\nint(opt)\ninteger to use within random.seed.\nNone\n\n\n\n\n\n\n\n\n\n\n\n\n\nType\nDescription\n\n\n\n\npandas.pandas.DataFrame\nDataFrame containing values of names of RasterLayers in the Raster if return_array is False.\n\n\ntuple\nA tuple containing two elements if return_array is True: - numpy.ndarray Numpy array of extracted raster values, typically 2d. - numpy.ndarray 2D numpy array of xy coordinates of extracted values.\n\n\n\n\n\n\n\nRaster.scale(centre=True, scale=True, file_path=None, in_memory=False, driver='GTiff', dtype=None, nodata=None, progress=False)\nStandardize (centre and scale) a Raster object by subtracting the mean and dividing by the standard deviation for each layer in the object.\nThe mean and standard deviation statistics are calculated for each layer separately.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncentre\nbool\nWhether to subtract the mean from each layer.\nis True\n\n\nscale\nbool\nWhether to divide each layer by the standard deviation of the layer.\nis True\n\n\nfile_path\nstr (optional\nPath to a GeoTiff raster for the prediction results. If not specified then the output is written to a temporary file.\nNone)\n\n\nin_memory\nbool\nWhether to initiated the Raster from an array and store the data in-memory using Rasterio’s in-memory files.\nis False\n\n\ndriver\nstr (default ‘GTiff’)\nNamed of GDAL-supported driver for file export.\n'GTiff'\n\n\ndtype\nstr (optional\nOptionally specify a GDAL compatible data type when saving to file. If not specified, a data type is set based on the data type of the prediction.\nNone)\n\n\nnodata\nany number (optional\nNodata value for file export. 
If not specified then the nodata value is derived from the minimum permissible value for the given data type.\nNone)\n\n\nprogress\nbool (default False)\nShow progress bar for operation.\nFalse\n\n\n\n\n\n\n\n\n\nType\nDescription\n\n\n\n\nPyspatialml.Raster object with rescaled data.\n\n\n\n\n\n\n\n\nRaster.set_block_shape(value)\nSet the block shape of the raster, i.e. the height and width of windows to read in chunks for the predict, predict_proba, apply, and other supported methods.\nNote block shape can also be set with myraster.block_shape = (500, 500)\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\nvalue\ntuple\nA tuple of (height, width) for the block window\nrequired\n\n\n\n\n\n\n\nRaster.tail()\nReturn the last 10 rows from the Raster as an ndarray\n\n\n\nRaster.to_crs(crs, resampling='nearest', file_path=None, in_memory=False, driver='GTiff', nodata=None, n_jobs=1, warp_mem_lim=0, progress=False, **kwargs)\nReprojects a Raster object to a different crs.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncrs\nrasterio.transform.CRS object, or dict\nExample: CRS({‘init’: ‘EPSG:4326’})\nrequired\n\n\nresampling\nstr (default ‘nearest’)\nResampling method to use. One of the following: nearest, bilinear, cubic, cubic_spline, lanczos, average, mode, max (GDAL >= 2.2), min (GDAL >= 2.2), med (GDAL >= 2.2), q1 (GDAL >= 2.2), q3 (GDAL >= 2.2)\n'nearest'\n\n\nfile_path\nstr (optional\nOptional path to save reprojected Raster object. If not specified then a tempfile is used.\nNone)\n\n\nin_memory\nbool\nWhether to initiate the Raster from an array and store the data in-memory using Rasterio’s in-memory files.\nis False\n\n\ndriver\nstr (default ‘GTiff’)\nName of GDAL-supported driver for file export.\n'GTiff'\n\n\nnodata\nany number (optional\nNodata value for new dataset. 
If not specified then the existing nodata value of the Raster object is used, which can accommodate the dtypes of the individual layers in the Raster.\nNone)\n\n\nn_jobs\nint (default 1)\nThe number of warp worker threads.\n1\n\n\nwarp_mem_lim\nint (default 0)\nThe warp operation memory limit in MB. Larger values allow the warp operation to be carried out in fewer chunks. The amount of memory required to warp a 3-band uint8 2000 row x 2000 col raster to a destination of the same size is approximately 56 MB. The default (0) means 64 MB with GDAL 2.2.\n0\n\n\nprogress\nbool (default False)\nOptionally show progress of transform operations.\nFalse\n\n\nkwargs\nopt\nOptional named arguments to pass to the format drivers. For example can be compress=\"deflate\" to add compression.\n{}\n\n\n\n\n\n\n\n\n\nType\nDescription\n\n\n\n\npyspatialml.raster.Raster\nRaster following reprojection.\n\n\n\n\n\n\n\nRaster.to_pandas(max_pixels=None, resampling='nearest')\nRaster to pandas DataFrame.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\nmax_pixels\n\nMaximum number of pixels to sample. By default all pixels are used.\nNone\n\n\nresampling\nstr (default ‘nearest’)\nResampling method to use when applying decimated reads when out_shape is specified. 
Supported methods are: ‘average’, ‘bilinear’, ‘cubic’, ‘cubic_spline’, ‘gauss’, ‘lanczos’, ‘max’, ‘med’, ‘min’, ‘mode’, ‘q1’, ‘q3’.\n'nearest'\n\n\n\n\n\n\n\n\n\n\n\n\n\nType\nDescription\n\n\n\n\npandas.pandas.DataFrame\nDataFrame containing values of names of RasterLayers in the Raster as columns, and pixel values as rows.\n\n\n\n\n\n\n\nRaster.write(file_path, driver='GTiff', dtype=None, nodata=None, **kwargs)\nWrite the Raster object to a file.\nOverrides the write RasterBase class method, which is a partial function of the rasterio.DatasetReader.write method.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\nfile_path\nstr\nFile path used to save the Raster object.\nrequired\n\n\ndriver\nstr (default is ‘GTiff’).\nName of GDAL driver used to save Raster data.\n'GTiff'\n\n\ndtype\nstr (opt\nOptionally specify a numpy compatible data type when saving to file. If not specified, a data type is selected based on the data types of RasterLayers in the Raster object.\nNone)\n\n\nnodata\nany number (opt\nOptionally assign a new nodata value when saving to file. If not specified a nodata value based on the minimum permissible value for the data types of RasterLayers in the Raster object is used. Note that this does not change the pixel nodata values of the raster, it only changes the metadata of what value represents a nodata pixel.\nNone)\n\n\nkwargs\nopt\nOptional named arguments to pass to the format drivers. For example can be compress=\"deflate\" to add compression.\n{}\n\n\n\n\n\n\n\n\n\nType\nDescription\n\n\n\n\npyspatialml.raster.Raster\nNew Raster object from saved file.",
+ "objectID": "docs/multitarget-regression-soil-properties.html#prediction-on-the-raster-object",
+ "href": "docs/multitarget-regression-soil-properties.html#prediction-on-the-raster-object",
+ "title": "Multi-Target Spatial Prediction using the Meuse Dataset",
+ "section": "Prediction on the Raster object",
+ "text": "Prediction on the Raster object\n\npreds = stack.predict(et)\npreds.rename(\n {old: new for old, new in zip(preds.names, ['lead', 'cadmium', 'copper', 'zinc', 'om'])},\n in_place=True\n)\npreds.lead.cmap = 'rainbow'\npreds.cadmium.cmap = 'rainbow'\npreds.copper.cmap = 'rainbow'\npreds.zinc.cmap = 'rainbow'\npreds.om.cmap = 'rainbow'\n\n/Users/stevenpawley/Library/Caches/pypoetry/virtualenvs/pyspatialml-NqZ1tMUm-py3.11/lib/python3.11/site-packages/sklearn/base.py:493: UserWarning: X does not have valid feature names, but OneHotEncoder was fitted with feature names\n warnings.warn(\n/Users/stevenpawley/Library/Caches/pypoetry/virtualenvs/pyspatialml-NqZ1tMUm-py3.11/lib/python3.11/site-packages/sklearn/base.py:493: UserWarning: X does not have valid feature names, but OneHotEncoder was fitted with feature names\n warnings.warn(\n\n\nPlot the results:\n\npreds.plot(out_shape=(200, 200), title_fontsize=14, figsize=(10, 8))\nplt.show()",
"crumbs": [
- "Reference",
- "Raster datasets",
- "Raster"
+ "Tutorials",
+ "Multitarget regression",
+ "Multi-Target Spatial Prediction using the Meuse Dataset"
]
},
{
@@ -388,233 +290,275 @@
]
},
{
- "objectID": "docs/multitarget-regression-soil-properties.html",
- "href": "docs/multitarget-regression-soil-properties.html",
- "title": "Multi-Target Spatial Prediction using the Meuse Dataset",
+ "objectID": "reference/Raster.html",
+ "href": "reference/Raster.html",
+ "title": "Raster",
"section": "",
- "text": "Here we are using the meuse dataset which is included in the pyspatialml package as an example of performing a spatial model and prediction. We can access the datasets using the pyspatialml.datasets module:\nfrom copy import deepcopy\nfrom tempfile import NamedTemporaryFile\nimport geopandas as gpd\nimport numpy as np\nfrom pyspatialml import Raster\nfrom pyspatialml.preprocessing import xy_coordinates, distance_to_corners\nimport pyspatialml.datasets.meuse as ms\n\nimport matplotlib as mpl\nimport matplotlib.pyplot as plt\nfrom matplotlib import cm\npredictor_files = ms.predictors\ntraining_pts_file = ms.meuse\nstack = Raster(predictor_files)\nstack.names\n\ndict_keys(['chnl_dist', 'dem', 'dist', 'ffreq', 'landimg2', 'landimg3', 'landimg4', 'mrvbf', 'rsp', 'slope', 'soil', 'twi'])\nPyspatialml implements pandas-style indexing for Raster objects, using Raster.loc to index by the name of the raster, and Raster.iloc to select by index. This method also accepts slices. Label-based indexing is also provided directly by the __getattr_ magic method, i.e. 
Raster[name] or for multiple layers Raster[(names)].\nFor example we can remove layers from Raster object using the Raster.drop method, or by subsetting the raster:\nstack.drop('ffreq')\n\nRaster Object Containing 11 Layers\n attribute values\n0 names [chnl_dist, dem, dist, landimg2, landimg3, lan...\n1 files [/Users/stevenpawley/GitHub/Pyspatialml/pyspat...\n2 rows 104\n3 cols 78\n4 res (40.0, 40.0)\n5 nodatavals [-99999.0, -99999.0, -1.0, -1.0, -1.0, -1.0, -...\nWe can store matplotlib cmaps as an attribute within each layer in the Raster:\nstack.chnl_dist.cmap = 'RdBu'\nstack.dem.cmap = 'terrain'\nstack.dist.cmap = 'Reds'\nstack.landimg2.cmap = 'Greys'\nstack.landimg3.cmap = 'Greys'\nstack.landimg4.cmap = 'Greys'\nstack.landimg4.cmap = 'Greys'\nstack.mrvbf.cmap = 'jet'\nstack.rsp.cmap = 'gnuplot2'\nstack.slope.cmap = 'PuRd'\nstack.soil.cmap = 'Set2'\nstack.twi.cmap = 'coolwarm'\nPlot the predictors in the Raster object as a raster matrix:\nmpl.style.use('seaborn-v0_8')\naxs = stack.plot(figsize=(9, 7))\nax = axs.flatten()[10]\nim = ax.images\nim[0].colorbar.set_ticks([1,2,3])\nax = axs.flatten()[8]\nax.tick_params(axis='x', labelrotation=65)\n\nplt.tight_layout()\nplt.show()",
+ "text": "Raster(self, src, crs=None, transform=None, nodata=None, file_path=None, driver=None, tempdir=tempfile.tempdir, in_memory=False)\nCreates a collection of file-based GDAL-supported raster datasets that share a common coordinate reference system and geometry.\nRaster objects encapsulate RasterLayer objects, which represent single band raster datasets that can physically be represented by either separate single-band raster files, multi-band raster files, or any combination of individual bands from multi-band raster and single-band raster datasets.\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\n\n\n\n\nfiles\nlist\nA list of the raster dataset files that are used in the Raster. This does not have to be the same length as the number of RasterLayers because some files may have multiple bands.\n\n\nmeta\ndict\nA dict containing the raster metadata. The dict contains the following keys/values: crs : the crs object transform : the Affine.affine transform object width : width of the Raster in pixels height : height of the Raster in pixels count : number of RasterLayers within the Raster dtype : the numpy datatype that represents lowest common denominator of the different dtypes for all of the layers in the Raster.\n\n\nnames\nlist\nA list of the RasterLayer names.\n\n\nblock_shape\ntuple\nThe default block_shape in (rows, cols) for reading windows of data in the Raster for out-of-memory processing.\n\n\n\n\n\n\n\n\n\nName\nDescription\n\n\n\n\naggregate\nAggregates a raster to (usually) a coarser grid cell size.\n\n\nalter\nApply a fitted scikit-learn transformer to a Raster object.\n\n\nappend\nMethod to add new RasterLayers to a Raster object.\n\n\napply\nApply user-supplied function to a Raster object.\n\n\nblock_shapes\nGenerator for windows for optimal reading and writing based\n\n\nclose\nClose all of the RasterLayer objects in the Raster.\n\n\ncopy\nCreates a shallow copy of a Raster object\n\n\ncrop\nCrops a Raster object by the supplied 
bounds.\n\n\ndrop\nDrop individual RasterLayers from a Raster object\n\n\nextract_raster\nSample a Raster object by an aligned raster of labelled pixels.\n\n\nextract_vector\nSample a Raster/RasterLayer using a geopandas GeoDataframe\n\n\nextract_xy\nSamples pixel values using an array of xy locations.\n\n\nhead\nReturn the first 10 rows from the Raster as a ndarray\n\n\nintersect\nPerform a intersect operation on the Raster object.\n\n\nmask\nMask a Raster object based on the outline of shapes in a\n\n\npredict\nApply prediction of a scikit learn model to a Raster.\n\n\npredict_proba\nApply class probability prediction of a scikit learn model to a Raster.\n\n\nread\nReads data from the Raster object into a numpy array.\n\n\nrename\nRename a RasterLayer within the Raster object.\n\n\nsample\nGenerates a random sample of according to size, and samples\n\n\nscale\nStandardize (centre and scale) a Raster object by\n\n\nset_block_shape\nSet the block shape of the raster, i.e. the height and width\n\n\ntail\nReturn the last 10 rows from the Raster as a ndarray\n\n\nto_crs\nReprojects a Raster object to a different crs.\n\n\nto_pandas\nRaster to pandas DataFrame.\n\n\nwrite\nWrite the Raster object to a file.\n\n\n\n\n\nRaster.aggregate(out_shape, resampling='nearest', file_path=None, in_memory=False, driver='GTiff', dtype=None, nodata=None, **kwargs)\nAggregates a raster to (usually) a coarser grid cell size.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\nout_shape\ntuple\nNew shape in (rows, cols).\nrequired\n\n\nresampling\nstr (default ‘nearest’)\nResampling method to use when applying decimated reads when out_shape is specified. Supported methods are: ‘average’, ‘bilinear’, ‘cubic’, ‘cubic_spline’, ‘gauss’, ‘lanczos’, ‘max’, ‘med’, ‘min’, ‘mode’, ‘q1’, ‘q3’.\n'nearest'\n\n\nfile_path\nstr (optional\nFile path to save to cropped raster. 
If not supplied then the aggregated raster is saved to a temporary file.\nNone)\n\n\nin_memory\nbool\nWhether to initiated the Raster from an array and store the data in-memory using Rasterio’s in-memory files.\nis False\n\n\ndriver\nstr (default ‘GTiff’)\nNamed of GDAL-supported driver for file export.\n'GTiff'\n\n\ndtype\nstr (optional\nCoerce RasterLayers to the specified dtype. If not specified then the new intersected Raster is created using the dtype of the existing Raster dataset, which uses a dtype that can accommodate the data types of all of the individual RasterLayers.\nNone)\n\n\nnodata\nany number (optional\nNodata value for new dataset. If not specified then a nodata value is set based on the minimum permissible value of the Raster’s dtype. Note that this does not change the pixel nodata values of the raster, it only changes the metadata of what value represents a nodata pixel.\nNone)\n\n\nkwargs\nopt\nOptional named arguments to pass to the format drivers. For example can be compress=\"deflate\" to add compression.\n{}\n\n\n\n\n\n\n\n\n\n\n\n\n\nType\nDescription\n\n\n\n\npyspatialml.raster.Raster\nRaster object aggregated to a new pixel size.\n\n\n\n\n\n\n\nRaster.alter(transformer, file_path=None, in_memory=False, driver='GTiff', dtype=None, nodata=None, progress=False)\nApply a fitted scikit-learn transformer to a Raster object.\nCan be used to transform a raster using methods such as StandardScaler, RobustScaler etc.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ntransformer\na sklearn.preprocessing.Transformer object\n\nrequired\n\n\nfile_path\nstr (optional\nPath to a GeoTiff raster for the prediction results. 
If not specified then the output is written to a temporary file.\nNone)\n\n\nin_memory\nbool\nWhether to initiated the Raster from an array and store the data in-memory using Rasterio’s in-memory files.\nis False\n\n\ndriver\nstr (default ‘GTiff’)\nNamed of GDAL-supported driver for file export.\n'GTiff'\n\n\ndtype\nstr (optional\nOptionally specify a GDAL compatible data type when saving to file. If not specified, a data type is set based on the data type of the prediction.\nNone)\n\n\nnodata\nany number (optional\nNodata value for file export. If not specified then the nodata value is derived from the minimum permissible value for the given data type.\nNone)\n\n\nprogress\nbool (default False)\nShow progress bar for operation.\nFalse\n\n\n\n\n\n\n\n\n\nType\nDescription\n\n\n\n\nPyspatialml.Raster object with transformed data.\n\n\n\n\n\n\n\n\nRaster.append(other, in_place=False)\nMethod to add new RasterLayers to a Raster object.\nNote that this modifies the Raster object in-place by default.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\nother\nRaster object, or list of Raster objects\nObject to append to the Raster.\nrequired\n\n\nin_place\nbool (default False)\nWhether to change the Raster object in-place or leave original and return a new Raster object.\nFalse\n\n\n\n\n\n\n\n\n\nType\nDescription\n\n\n\n\npyspatialml.raster.Raster\nReturned only if in_place is False\n\n\n\n\n\n\n\nRaster.apply(function, file_path=None, in_memory=False, driver='GTiff', dtype=None, nodata=None, progress=False, function_args={}, **kwargs)\nApply user-supplied function to a Raster object.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\nfunction\nfunction\nFunction that takes an numpy array as a single argument.\nrequired\n\n\nfile_path\nstr (optional\nOptional path to save calculated Raster object. 
If not specified then a tempfile is used.\nNone)\n\n\nin_memory\nbool\nWhether to initiated the Raster from an array and store the data in-memory using Rasterio’s in-memory files.\nis False\n\n\ndriver\nstr (default ‘GTiff’)\nNamed of GDAL-supported driver for file export.\n'GTiff'\n\n\ndtype\nstr (optional\nCoerce RasterLayers to the specified dtype. If not specified then the new Raster is created using the dtype of the calculation result.\nNone)\n\n\nnodata\nany number (optional\nNodata value for new dataset. If not specified then a nodata value is set based on the minimum permissible value of the Raster’s data type. Note that this changes the values of the pixels that represent nodata pixels.\nNone)\n\n\nprogress\nbool (default False)\nOptionally show progress of transform operations.\nFalse\n\n\nfunction_args\ndict(optional)\nOptionally pass arguments to the function as a dict or keyword arguments.\n{}\n\n\nkwargs\nopt\nOptional named arguments to pass to the format drivers. For example can be compress=\"deflate\" to add compression.\n{}\n\n\n\n\n\n\n\n\n\nType\nDescription\n\n\n\n\npyspatialml.raster.Raster\nRaster containing the calculated result.\n\n\n\n\n\n\n\nRaster.block_shapes(rows, cols)\nGenerator for windows for optimal reading and writing based on the raster format Windows and returns as a tuple with xoff, yoff, width, height.\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\nrows\nint\nHeight of window in rows.\nrequired\n\n\ncols\nint\nWidth of window in columns.\nrequired\n\n\n\n\n\n\n\nRaster.close()\nClose all of the RasterLayer objects in the Raster.\nNote that this will cause any rasters based on temporary files to be removed. 
This is intended as a method of clearing temporary files that may have accumulated during an analysis session.\n\n\n\nRaster.copy(subset=None)\nCreates a shallow copy of a Raster object\nNote that shallow in the context of a Raster object means that an immutable copy of the object is made, however the on-disk and in-memory file locations remain the same.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\nsubset\nopt\nA list of layer names to subset while copying.\nNone\n\n\n\n\n\n\n\n\n\nType\nDescription\n\n\n\n\npyspatialml.raster.Raster\n\n\n\n\n\n\n\n\nRaster.crop(bounds, file_path=None, in_memory=False, driver='GTiff', dtype=None, nodata=None, **kwargs)\nCrops a Raster object by the supplied bounds.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\nbounds\ntuple\nA tuple containing the bounding box to clip by in the form of (xmin, ymin, xmax, ymax).\nrequired\n\n\nfile_path\nstr (optional\nFile path to save to cropped raster. If not supplied then the cropped raster is saved to a temporary file.\nNone)\n\n\nin_memory\nbool\nWhether to initiate the Raster from an array and store the data in-memory using Rasterio’s in-memory files.\nis False\n\n\ndriver\nstr (default ‘GTiff’). Default is ‘GTiff’\nName of GDAL-supported driver for file export.\n'GTiff'\n\n\ndtype\nstr (optional\nCoerce RasterLayers to the specified dtype. If not specified then the new intersected Raster is created using the dtype of the existing Raster dataset, which uses a dtype that can accommodate the data types of all of the individual RasterLayers.\nNone)\n\n\nnodata\nany number (optional\nNodata value for new dataset. If not specified then a nodata value is set based on the minimum permissible value of the Raster’s data type. Note that this does not change the pixel nodata values of the raster, it only changes the metadata of what value represents a nodata pixel.\nNone)\n\n\nkwargs\nopt\nOptional named arguments to pass to the format drivers. 
For example can be compress=\"deflate\" to add compression.\n{}\n\n\n\n\n\n\n\n\n\nType\nDescription\n\n\n\n\npyspatialml.raster.Raster\nRaster cropped to new extent.\n\n\n\n\n\n\n\nRaster.drop(labels, in_place=False)\nDrop individual RasterLayers from a Raster object\nNote that this modifies the Raster object in-place by default.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\nlabels\nsingle label or list-like\nIndex (int) or layer name to drop. Can be a single integer or label, or a list of integers or labels.\nrequired\n\n\nin_place\nbool (default False)\nWhether to change the Raster object in-place or leave original and return a new Raster object.\nFalse\n\n\n\n\n\n\n\n\n\nType\nDescription\n\n\n\n\npyspatialml.pyspatialml.Raster\nReturned only if in_place is True\n\n\n\n\n\n\n\nRaster.extract_raster(src, progress=False)\nSample a Raster object by an aligned raster of labelled pixels.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\nsrc\n\nSingle band raster containing labelled pixels as an open rasterio DatasetReader object.\nrequired\n\n\nprogress\nbool(opt)\nShow a progress bar for extraction.\nFalse\n\n\n\n\n\n\n\n\n\n\n\n\n\nType\nDescription\n\n\n\n\ngeopandas.geopandas.GeoDataFrame\nGeodataframe containing extracted data as point features if return_array=False\n\n\n\n\n\n\n\nRaster.extract_vector(gdf, progress=False)\nSample a Raster/RasterLayer using a geopandas GeoDataframe containing points, lines or polygon features.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ngdf\n\nContaining either point, line or polygon geometries. Overlapping geometries will cause the same pixels to be sampled.\nrequired\n\n\nprogress\nbool(opt)\nShow a progress bar for extraction.\nFalse\n\n\n\n\n\n\n\n\n\n\n\n\n\nType\nDescription\n\n\n\n\ngeopandas.geopandas.GeoDataframe\nContaining extracted data as point geometries (one point per pixel). 
The resulting GeoDataFrame is indexed using a named pandas.MultiIndex, with pixel_idx index referring to the index of each pixel that was sampled, and the geometry_idx index referring to the index of the each geometry in the supplied gdf. This makes it possible to keep track of how sampled pixel relates to the original geometries, i.e. multiple pixels being extracted within the area of a single polygon that can be referred to using the geometry_idx. The extracted data can subsequently be joined with the attribute table of the supplied gdf using: training_py = geopandas.read_file(nc.polygons) df = self.stack.extract_vector(gdf=training_py) df = df.dropna() df = df.merge( right=training_py.loc[:, (“id”, “label”)], left_on=“polygon_idx”, right_on=“id”, right_index=True )\n\n\n\n\n\n\n\nRaster.extract_xy(xys, return_array=False, progress=False)\nSamples pixel values using an array of xy locations.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\nxys\n2d array-like\nx and y coordinates from which to sample the raster (n_samples, xys).\nrequired\n\n\nreturn_array\nbool(opt)\nBy default the extracted pixel values are returned as a geopandas.GeoDataFrame. If return_array=True then the extracted pixel values are returned as a tuple of numpy.ndarrays.\nFalse\n\n\nprogress\nbool(opt)\nShow a progress bar for extraction.\nFalse\n\n\n\n\n\n\n\n\n\n\n\n\n\nType\nDescription\n\n\n\n\ngeopandas.geopandas.GeoDataframe\nContaining extracted data as point geometries if return_array=False.\n\n\nnumpy.numpy.ndarray\n2d masked array containing sampled raster values (sample, bands) at the x,y locations.\n\n\n\n\n\n\n\nRaster.head()\nReturn the first 10 rows from the Raster as a ndarray\n\n\n\nRaster.intersect(file_path=None, in_memory=False, driver='GTiff', dtype=None, nodata=None, **kwargs)\nPerform a intersect operation on the Raster object.\nComputes the geometric intersection of the RasterLayers with the Raster object. 
This will cause nodata values in any of the rasters to be propagated through all of the output rasters.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\nfile_path\nstr (optional\nFile path to save to resulting Raster. If not supplied then the resulting Raster is saved to a temporary file.\nNone)\n\n\nin_memory\nbool\nWhether to initiated the Raster from an array and store the data in-memory using Rasterio’s in-memory files.\nis False\n\n\ndriver\nstr (default ‘GTiff’)\nNamed of GDAL-supported driver for file export.\n'GTiff'\n\n\ndtype\nstr (optional\nCoerce RasterLayers to the specified dtype. If not specified then the new intersected Raster is created using the dtype of the existing Raster dataset, which uses a dtype that can accommodate the data types of all of the individual RasterLayers.\nNone)\n\n\nnodata\nany number (optional\nNodata value for new dataset. If not specified then a nodata value is set based on the minimum permissible value of the Raster’s data type. Note that this changes the values of the pixels that represent nodata to the new value.\nNone)\n\n\nkwargs\nopt\nOptional named arguments to pass to the format drivers. For example can be compress=\"deflate\" to add compression.\n{}\n\n\n\n\n\n\n\n\n\n\n\n\n\nType\nDescription\n\n\n\n\npyspatialml.raster.Raster\nRaster with layers that are masked based on a union of all masks in the suite of RasterLayers.\n\n\n\n\n\n\n\nRaster.mask(shapes, invert=False, crop=True, pad=False, file_path=None, in_memory=False, driver='GTiff', dtype=None, nodata=None, **kwargs)\nMask a Raster object based on the outline of shapes in a geopandas.GeoDataFrame\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\nshapes\ngeopandas.geopandas.GeoDataFrame\nGeoDataFrame containing masking features.\nrequired\n\n\ninvert\nbool (default False)\nIf False then pixels outside shapes will be masked. 
If True then pixels inside shape will be masked.\nFalse\n\n\ncrop\nbool (default True)\nCrop the raster to the extent of the shapes.\nTrue\n\n\npad\nbool (default False)\nIf True, the features will be padded in each direction by one half of a pixel prior to cropping raster.\nFalse\n\n\nfile_path\nstr (optional\nFile path to save to resulting Raster. If not supplied then the resulting Raster is saved to a temporary file.\nNone)\n\n\nin_memory\nbool\nWhether to initiate the Raster from an array and store the data in-memory using Rasterio’s in-memory files.\nis False\n\n\ndriver\nstr (default ‘GTiff’)\nName of GDAL-supported driver for file export.\n'GTiff'\n\n\ndtype\nstr (optional\nCoerce RasterLayers to the specified dtype. If not specified then the cropped Raster is created using the existing dtype, which uses a dtype that can accommodate the data types of all of the individual RasterLayers.\nNone)\n\n\nnodata\nany number (optional\nNodata value for cropped dataset. If not specified then a nodata value is set based on the minimum permissible value of the Raster’s data type. Note that this changes the values of the pixels to the new nodata value, and changes the metadata of the raster.\nNone)\n\n\nkwargs\nopt\nOptional named arguments to pass to the format drivers. For example can be compress=\"deflate\" to add compression.\n{}\n\n\n\n\n\n\n\n\n\nType\nDescription\n\n\n\n\npyspatialml.pyspatialml.Raster\nRaster with masked layers.\n\n\n\n\n\n\n\nRaster.predict(estimator, file_path=None, in_memory=False, driver='GTiff', dtype=None, nodata=None, progress=False, constants=None, **kwargs)\nApply prediction of a scikit learn model to a Raster.\nThe model can represent any scikit learn model or compatible api with a fit and predict method. These can consist of classification or regression models. 
Multi-class classifications and multi-target regressions are also supported.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\nestimator\nestimator object implementing ‘fit’\nThe object to use to fit the data.\nrequired\n\n\nfile_path\nstr (optional\nPath to a GeoTiff raster for the prediction results. If not specified then the output is written to a temporary file.\nNone)\n\n\nin_memory\nbool\nWhether to initiated the Raster from an array and store the data in-memory using Rasterio’s in-memory files.\nis False\n\n\ndriver\nstr (default ‘GTiff’)\nNamed of GDAL-supported driver for file export\n'GTiff'\n\n\ndtype\nstr (optional\nOptionally specify a GDAL compatible data type when saving to file. If not specified, np.float32 is assumed.\nNone)\n\n\nnodata\nany number (optional\nNodata value for file export. If not specified then the nodata value is derived from the minimum permissible value for the given data type.\nNone)\n\n\nprogress\nbool (default False)\nShow progress bar for prediction.\nFalse\n\n\nconstants\n\nConstant features to add to the Raster object with each value in a list or 1d ndarray representing an additional feature. If a list-like object of values os passed, then each numeric value will be appended as constant features to the last columns in the data. It is therefore important that all features including constant features are present in the same order as what was used to train the model. If a dict is passed, then the keys of the dict must refer to the names of raster layers in the Raster object. In this case, the values of the dict will replace the values of the raster layers in the Raster object.\nNone\n\n\nkwargs\nopt\nOptional named arguments to pass to the format drivers. For example can be compress=\"deflate\" to add compression.\n{}\n\n\n\n\n\n\n\n\n\n\n\n\n\nType\nDescription\n\n\n\n\npyspatialml.raster.Raster\nRaster object containing prediction results as a RasterLayers. 
For classification and regression models, the Raster will contain a single RasterLayer, unless the model is multi-class or multi-target. Layers are named automatically as pred_raw_n with n = 1, 2, 3 ..n.\n\n\n\n\n\n\n\nRaster.predict_proba(estimator, file_path=None, in_memory=False, indexes=None, driver='GTiff', dtype=None, nodata=None, constants=None, progress=False, **kwargs)\nApply class probability prediction of a scikit learn model to a Raster.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\nestimator\nestimator object implementing ‘fit’\nThe object to use to fit the data.\nrequired\n\n\nfile_path\nstr (optional\nPath to a GeoTiff raster for the prediction results. If not specified then the output is written to a temporary file.\nNone)\n\n\nin_memory\nbool\nWhether to initiated the Raster from an array and store the data in-memory using Rasterio’s in-memory files.\nis False\n\n\nindexes\nlist of integers (optional\nList of class indices to export. In some circumstances, only a subset of the class probability estimations are desired, for instance when performing a binary classification only the probabilities for the positive class may be desired.\nNone)\n\n\ndriver\nstr (default ‘GTiff’)\nNamed of GDAL-supported driver for file export.\n'GTiff'\n\n\ndtype\nstr (optional\nOptionally specify a GDAL compatible data type when saving to file. If not specified, a data type is set based on the data type of the prediction.\nNone)\n\n\nnodata\nany number (optional\nNodata value for file export. If not specified then the nodata value is derived from the minimum permissible value for the given data type.\nNone)\n\n\nprogress\nbool (default False)\nShow progress bar for prediction.\nFalse\n\n\nconstants\n\nConstant features to add to the Raster object with each value in a list or 1d ndarray representing an additional feature. 
If a list-like object of values is passed, then each numeric value will be appended as constant features to the last columns in the data. It is therefore important that all features including constant features are present in the same order as what was used to train the model. If a dict is passed, then the keys of the dict must refer to the names of raster layers in the Raster object. In this case, the values of the dict will replace the values of the raster layers in the Raster object.\nNone\n\n\nkwargs\nopt\nOptional named arguments to pass to the format drivers. For example can be compress=\"deflate\" to add compression.\n{}\n\n\n\n\n\n\n\n\n\n\n\n\n\nType\nDescription\n\n\n\n\npyspatialml.raster.Raster\nRaster containing predicted class probabilities. Each predicted class is represented by a RasterLayer object. The RasterLayers are named prob_n for 1,2,3..n, with n based on the index position of the classes, not the number of the class itself. For example, a classification model predicting classes with integer values of 1, 3, and 5 would result in three RasterLayers named ‘prob_1’, ‘prob_2’ and ‘prob_3’.\n\n\n\n\n\n\n\nRaster.read(masked=False, window=None, out_shape=None, resampling='nearest', as_df=False, **kwargs)\nReads data from the Raster object into a numpy array.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\nmasked\nbool (default False)\nRead data into a masked array.\nFalse\n\n\nwindow\nrasterio.window.Window object (optional\nTuple of col_off, row_off, width, height of a window of data to read a chunk of data into a ndarray.\nNone)\n\n\nout_shape\ntuple (optional\nShape of array (rows, cols) to read data into using decimated reads.\nNone)\n\n\nresampling\nstr (default ‘nearest’)\nResampling method to use when applying decimated reads when out_shape is specified. 
Supported methods are: ‘average’, ‘bilinear’, ‘cubic’, ‘cubic_spline’, ‘gauss’, ‘lanczos’, ‘max’, ‘med’, ‘min’, ‘mode’, ‘q1’, ‘q3’.\n'nearest'\n\n\nas_df\nbool (default False)\nWhether to return the data as a pandas.DataFrame with columns named by the RasterLayer names.\nFalse\n\n\n**kwargs\ndict\nOther arguments to pass to rasterio.DatasetReader.read method\n{}\n\n\n\n\n\n\n\n\n\n\n\n\n\nType\nDescription\n\n\n\n\nndarray\nRaster values in 3d ndarray with the dimensions in order of (band, row, and column).\n\n\n\n\n\n\n\nRaster.rename(names, in_place=False)\nRename a RasterLayer within the Raster object.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\nnames\ndict\ndict of old_name : new_name\nrequired\n\n\nin_place\nbool (default False)\nWhether to change names of the Raster object in-place or leave original and return a new Raster object.\nFalse\n\n\n\n\n\n\n\n\n\n\n\n\n\nType\nDescription\n\n\n\n\npyspatialml.pyspatialml.Raster\nReturned only if in_place is False\n\n\n\n\n\n\n\nRaster.sample(size, strata=None, return_array=False, random_state=None)\nGenerates a random sample of according to size, and samples the pixel values.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\nsize\nint\nNumber of random samples or number of samples per strata if a strata object is supplied.\nrequired\n\n\nstrata\npyspatialml Raster object (opt)\nWhether to use stratified instead of random sampling. 
Strata can be supplied using another pyspatialml.Raster object.\nNone\n\n\nreturn_array\nbool(opt)\nOptionally return extracted data as separate X and xy masked numpy arrays.\nFalse\n\n\nrandom_state\nint(opt)\ninteger to use within random.seed.\nNone\n\n\n\n\n\n\n\n\n\n\n\n\n\nType\nDescription\n\n\n\n\npandas.pandas.DataFrame\nDataFrame containing values of names of RasterLayers in the Raster if return_array is False.\n\n\ntuple\nA tuple containing two elements if return_array is True: - numpy.ndarray Numpy array of extracted raster values, typically 2d. - numpy.ndarray 2D numpy array of xy coordinates of extracted values.\n\n\n\n\n\n\n\nRaster.scale(centre=True, scale=True, file_path=None, in_memory=False, driver='GTiff', dtype=None, nodata=None, progress=False)\nStandardize (centre and scale) a Raster object by subtracting the mean and dividing by the standard deviation for each layer in the object.\nThe mean and standard deviation statistics are calculated for each layer separately.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncentre\nbool\nWhether to subtract the mean from each layer.\nis True\n\n\nscale\nbool\nWhether to divide each layer by the standard deviation of the layer.\nis True\n\n\nfile_path\nstr (optional\nPath to a GeoTiff raster for the prediction results. If not specified then the output is written to a temporary file.\nNone)\n\n\nin_memory\nbool\nWhether to initiated the Raster from an array and store the data in-memory using Rasterio’s in-memory files.\nis False\n\n\ndriver\nstr (default ‘GTiff’)\nNamed of GDAL-supported driver for file export.\n'GTiff'\n\n\ndtype\nstr (optional\nOptionally specify a GDAL compatible data type when saving to file. If not specified, a data type is set based on the data type of the prediction.\nNone)\n\n\nnodata\nany number (optional\nNodata value for file export. 
If not specified then the nodata value is derived from the minimum permissible value for the given data type.\nNone)\n\n\nprogress\nbool (default False)\nShow progress bar for operation.\nFalse\n\n\n\n\n\n\n\n\n\nType\nDescription\n\n\n\n\nPyspatialml.Raster object with rescaled data.\n\n\n\n\n\n\n\n\nRaster.set_block_shape(value)\nSet the block shape of the raster, i.e. the height and width of windows to read in chunks for the predict, predict_proba, apply, and other supported-methods.\nNote block shape can also be set with myraster.block_shape = (500, 500)\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\nvalue\ntuple\nA tuple of (height, width) for the block window\nrequired\n\n\n\n\n\n\n\nRaster.tail()\nReturn the last 10 rows from the Raster as a ndarray\n\n\n\nRaster.to_crs(crs, resampling='nearest', file_path=None, in_memory=False, driver='GTiff', nodata=None, n_jobs=1, warp_mem_lim=0, progress=False, **kwargs)\nReprojects a Raster object to a different crs.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncrs\nrasterio.transform.CRS object, or dict\nExample: CRS({‘init’: ‘EPSG:4326’})\nrequired\n\n\nresampling\nstr (default ‘nearest’)\nResampling method to use. One of the following: nearest, bilinear, cubic, cubic_spline, lanczos, average, mode, max (GDAL >= 2.2), min (GDAL >= 2.2), med (GDAL >= 2.2), q1 (GDAL >= 2.2), q3 (GDAL >= 2.2)\n'nearest'\n\n\nfile_path\nstr (optional\nOptional path to save reprojected Raster object. If not specified then a tempfile is used.\nNone)\n\n\nin_memory\nbool\nWhether to initiated the Raster from an array and store the data in-memory using Rasterio’s in-memory files.\nis False\n\n\ndriver\nstr (default ‘GTiff’)\nNamed of GDAL-supported driver for file export.\n'GTiff'\n\n\nnodata\nany number (optional\nNodata value for new dataset. 
If not specified then the existing nodata value of the Raster object is used, which can accommodate the dtypes of the individual layers in the Raster.\nNone)\n\n\nn_jobs\nint (default 1)\nThe number of warp worker threads.\n1\n\n\nwarp_mem_lim\nint (default 0)\nThe warp operation memory limit in MB. Larger values allow the warp operation to be carried out in fewer chunks. The amount of memory required to warp a 3-band uint8 2000 row x 2000 col raster to a destination of the same size is approximately 56 MB. The default (0) means 64 MB with GDAL 2.2.\n0\n\n\nprogress\nbool (default False)\nOptionally show progress of transform operations.\nFalse\n\n\nkwargs\nopt\nOptional named arguments to pass to the format drivers. For example can be compress=\"deflate\" to add compression.\n{}\n\n\n\n\n\n\n\n\n\nType\nDescription\n\n\n\n\npyspatialml.raster.Raster\nRaster following reprojection.\n\n\n\n\n\n\n\nRaster.to_pandas(max_pixels=None, resampling='nearest')\nRaster to pandas DataFrame.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\nmax_pixels\n\nMaximum number of pixels to sample. By default all pixels are used.\nNone\n\n\nresampling\nstr (default ‘nearest’)\nResampling method to use when applying decimated reads when out_shape is specified. 
Supported methods are: ‘average’, ‘bilinear’, ‘cubic’, ‘cubic_spline’, ‘gauss’, ‘lanczos’, ‘max’, ‘med’, ‘min’, ‘mode’, ‘q1’, ‘q3’.\n'nearest'\n\n\n\n\n\n\n\n\n\n\n\n\n\nType\nDescription\n\n\n\n\npandas.pandas.DataFrame\nDataFrame containing values of names of RasterLayers in the Raster as columns, and pixel values as rows.\n\n\n\n\n\n\n\nRaster.write(file_path, driver='GTiff', dtype=None, nodata=None, **kwargs)\nWrite the Raster object to a file.\nOverrides the write RasterBase class method, which is a partial function of the rasterio.DatasetReader.write method.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\nfile_path\nstr\nFile path used to save the Raster object.\nrequired\n\n\ndriver\nstr (default is ‘GTiff’).\nName of GDAL driver used to save Raster data.\n'GTiff'\n\n\ndtype\nstr (opt\nOptionally specify a numpy compatible data type when saving to file. If not specified, a data type is selected based on the data types of RasterLayers in the Raster object.\nNone)\n\n\nnodata\nany number (opt\nOptionally assign a new nodata value when saving to file. If not specified a nodata value based on the minimum permissible value for the data types of RasterLayers in the Raster object is used. Note that this does not change the pixel nodata values of the raster, it only changes the metadata of what value represents a nodata pixel.\nNone)\n\n\nkwargs\nopt\nOptional named arguments to pass to the format drivers. For example can be compress=\"deflate\" to add compression.\n{}\n\n\n\n\n\n\n\n\n\nType\nDescription\n\n\n\n\npyspatialml.raster.Raster\nNew Raster object from saved file.",
"crumbs": [
- "Tutorials",
- "Multitarget regression",
- "Multi-Target Spatial Prediction using the Meuse Dataset"
+ "Reference",
+ "Raster datasets",
+ "Raster"
]
},
{
- "objectID": "docs/multitarget-regression-soil-properties.html#feature-engineering",
- "href": "docs/multitarget-regression-soil-properties.html#feature-engineering",
- "title": "Multi-Target Spatial Prediction using the Meuse Dataset",
- "section": "Feature Engineering",
- "text": "Feature Engineering\nWe want the prediction results to be depend on the spatial locations of the training data. So to include spatial information, coordinate grids can be generated and added to the Raster object:\n\nxy_layer = xy_coordinates(\n layer=stack.iloc[0], \n file_path=NamedTemporaryFile(suffix=\".tif\").name\n)\n\n\nxy_layer = xy_coordinates(\n layer=stack.iloc[0], \n file_path=NamedTemporaryFile(suffix=\".tif\").name\n)\n\nedms = distance_to_corners(\n layer=stack.iloc[0], \n file_path=NamedTemporaryFile(suffix=\".tif\").name\n)\nedms.rename(\n {old: new for (old, new) in zip(edms.names, [\"tl\", \"tr\", \"bl\", \"br\", \"c\"])},\n in_place=True\n)\n\nedms.plot()\nplt.show()\n\n\n\n\n\n\n\n\nAppend them to the Raster object:\n\nstack = stack.append([xy_layer, edms])\n\nPlot the new predictors:\n\naxs = stack.plot(figsize=(9, 7))\nax = axs.flatten()[10]\nim = ax.images\nim[0].colorbar.set_ticks([1,2,3])\nax = axs.flatten()[8]\nax.tick_params(axis='x', labelrotation=65)\n\nplt.tight_layout()\nplt.show()\n\n\n\n\n\n\n\n\nThe area that is filled by some of the grids is different. This doesn’t matter for the prediction because pixels in the Raster object that include some NaNs in some of the layers will be removed. However, the plots could potentially be given a cleaner look. We can use the Raster.intersect method to fix this:\n\nstack = stack.intersect()\n\n\naxs = stack.plot(figsize=(9, 7))\nax = axs.flatten()[10]\nim = ax.images\nim[0].colorbar.set_ticks([1,2,3])\nax = axs.flatten()[8]\nax.tick_params(axis='x', labelrotation=65)\n\nplt.tight_layout()\nplt.show()",
+ "objectID": "reference/Raster.html#attributes",
+ "href": "reference/Raster.html#attributes",
+ "title": "Raster",
+ "section": "",
+ "text": "Name\nType\nDescription\n\n\n\n\nfiles\nlist\nA list of the raster dataset files that are used in the Raster. This does not have to be the same length as the number of RasterLayers because some files may have multiple bands.\n\n\nmeta\ndict\nA dict containing the raster metadata. The dict contains the following keys/values: crs : the crs object transform : the Affine.affine transform object width : width of the Raster in pixels height : height of the Raster in pixels count : number of RasterLayers within the Raster dtype : the numpy datatype that represents lowest common denominator of the different dtypes for all of the layers in the Raster.\n\n\nnames\nlist\nA list of the RasterLayer names.\n\n\nblock_shape\ntuple\nThe default block_shape in (rows, cols) for reading windows of data in the Raster for out-of-memory processing.",
"crumbs": [
- "Tutorials",
- "Multitarget regression",
- "Multi-Target Spatial Prediction using the Meuse Dataset"
+ "Reference",
+ "Raster datasets",
+ "Raster"
+ ]
+ },
+ {
+ "objectID": "reference/Raster.html#methods",
+ "href": "reference/Raster.html#methods",
+ "title": "Raster",
+ "section": "",
+ "text": "Name\nDescription\n\n\n\n\naggregate\nAggregates a raster to (usually) a coarser grid cell size.\n\n\nalter\nApply a fitted scikit-learn transformer to a Raster object.\n\n\nappend\nMethod to add new RasterLayers to a Raster object.\n\n\napply\nApply user-supplied function to a Raster object.\n\n\nblock_shapes\nGenerator for windows for optimal reading and writing based\n\n\nclose\nClose all of the RasterLayer objects in the Raster.\n\n\ncopy\nCreates a shallow copy of a Raster object\n\n\ncrop\nCrops a Raster object by the supplied bounds.\n\n\ndrop\nDrop individual RasterLayers from a Raster object\n\n\nextract_raster\nSample a Raster object by an aligned raster of labelled pixels.\n\n\nextract_vector\nSample a Raster/RasterLayer using a geopandas GeoDataframe\n\n\nextract_xy\nSamples pixel values using an array of xy locations.\n\n\nhead\nReturn the first 10 rows from the Raster as a ndarray\n\n\nintersect\nPerform a intersect operation on the Raster object.\n\n\nmask\nMask a Raster object based on the outline of shapes in a\n\n\npredict\nApply prediction of a scikit learn model to a Raster.\n\n\npredict_proba\nApply class probability prediction of a scikit learn model to a Raster.\n\n\nread\nReads data from the Raster object into a numpy array.\n\n\nrename\nRename a RasterLayer within the Raster object.\n\n\nsample\nGenerates a random sample of according to size, and samples\n\n\nscale\nStandardize (centre and scale) a Raster object by\n\n\nset_block_shape\nSet the block shape of the raster, i.e. 
the height and width\n\n\ntail\nReturn the last 10 rows from the Raster as a ndarray\n\n\nto_crs\nReprojects a Raster object to a different crs.\n\n\nto_pandas\nRaster to pandas DataFrame.\n\n\nwrite\nWrite the Raster object to a file.\n\n\n\n\n\nRaster.aggregate(out_shape, resampling='nearest', file_path=None, in_memory=False, driver='GTiff', dtype=None, nodata=None, **kwargs)\nAggregates a raster to (usually) a coarser grid cell size.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\nout_shape\ntuple\nNew shape in (rows, cols).\nrequired\n\n\nresampling\nstr (default ‘nearest’)\nResampling method to use when applying decimated reads when out_shape is specified. Supported methods are: ‘average’, ‘bilinear’, ‘cubic’, ‘cubic_spline’, ‘gauss’, ‘lanczos’, ‘max’, ‘med’, ‘min’, ‘mode’, ‘q1’, ‘q3’.\n'nearest'\n\n\nfile_path\nstr (optional\nFile path to save to cropped raster. If not supplied then the aggregated raster is saved to a temporary file.\nNone)\n\n\nin_memory\nbool\nWhether to initiated the Raster from an array and store the data in-memory using Rasterio’s in-memory files.\nis False\n\n\ndriver\nstr (default ‘GTiff’)\nNamed of GDAL-supported driver for file export.\n'GTiff'\n\n\ndtype\nstr (optional\nCoerce RasterLayers to the specified dtype. If not specified then the new intersected Raster is created using the dtype of the existing Raster dataset, which uses a dtype that can accommodate the data types of all of the individual RasterLayers.\nNone)\n\n\nnodata\nany number (optional\nNodata value for new dataset. If not specified then a nodata value is set based on the minimum permissible value of the Raster’s dtype. Note that this does not change the pixel nodata values of the raster, it only changes the metadata of what value represents a nodata pixel.\nNone)\n\n\nkwargs\nopt\nOptional named arguments to pass to the format drivers. 
For example can be compress=\"deflate\" to add compression.\n{}\n\n\n\n\n\n\n\n\n\n\n\n\n\nType\nDescription\n\n\n\n\npyspatialml.raster.Raster\nRaster object aggregated to a new pixel size.\n\n\n\n\n\n\n\nRaster.alter(transformer, file_path=None, in_memory=False, driver='GTiff', dtype=None, nodata=None, progress=False)\nApply a fitted scikit-learn transformer to a Raster object.\nCan be used to transform a raster using methods such as StandardScaler, RobustScaler etc.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ntransformer\na sklearn.preprocessing.Transformer object\n\nrequired\n\n\nfile_path\nstr (optional\nPath to a GeoTiff raster for the prediction results. If not specified then the output is written to a temporary file.\nNone)\n\n\nin_memory\nbool\nWhether to initiated the Raster from an array and store the data in-memory using Rasterio’s in-memory files.\nis False\n\n\ndriver\nstr (default ‘GTiff’)\nNamed of GDAL-supported driver for file export.\n'GTiff'\n\n\ndtype\nstr (optional\nOptionally specify a GDAL compatible data type when saving to file. If not specified, a data type is set based on the data type of the prediction.\nNone)\n\n\nnodata\nany number (optional\nNodata value for file export. 
If not specified then the nodata value is derived from the minimum permissible value for the given data type.\nNone)\n\n\nprogress\nbool (default False)\nShow progress bar for operation.\nFalse\n\n\n\n\n\n\n\n\n\nType\nDescription\n\n\n\n\nPyspatialml.Raster object with transformed data.\n\n\n\n\n\n\n\n\nRaster.append(other, in_place=False)\nMethod to add new RasterLayers to a Raster object.\nNote that this modifies the Raster object in-place by default.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\nother\nRaster object, or list of Raster objects\nObject to append to the Raster.\nrequired\n\n\nin_place\nbool (default False)\nWhether to change the Raster object in-place or leave original and return a new Raster object.\nFalse\n\n\n\n\n\n\n\n\n\nType\nDescription\n\n\n\n\npyspatialml.raster.Raster\nReturned only if in_place is False\n\n\n\n\n\n\n\nRaster.apply(function, file_path=None, in_memory=False, driver='GTiff', dtype=None, nodata=None, progress=False, function_args={}, **kwargs)\nApply user-supplied function to a Raster object.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\nfunction\nfunction\nFunction that takes an numpy array as a single argument.\nrequired\n\n\nfile_path\nstr (optional\nOptional path to save calculated Raster object. If not specified then a tempfile is used.\nNone)\n\n\nin_memory\nbool\nWhether to initiated the Raster from an array and store the data in-memory using Rasterio’s in-memory files.\nis False\n\n\ndriver\nstr (default ‘GTiff’)\nNamed of GDAL-supported driver for file export.\n'GTiff'\n\n\ndtype\nstr (optional\nCoerce RasterLayers to the specified dtype. If not specified then the new Raster is created using the dtype of the calculation result.\nNone)\n\n\nnodata\nany number (optional\nNodata value for new dataset. If not specified then a nodata value is set based on the minimum permissible value of the Raster’s data type. 
Note that this changes the values of the pixels that represent nodata pixels.\nNone)\n\n\nprogress\nbool (default False)\nOptionally show progress of transform operations.\nFalse\n\n\nfunction_args\ndict(optional)\nOptionally pass arguments to the function as a dict or keyword arguments.\n{}\n\n\nkwargs\nopt\nOptional named arguments to pass to the format drivers. For example can be compress=\"deflate\" to add compression.\n{}\n\n\n\n\n\n\n\n\n\nType\nDescription\n\n\n\n\npyspatialml.raster.Raster\nRaster containing the calculated result.\n\n\n\n\n\n\n\nRaster.block_shapes(rows, cols)\nGenerator for windows for optimal reading and writing based on the raster format Windows and returns as a tuple with xoff, yoff, width, height.\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\nrows\nint\nHeight of window in rows.\nrequired\n\n\ncols\nint\nWidth of window in columns.\nrequired\n\n\n\n\n\n\n\nRaster.close()\nClose all of the RasterLayer objects in the Raster.\nNote that this will cause any rasters based on temporary files to be removed. 
This is intended as a method of clearing temporary files that may have accumulated during an analysis session.\n\n\n\nRaster.copy(subset=None)\nCreates a shallow copy of a Raster object\nNote that shallow in the context of a Raster object means that an immutable copy of the object is made, however the on-disk and in-memory file locations remain the same.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\nsubset\nopt\nA list of layer names to subset while copying.\nNone\n\n\n\n\n\n\n\n\n\nType\nDescription\n\n\n\n\npyspatialml.raster.Raster\n\n\n\n\n\n\n\n\nRaster.crop(bounds, file_path=None, in_memory=False, driver='GTiff', dtype=None, nodata=None, **kwargs)\nCrops a Raster object by the supplied bounds.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\nbounds\ntuple\nA tuple containing the bounding box to clip by in the form of (xmin, ymin, xmax, ymax).\nrequired\n\n\nfile_path\nstr (optional\nFile path to save to cropped raster. If not supplied then the cropped raster is saved to a temporary file.\nNone)\n\n\nin_memory\nbool\nWhether to initiated the Raster from an array and store the data in-memory using Rasterio’s in-memory files.\nis False\n\n\ndriver\nstr (default ‘GTiff’). Default is ‘GTiff’\nNamed of GDAL-supported driver for file export.\n'GTiff'\n\n\ndtype\nstr (optional\nCoerce RasterLayers to the specified dtype. If not specified then the new intersected Raster is created using the dtype of theexisting Raster dataset, which uses a dtype that can accommodate the data types of all of the individual RasterLayers.\nNone)\n\n\nnodata\nany number (optional\nNodata value for new dataset. If not specified then a nodata value is set based on the minimum permissible value of the Raster’s data type. Note that this does not change the pixel nodata values of the raster, it only changes the metadata of what value represents a nodata pixel.\nNone)\n\n\nkwargs\nopt\nOptional named arguments to pass to the format drivers. 
For example can be compress=\"deflate\" to add compression.\n{}\n\n\n\n\n\n\n\n\n\nType\nDescription\n\n\n\n\npyspatialml.raster.Raster\nRaster cropped to new extent.\n\n\n\n\n\n\n\nRaster.drop(labels, in_place=False)\nDrop individual RasterLayers from a Raster object\nNote that this modifies the Raster object in-place by default.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\nlabels\nsingle label or list-like\nIndex (int) or layer name to drop. Can be a single integer or label, or a list of integers or labels.\nrequired\n\n\nin_place\nbool (default False)\nWhether to change the Raster object in-place or leave original and return a new Raster object.\nFalse\n\n\n\n\n\n\n\n\n\nType\nDescription\n\n\n\n\npyspatialml.pyspatialml.Raster\nReturned only if in_place is True\n\n\n\n\n\n\n\nRaster.extract_raster(src, progress=False)\nSample a Raster object by an aligned raster of labelled pixels.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\nsrc\n\nSingle band raster containing labelled pixels as an open rasterio DatasetReader object.\nrequired\n\n\nprogress\nbool(opt)\nShow a progress bar for extraction.\nFalse\n\n\n\n\n\n\n\n\n\n\n\n\n\nType\nDescription\n\n\n\n\ngeopandas.geopandas.GeoDataFrame\nGeodataframe containing extracted data as point features if return_array=False\n\n\n\n\n\n\n\nRaster.extract_vector(gdf, progress=False)\nSample a Raster/RasterLayer using a geopandas GeoDataframe containing points, lines or polygon features.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ngdf\n\nContaining either point, line or polygon geometries. Overlapping geometries will cause the same pixels to be sampled.\nrequired\n\n\nprogress\nbool(opt)\nShow a progress bar for extraction.\nFalse\n\n\n\n\n\n\n\n\n\n\n\n\n\nType\nDescription\n\n\n\n\ngeopandas.geopandas.GeoDataframe\nContaining extracted data as point geometries (one point per pixel). 
The resulting GeoDataFrame is indexed using a named pandas.MultiIndex, with pixel_idx index referring to the index of each pixel that was sampled, and the geometry_idx index referring to the index of the each geometry in the supplied gdf. This makes it possible to keep track of how sampled pixel relates to the original geometries, i.e. multiple pixels being extracted within the area of a single polygon that can be referred to using the geometry_idx. The extracted data can subsequently be joined with the attribute table of the supplied gdf using: training_py = geopandas.read_file(nc.polygons) df = self.stack.extract_vector(gdf=training_py) df = df.dropna() df = df.merge( right=training_py.loc[:, (“id”, “label”)], left_on=“polygon_idx”, right_on=“id”, right_index=True )\n\n\n\n\n\n\n\nRaster.extract_xy(xys, return_array=False, progress=False)\nSamples pixel values using an array of xy locations.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\nxys\n2d array-like\nx and y coordinates from which to sample the raster (n_samples, xys).\nrequired\n\n\nreturn_array\nbool(opt)\nBy default the extracted pixel values are returned as a geopandas.GeoDataFrame. If return_array=True then the extracted pixel values are returned as a tuple of numpy.ndarrays.\nFalse\n\n\nprogress\nbool(opt)\nShow a progress bar for extraction.\nFalse\n\n\n\n\n\n\n\n\n\n\n\n\n\nType\nDescription\n\n\n\n\ngeopandas.geopandas.GeoDataframe\nContaining extracted data as point geometries if return_array=False.\n\n\nnumpy.numpy.ndarray\n2d masked array containing sampled raster values (sample, bands) at the x,y locations.\n\n\n\n\n\n\n\nRaster.head()\nReturn the first 10 rows from the Raster as a ndarray\n\n\n\nRaster.intersect(file_path=None, in_memory=False, driver='GTiff', dtype=None, nodata=None, **kwargs)\nPerform a intersect operation on the Raster object.\nComputes the geometric intersection of the RasterLayers with the Raster object. 
This will cause nodata values in any of the rasters to be propagated through all of the output rasters.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\nfile_path\nstr (optional\nFile path to save to resulting Raster. If not supplied then the resulting Raster is saved to a temporary file.\nNone)\n\n\nin_memory\nbool\nWhether to initiated the Raster from an array and store the data in-memory using Rasterio’s in-memory files.\nis False\n\n\ndriver\nstr (default ‘GTiff’)\nNamed of GDAL-supported driver for file export.\n'GTiff'\n\n\ndtype\nstr (optional\nCoerce RasterLayers to the specified dtype. If not specified then the new intersected Raster is created using the dtype of the existing Raster dataset, which uses a dtype that can accommodate the data types of all of the individual RasterLayers.\nNone)\n\n\nnodata\nany number (optional\nNodata value for new dataset. If not specified then a nodata value is set based on the minimum permissible value of the Raster’s data type. Note that this changes the values of the pixels that represent nodata to the new value.\nNone)\n\n\nkwargs\nopt\nOptional named arguments to pass to the format drivers. For example can be compress=\"deflate\" to add compression.\n{}\n\n\n\n\n\n\n\n\n\n\n\n\n\nType\nDescription\n\n\n\n\npyspatialml.raster.Raster\nRaster with layers that are masked based on a union of all masks in the suite of RasterLayers.\n\n\n\n\n\n\n\nRaster.mask(shapes, invert=False, crop=True, pad=False, file_path=None, in_memory=False, driver='GTiff', dtype=None, nodata=None, **kwargs)\nMask a Raster object based on the outline of shapes in a geopandas.GeoDataFrame\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\nshapes\ngeopandas.geopandas.GeoDataFrame\nGeoDataFrame containing masking features.\nrequired\n\n\ninvert\nbool (default False)\nIf False then pixels outside shapes will be masked. 
If True then pixels inside shape will be masked.\nFalse\n\n\ncrop\nbool (default True)\nCrop the raster to the extent of the shapes.\nTrue\n\n\npad\nbool (default False)\nIf True, the features will be padded in each direction by one half of a pixel prior to cropping raster.\nFalse\n\n\nfile_path\nstr (optional\nFile path to save to resulting Raster. If not supplied then the resulting Raster is saved to a temporary file.\nNone)\n\n\nin_memory\nbool\nWhether to initiated the Raster from an array and store the data in-memory using Rasterio’s in-memory files.\nis False\n\n\ndriver\nstr (default ‘GTiff’)\nNamed of GDAL-supported driver for file export.\n'GTiff'\n\n\ndtype\nstr (optional\nCoerce RasterLayers to the specified dtype. If not specified then the cropped Raster is created using the existing dtype, which usesa dtype that can accommodate the data types of all of the individual RasterLayers.\nNone)\n\n\nnodata\nany number (optional\nNodata value for cropped dataset. If not specified then a nodata value is set based on the minimum permissible value of the Raster’s data type. Note that this changes the values of the pixels to the new nodata value, and changes the metadata of the raster.\nNone)\n\n\nkwargs\nopt\nOptional named arguments to pass to the format drivers. For example can be compress=\"deflate\" to add compression.\n{}\n\n\n\n\n\n\n\n\n\nType\nDescription\n\n\n\n\npyspatialml.pyspatialml.Raster\nRaster with masked layers.\n\n\n\n\n\n\n\nRaster.predict(estimator, file_path=None, in_memory=False, driver='GTiff', dtype=None, nodata=None, progress=False, constants=None, **kwargs)\nApply prediction of a scikit learn model to a Raster.\nThe model can represent any scikit learn model or compatible api with a fit and predict method. These can consist of classification or regression models. 
Multi-class classifications and multi-target regressions are also supported.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\nestimator\nestimator object implementing ‘fit’\nThe object to use to fit the data.\nrequired\n\n\nfile_path\nstr (optional\nPath to a GeoTiff raster for the prediction results. If not specified then the output is written to a temporary file.\nNone)\n\n\nin_memory\nbool\nWhether to initiated the Raster from an array and store the data in-memory using Rasterio’s in-memory files.\nis False\n\n\ndriver\nstr (default ‘GTiff’)\nNamed of GDAL-supported driver for file export\n'GTiff'\n\n\ndtype\nstr (optional\nOptionally specify a GDAL compatible data type when saving to file. If not specified, np.float32 is assumed.\nNone)\n\n\nnodata\nany number (optional\nNodata value for file export. If not specified then the nodata value is derived from the minimum permissible value for the given data type.\nNone)\n\n\nprogress\nbool (default False)\nShow progress bar for prediction.\nFalse\n\n\nconstants\n\nConstant features to add to the Raster object with each value in a list or 1d ndarray representing an additional feature. If a list-like object of values os passed, then each numeric value will be appended as constant features to the last columns in the data. It is therefore important that all features including constant features are present in the same order as what was used to train the model. If a dict is passed, then the keys of the dict must refer to the names of raster layers in the Raster object. In this case, the values of the dict will replace the values of the raster layers in the Raster object.\nNone\n\n\nkwargs\nopt\nOptional named arguments to pass to the format drivers. For example can be compress=\"deflate\" to add compression.\n{}\n\n\n\n\n\n\n\n\n\n\n\n\n\nType\nDescription\n\n\n\n\npyspatialml.raster.Raster\nRaster object containing prediction results as a RasterLayers. 
For classification and regression models, the Raster will contain a single RasterLayer, unless the model is multi-class or multi-target. Layers are named automatically as pred_raw_n with n = 1, 2, 3 ..n.\n\n\n\n\n\n\n\nRaster.predict_proba(estimator, file_path=None, in_memory=False, indexes=None, driver='GTiff', dtype=None, nodata=None, constants=None, progress=False, **kwargs)\nApply class probability prediction of a scikit learn model to a Raster.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\nestimator\nestimator object implementing ‘fit’\nThe object to use to fit the data.\nrequired\n\n\nfile_path\nstr (optional\nPath to a GeoTiff raster for the prediction results. If not specified then the output is written to a temporary file.\nNone)\n\n\nin_memory\nbool\nWhether to initiated the Raster from an array and store the data in-memory using Rasterio’s in-memory files.\nis False\n\n\nindexes\nlist of integers (optional\nList of class indices to export. In some circumstances, only a subset of the class probability estimations are desired, for instance when performing a binary classification only the probabilities for the positive class may be desired.\nNone)\n\n\ndriver\nstr (default ‘GTiff’)\nNamed of GDAL-supported driver for file export.\n'GTiff'\n\n\ndtype\nstr (optional\nOptionally specify a GDAL compatible data type when saving to file. If not specified, a data type is set based on the data type of the prediction.\nNone)\n\n\nnodata\nany number (optional\nNodata value for file export. If not specified then the nodata value is derived from the minimum permissible value for the given data type.\nNone)\n\n\nprogress\nbool (default False)\nShow progress bar for prediction.\nFalse\n\n\nconstants\n\nConstant features to add to the Raster object with each value in a list or 1d ndarray representing an additional feature. 
If a list-like object of values os passed, then each numeric value will be appended as constant features to the last columns in the data. It is therefore important that all features including constant features are present in the same order as what was used to train the model. If a dict is passed, then the keys of the dict must refer to the names of raster layers in the Raster object. In this case, the values of the dict will replace the values of the raster layers in the Raster object.\nNone\n\n\nkwargs\nopt\nOptional named arguments to pass to the format drivers. For example can be compress=\"deflate\" to add compression.\n{}\n\n\n\n\n\n\n\n\n\n\n\n\n\nType\nDescription\n\n\n\n\npyspatialml.raster.Raster\nRaster containing predicted class probabilities. Each predicted class is represented by a RasterLayer object. The RasterLayers are named prob_n for 1,2,3..n, with n based on the index position of the classes, not the number of the class itself. For example, a classification model predicting classes with integer values of 1, 3, and 5 would result in three RasterLayers named ‘prob_1’, ‘prob_2’ and ‘prob_3’.\n\n\n\n\n\n\n\nRaster.read(masked=False, window=None, out_shape=None, resampling='nearest', as_df=False, **kwargs)\nReads data from the Raster object into a numpy array.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\nmasked\nbool (default False)\nRead data into a masked array.\nFalse\n\n\nwindow\nrasterio.window.Window object (optional\nTuple of col_off, row_off, width, height of a window of data to read a chunk of data into a ndarray.\nNone)\n\n\nout_shape\ntuple (optional\nShape of shape of array (rows, cols) to read data into using decimated reads.\nNone)\n\n\nresampling\nstr (default ‘nearest’)\nResampling method to use when applying decimated reads when out_shape is specified. 
Supported methods are: ‘average’, ‘bilinear’, ‘cubic’, ‘cubic_spline’, ‘gauss’, ‘lanczos’, ‘max’, ‘med’, ‘min’, ‘mode’, ‘q1’, ‘q3’.\n'nearest'\n\n\nas_df\nbool (default False)\nWhether to return the data as a pandas.DataFrame with columns named by the RasterLayer names.\nFalse\n\n\n**kwargs\ndict\nOther arguments to pass to rasterio.DatasetReader.read method\n{}\n\n\n\n\n\n\n\n\n\n\n\n\n\nType\nDescription\n\n\n\n\nndarray\nRaster values in 3d ndarray with the dimensions in order of (band, row, and column).\n\n\n\n\n\n\n\nRaster.rename(names, in_place=False)\nRename a RasterLayer within the Raster object.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\nnames\ndict\ndict of old_name : new_name\nrequired\n\n\nin_place\nbool (default False)\nWhether to change names of the Raster object in-place or leave original and return a new Raster object.\nFalse\n\n\n\n\n\n\n\n\n\n\n\n\n\nType\nDescription\n\n\n\n\npyspatialml.pyspatialml.Raster\nReturned only if in_place is False\n\n\n\n\n\n\n\nRaster.sample(size, strata=None, return_array=False, random_state=None)\nGenerates a random sample of according to size, and samples the pixel values.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\nsize\nint\nNumber of random samples or number of samples per strata if a strata object is supplied.\nrequired\n\n\nstrata\npyspatialml Raster object (opt)\nWhether to use stratified instead of random sampling. 
Strata can be supplied using another pyspatialml.Raster object.\nNone\n\n\nreturn_array\nbool(opt)\nOptionally return extracted data as separate X and xy masked numpy arrays.\nFalse\n\n\nrandom_state\nint(opt)\ninteger to use within random.seed.\nNone\n\n\n\n\n\n\n\n\n\n\n\n\n\nType\nDescription\n\n\n\n\npandas.pandas.DataFrame\nDataFrame containing values of names of RasterLayers in the Raster if return_array is False.\n\n\ntuple\nA tuple containing two elements if return_array is True: - numpy.ndarray Numpy array of extracted raster values, typically 2d. - numpy.ndarray 2D numpy array of xy coordinates of extracted values.\n\n\n\n\n\n\n\nRaster.scale(centre=True, scale=True, file_path=None, in_memory=False, driver='GTiff', dtype=None, nodata=None, progress=False)\nStandardize (centre and scale) a Raster object by subtracting the mean and dividing by the standard deviation for each layer in the object.\nThe mean and standard deviation statistics are calculated for each layer separately.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncentre\nbool\nWhether to subtract the mean from each layer.\nis True\n\n\nscale\nbool\nWhether to divide each layer by the standard deviation of the layer.\nis True\n\n\nfile_path\nstr (optional\nPath to a GeoTiff raster for the prediction results. If not specified then the output is written to a temporary file.\nNone)\n\n\nin_memory\nbool\nWhether to initiated the Raster from an array and store the data in-memory using Rasterio’s in-memory files.\nis False\n\n\ndriver\nstr (default ‘GTiff’)\nNamed of GDAL-supported driver for file export.\n'GTiff'\n\n\ndtype\nstr (optional\nOptionally specify a GDAL compatible data type when saving to file. If not specified, a data type is set based on the data type of the prediction.\nNone)\n\n\nnodata\nany number (optional\nNodata value for file export. 
If not specified then the nodata value is derived from the minimum permissible value for the given data type.\nNone)\n\n\nprogress\nbool (default False)\nShow progress bar for operation.\nFalse\n\n\n\n\n\n\n\n\n\nType\nDescription\n\n\n\n\nPyspatialml.Raster object with rescaled data.\n\n\n\n\n\n\n\n\nRaster.set_block_shape(value)\nSet the block shape of the raster, i.e. the height and width of windows to read in chunks for the predict, predict_proba, apply, and other supported-methods.\nNote block shape can also be set with myraster.block_shape = (500, 500)\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\nvalue\ntuple\nA tuple of (height, width) for the block window\nrequired\n\n\n\n\n\n\n\nRaster.tail()\nReturn the last 10 rows from the Raster as a ndarray\n\n\n\nRaster.to_crs(crs, resampling='nearest', file_path=None, in_memory=False, driver='GTiff', nodata=None, n_jobs=1, warp_mem_lim=0, progress=False, **kwargs)\nReprojects a Raster object to a different crs.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncrs\nrasterio.transform.CRS object, or dict\nExample: CRS({‘init’: ‘EPSG:4326’})\nrequired\n\n\nresampling\nstr (default ‘nearest’)\nResampling method to use. One of the following: nearest, bilinear, cubic, cubic_spline, lanczos, average, mode, max (GDAL >= 2.2), min (GDAL >= 2.2), med (GDAL >= 2.2), q1 (GDAL >= 2.2), q3 (GDAL >= 2.2)\n'nearest'\n\n\nfile_path\nstr (optional\nOptional path to save reprojected Raster object. If not specified then a tempfile is used.\nNone)\n\n\nin_memory\nbool\nWhether to initiated the Raster from an array and store the data in-memory using Rasterio’s in-memory files.\nis False\n\n\ndriver\nstr (default ‘GTiff’)\nNamed of GDAL-supported driver for file export.\n'GTiff'\n\n\nnodata\nany number (optional\nNodata value for new dataset. 
If not specified then the existing nodata value of the Raster object is used, which can accommodate the dtypes of the individual layers in the Raster.\nNone)\n\n\nn_jobs\nint (default 1)\nThe number of warp worker threads.\n1\n\n\nwarp_mem_lim\nint (default 0)\nThe warp operation memory limit in MB. Larger values allow the warp operation to be carried out in fewer chunks. The amount of memory required to warp a 3-band uint8 2000 row x 2000 col raster to a destination of the same size is approximately 56 MB. The default (0) means 64 MB with GDAL 2.2.\n0\n\n\nprogress\nbool (default False)\nOptionally show progress of transform operations.\nFalse\n\n\nkwargs\nopt\nOptional named arguments to pass to the format drivers. For example can be compress=\"deflate\" to add compression.\n{}\n\n\n\n\n\n\n\n\n\nType\nDescription\n\n\n\n\npyspatialml.raster.Raster\nRaster following reprojection.\n\n\n\n\n\n\n\nRaster.to_pandas(max_pixels=None, resampling='nearest')\nRaster to pandas DataFrame.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\nmax_pixels\n\nMaximum number of pixels to sample. By default all pixels are used.\nNone\n\n\nresampling\nstr (default ‘nearest’)\nResampling method to use when applying decimated reads when out_shape is specified. 
Supported methods are: ‘average’, ‘bilinear’, ‘cubic’, ‘cubic_spline’, ‘gauss’, ‘lanczos’, ‘max’, ‘med’, ‘min’, ‘mode’, ‘q1’, ‘q3’.\n'nearest'\n\n\n\n\n\n\n\n\n\n\n\n\n\nType\nDescription\n\n\n\n\npandas.pandas.DataFrame\nDataFrame containing values of names of RasterLayers in the Raster as columns, and pixel values as rows.\n\n\n\n\n\n\n\nRaster.write(file_path, driver='GTiff', dtype=None, nodata=None, **kwargs)\nWrite the Raster object to a file.\nOverrides the write RasterBase class method, which is a partial function of the rasterio.DatasetReader.write method.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\nfile_path\nstr\nFile path used to save the Raster object.\nrequired\n\n\ndriver\nstr (default is ‘GTiff’).\nName of GDAL driver used to save Raster data.\n'GTiff'\n\n\ndtype\nstr (opt\nOptionally specify a numpy compatible data type when saving to file. If not specified, a data type is selected based on the data types of RasterLayers in the Raster object.\nNone)\n\n\nnodata\nany number (opt\nOptionally assign a new nodata value when saving to file. If not specified a nodata value based on the minimum permissible value for the data types of RasterLayers in the Raster object is used. Note that this does not change the pixel nodata values of the raster, it only changes the metadata of what value represents a nodata pixel.\nNone)\n\n\nkwargs\nopt\nOptional named arguments to pass to the format drivers. For example can be compress=\"deflate\" to add compression.\n{}\n\n\n\n\n\n\n\n\n\nType\nDescription\n\n\n\n\npyspatialml.raster.Raster\nNew Raster object from saved file.",
+ "crumbs": [
+ "Reference",
+ "Raster datasets",
+ "Raster"
]
},
{
- "objectID": "docs/multitarget-regression-soil-properties.html#read-the-meuse-dataset",
- "href": "docs/multitarget-regression-soil-properties.html#read-the-meuse-dataset",
- "title": "Multi-Target Spatial Prediction using the Meuse Dataset",
- "section": "Read the Meuse Dataset",
- "text": "Read the Meuse Dataset\n\ntraining_pts = gpd.read_file(training_pts_file)\ntraining_pts.head()\n\n\n\n\n\n\n\n\n\ncadmium\ncopper\nlead\nzinc\nelev\ndist\nom\nffreq\nsoil\nlime\nlanduse\ndist.m\ngeometry\n\n\n\n\n0\n11.7\n85.0\n299.0\n1022.0\n7.909\n0.001358\n13.6\n1\n1\n1\nAh\n50.0\nPOINT (181072.000 333611.000)\n\n\n1\n8.6\n81.0\n277.0\n1141.0\n6.983\n0.012224\n14.0\n1\n1\n1\nAh\n30.0\nPOINT (181025.000 333558.000)\n\n\n2\n6.5\n68.0\n199.0\n640.0\n7.800\n0.103029\n13.0\n1\n1\n1\nAh\n150.0\nPOINT (181165.000 333537.000)\n\n\n3\n2.6\n81.0\n116.0\n257.0\n7.655\n0.190094\n8.0\n1\n2\n0\nGa\n270.0\nPOINT (181298.000 333484.000)\n\n\n4\n2.8\n48.0\n117.0\n269.0\n7.480\n0.277090\n8.7\n1\n2\n0\nAh\n380.0\nPOINT (181307.000 333330.000)\n\n\n\n\n\n\n\n\nPlot the training points:\n\nfrom mpl_toolkits.axes_grid1 import make_axes_locatable\n\nfig, axs = plt.subplots(2, 3, figsize=(8.5, 7))\n\nfor i, (ax, target) in enumerate(zip(axs.ravel(), ['cadmium', 'copper', 'lead', 'zinc', 'om'])):\n ax.set_title(target.title())\n divider = make_axes_locatable(ax)\n cax = divider.append_axes(\"right\", size=\"10%\", pad=0.05)\n training_pts.plot(column=target, legend=True, ax=ax, cax=cax, cmap='viridis')\n \n if i != 0:\n ax.set_yticklabels([])\n \n if i != 3:\n ax.set_xticklabels([])\n else:\n ax.tick_params(axis='x', labelrotation=65)\n \nfig.delaxes(axs.flatten()[i+1])\nplt.tight_layout()\nplt.show()",
+ "objectID": "reference/index.html",
+ "href": "reference/index.html",
+ "title": "Function reference",
+ "section": "",
+ "text": "Raster is a class for reading and writing raster datasets\n\n\n\nRaster\nCreates a collection of file-based GDAL-supported raster\n\n\nRasterLayer\nRepresents a single raster band derived from a single or\n\n\n\n\n\n\nTools for working with vector datasets\n\n\n\nvector\n\n\n\n\n\n\n\nPreprocessing tools for raster datasets\n\n\n\npreprocessing"
+ },
+ {
+ "objectID": "reference/index.html#raster-datasets",
+ "href": "reference/index.html#raster-datasets",
+ "title": "Function reference",
+ "section": "",
+ "text": "Raster is a class for reading and writing raster datasets\n\n\n\nRaster\nCreates a collection of file-based GDAL-supported raster\n\n\nRasterLayer\nRepresents a single raster band derived from a single or"
+ },
+ {
+ "objectID": "reference/index.html#vector-tools",
+ "href": "reference/index.html#vector-tools",
+ "title": "Function reference",
+ "section": "",
+ "text": "Tools for working with vector datasets\n\n\n\nvector"
+ },
+ {
+ "objectID": "reference/index.html#preprocessing",
+ "href": "reference/index.html#preprocessing",
+ "title": "Function reference",
+ "section": "",
+ "text": "Preprocessing tools for raster datasets\n\n\n\npreprocessing"
+ },
+ {
+ "objectID": "reference/preprocessing.html",
+ "href": "reference/preprocessing.html",
+ "title": "preprocessing",
+ "section": "",
+ "text": "preprocessing\n\n\n\n\n\nName\nDescription\n\n\n\n\ndistance_to_corners\nGenerate buffer distances to corner and centre coordinates of raster\n\n\ndistance_to_samples\nGenerate buffer distances to x,y coordinates.\n\n\none_hot_encode\nOne-hot encoding of a RasterLayer.\n\n\nrotated_coordinates\nGenerate 2d arrays with n_angles rotated coordinates.\n\n\nxy_coordinates\nFill 2d arrays with their x,y indices.\n\n\n\n\n\npreprocessing.distance_to_corners(layer, file_path, driver='GTiff')\nGenerate buffer distances to corner and centre coordinates of raster extent.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\nlayer\npyspatialml.RasterLayer, or rasterio.DatasetReader\n\nrequired\n\n\nfile_path\nstr\nFile path to save to the resulting Raster object\nrequired\n\n\ndriver\n(str, optional.optional.Default is GTiff)\nGDAL driver to use to save raster.\n'GTiff'\n\n\n\n\n\n\n\n\n\nType\nDescription\n\n\n\n\npyspatialml.Raster object\n\n\n\n\n\n\n\n\npreprocessing.distance_to_samples(layer, file_path, rows, cols, driver='GTiff')\nGenerate buffer distances to x,y coordinates.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\nlayer\npyspatialml.RasterLayer, or rasterio.DatasetReader\nRasterLayer to use as a template.\nrequired\n\n\nfile_path\nstr\nFile path to save to the resulting Raster object.\nrequired\n\n\nrows\n1d numpy array\narray of row indexes.\nrequired\n\n\ncols\n1d numpy array\narray of column indexes.\nrequired\n\n\ndriver\nstr\nGDAL driver to use to save raster.\n'GTiff'\n\n\n\n\n\n\n\n\n\nType\nDescription\n\n\n\n\npyspatialml.Raster object\n\n\n\n\n\n\n\n\npreprocessing.one_hot_encode(layer, file_path, categories=None, driver='GTiff')\nOne-hot encoding of a RasterLayer.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\nlayer\npyspatialml.pyspatialml.RasterLayer\nContaining categories to perform one-hot encoding on.\nrequired\n\n\nfile_path\nstr\nFile path to save one-hot encoded 
raster.\nrequired\n\n\ncategories\n(list, ndarray)\nOptional list of categories to extract. Default performs one-hot encoding on all categorical values in the input layer.\nNone\n\n\ndriver\n(str, options.options.Default is GTiff)\nGDAL-compatible driver.\n'GTiff'\n\n\n\n\n\n\n\n\n\n\n\n\n\nType\nDescription\n\n\n\n\npyspatialml.pyspatialml.Raster\nEach categorical value is encoded as a layer with a Raster object.\n\n\n\n\n\n\n\npreprocessing.rotated_coordinates(layer, file_path, n_angles=8, driver='GTiff')\nGenerate 2d arrays with n_angles rotated coordinates.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\nlayer\npyspatialml.RasterLayer, or rasterio.DatasetReader\nRasterLayer to use as a template.\nrequired\n\n\nn_angles\n(int, optional.optional.Default is 8)\nNumber of angles to rotate coordinate system by.\n8\n\n\ndriver\n(str, optional.optional.Default is GTiff)\nGDAL driver to use to save raster.\n'GTiff'\n\n\n\n\n\n\n\n\n\nType\nDescription\n\n\n\n\npyspatialml.pyspatialml.Raster\n\n\n\n\n\n\n\n\npreprocessing.xy_coordinates(layer, file_path, driver='GTiff')\nFill 2d arrays with their x,y indices.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\nlayer\npyspatialml.RasterLayer, or rasterio.DatasetReader\nRasterLayer to use as a template.\nrequired\n\n\nfile_path\nstr\nFile path to save to the resulting Raster object.s\nrequired\n\n\ndriver\n(str, options.options.Default is GTiff)\nGDAL driver to use to save raster.\n'GTiff'\n\n\n\n\n\n\n\n\n\nType\nDescription\n\n\n\n\npyspatialml.Raster object",
"crumbs": [
- "Tutorials",
- "Multitarget regression",
- "Multi-Target Spatial Prediction using the Meuse Dataset"
+ "Reference",
+ "Preprocessing",
+ "preprocessing"
]
},
{
- "objectID": "docs/multitarget-regression-soil-properties.html#extract-raster-values-at-the-training-point-locations",
- "href": "docs/multitarget-regression-soil-properties.html#extract-raster-values-at-the-training-point-locations",
- "title": "Multi-Target Spatial Prediction using the Meuse Dataset",
- "section": "Extract Raster Values at the Training Point Locations",
- "text": "Extract Raster Values at the Training Point Locations\nPixel values from a Raster object can be extracted using geometries within a geopandas.GeoDataFrame (points, lines, polygons) or by using labelled pixels from another raster with the same dimensions and crs.\nBy default the extracted values are returned as a geopandas.GeoDataFrame that contains the data and the coordinates of the pixels:\n\ntraining_df = stack.extract_vector(gdf=training_pts)\n\ntraining_df.index = training_df.index.get_level_values(\"geometry_idx\")\ntraining_df = training_df.merge(\n training_pts.loc[:, (\"lead\", \"cadmium\", \"copper\", \"zinc\", \"om\")], \n left_index=True, \n right_index=True\n) \n\n\ntraining_df = training_df.dropna()\ntraining_df.head()\n\n\n\n\n\n\n\n\n\nchnl_dist\ndem\ndist\nffreq\nlandimg2\nlandimg3\nlandimg4\nmrvbf\nrsp\nslope\n...\ntr\nbl\nbr\nc\ngeometry\nlead\ncadmium\ncopper\nzinc\nom\n\n\ngeometry_idx\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n0\n0.000000\n3214.0\n0.001358\n1.0\n97.0\n92.0\n192.0\n3.523824e-06\n0.000000\n1.423307\n...\n12.369317\n119.268608\n100.717430\n55.470715\nPOINT (181072.000 333611.000)\n299.0\n11.7\n85.0\n1022.0\n13.6\n\n\n1\n79.849854\n3402.0\n0.012224\n1.0\n160.0\n183.0\n183.0\n9.879866e-06\n0.082085\n1.286004\n...\n13.928389\n117.046997\n98.858482\n53.235325\nPOINT (181025.000 333558.000)\n277.0\n8.6\n81.0\n1141.0\n14.0\n\n\n2\n0.000000\n3277.0\n0.103029\n1.0\n178.0\n209.0\n179.0\n1.340742e-03\n0.000000\n0.674711\n...\n10.295630\n119.281181\n98.412399\n55.226807\nPOINT (181165.000 333537.000)\n199.0\n6.5\n68.0\n640.0\n13.0\n\n\n3\n184.743164\n3563.0\n0.190094\n1.0\n114.0\n135.0\n152.0\n6.547428e-07\n0.192325\n1.413479\n...\n8.485281\n120.208153\n97.185387\n56.035702\nPOINT (181298.000 333484.000)\n116.0\n2.6\n81.0\n257.0\n8.0\n\n\n4\n16.768555\n3406.0\n0.277090\n1.0\n133.0\n154.0\n151.0\n1.588824e-03\n0.016689\n0.531276\n...\n11.661903\n117.004272\n93.193344\n52.801514\nPOINT (181307.000 
333330.000)\n117.0\n2.8\n48.0\n269.0\n8.7\n\n\n\n\n5 rows × 25 columns",
+ "objectID": "reference/preprocessing.html#functions",
+ "href": "reference/preprocessing.html#functions",
+ "title": "preprocessing",
+ "section": "",
+ "text": "Name\nDescription\n\n\n\n\ndistance_to_corners\nGenerate buffer distances to corner and centre coordinates of raster\n\n\ndistance_to_samples\nGenerate buffer distances to x,y coordinates.\n\n\none_hot_encode\nOne-hot encoding of a RasterLayer.\n\n\nrotated_coordinates\nGenerate 2d arrays with n_angles rotated coordinates.\n\n\nxy_coordinates\nFill 2d arrays with their x,y indices.\n\n\n\n\n\npreprocessing.distance_to_corners(layer, file_path, driver='GTiff')\nGenerate buffer distances to corner and centre coordinates of raster extent.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\nlayer\npyspatialml.RasterLayer, or rasterio.DatasetReader\n\nrequired\n\n\nfile_path\nstr\nFile path to save to the resulting Raster object\nrequired\n\n\ndriver\n(str, optional.optional.Default is GTiff)\nGDAL driver to use to save raster.\n'GTiff'\n\n\n\n\n\n\n\n\n\nType\nDescription\n\n\n\n\npyspatialml.Raster object\n\n\n\n\n\n\n\n\npreprocessing.distance_to_samples(layer, file_path, rows, cols, driver='GTiff')\nGenerate buffer distances to x,y coordinates.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\nlayer\npyspatialml.RasterLayer, or rasterio.DatasetReader\nRasterLayer to use as a template.\nrequired\n\n\nfile_path\nstr\nFile path to save to the resulting Raster object.\nrequired\n\n\nrows\n1d numpy array\narray of row indexes.\nrequired\n\n\ncols\n1d numpy array\narray of column indexes.\nrequired\n\n\ndriver\nstr\nGDAL driver to use to save raster.\n'GTiff'\n\n\n\n\n\n\n\n\n\nType\nDescription\n\n\n\n\npyspatialml.Raster object\n\n\n\n\n\n\n\n\npreprocessing.one_hot_encode(layer, file_path, categories=None, driver='GTiff')\nOne-hot encoding of a RasterLayer.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\nlayer\npyspatialml.pyspatialml.RasterLayer\nContaining categories to perform one-hot encoding on.\nrequired\n\n\nfile_path\nstr\nFile path to save one-hot encoded raster.\nrequired\n\n\ncategories\n(list, 
ndarray)\nOptional list of categories to extract. Default performs one-hot encoding on all categorical values in the input layer.\nNone\n\n\ndriver\n(str, options.options.Default is GTiff)\nGDAL-compatible driver.\n'GTiff'\n\n\n\n\n\n\n\n\n\n\n\n\n\nType\nDescription\n\n\n\n\npyspatialml.pyspatialml.Raster\nEach categorical value is encoded as a layer with a Raster object.\n\n\n\n\n\n\n\npreprocessing.rotated_coordinates(layer, file_path, n_angles=8, driver='GTiff')\nGenerate 2d arrays with n_angles rotated coordinates.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\nlayer\npyspatialml.RasterLayer, or rasterio.DatasetReader\nRasterLayer to use as a template.\nrequired\n\n\nn_angles\n(int, optional.optional.Default is 8)\nNumber of angles to rotate coordinate system by.\n8\n\n\ndriver\n(str, optional.optional.Default is GTiff)\nGDAL driver to use to save raster.\n'GTiff'\n\n\n\n\n\n\n\n\n\nType\nDescription\n\n\n\n\npyspatialml.pyspatialml.Raster\n\n\n\n\n\n\n\n\npreprocessing.xy_coordinates(layer, file_path, driver='GTiff')\nFill 2d arrays with their x,y indices.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\nlayer\npyspatialml.RasterLayer, or rasterio.DatasetReader\nRasterLayer to use as a template.\nrequired\n\n\nfile_path\nstr\nFile path to save to the resulting Raster object.s\nrequired\n\n\ndriver\n(str, options.options.Default is GTiff)\nGDAL driver to use to save raster.\n'GTiff'\n\n\n\n\n\n\n\n\n\nType\nDescription\n\n\n\n\npyspatialml.Raster object",
"crumbs": [
- "Tutorials",
- "Multitarget regression",
- "Multi-Target Spatial Prediction using the Meuse Dataset"
+ "Reference",
+ "Preprocessing",
+ "preprocessing"
]
},
{
- "objectID": "docs/multitarget-regression-soil-properties.html#developing-a-machine-learning-model",
- "href": "docs/multitarget-regression-soil-properties.html#developing-a-machine-learning-model",
- "title": "Multi-Target Spatial Prediction using the Meuse Dataset",
- "section": "Developing a Machine Learning Model",
- "text": "Developing a Machine Learning Model\nHere we are going to create a machine learning pipeline that correctly handles categorical predictors via one-hot encoding:\n\nstack.names\n\ndict_keys(['chnl_dist', 'dem', 'dist', 'ffreq', 'landimg2', 'landimg3', 'landimg4', 'mrvbf', 'rsp', 'slope', 'soil', 'twi', 'x_coordinates', 'y_coordinates', 'tl', 'tr', 'bl', 'br', 'c'])\n\n\n\nfrom sklearn.pipeline import Pipeline\nfrom sklearn.ensemble import ExtraTreesRegressor\nfrom sklearn.preprocessing import OneHotEncoder\nfrom sklearn.compose import ColumnTransformer\n\nsoil_idx = [i for i, name in enumerate(stack.names) if name == 'soil']\n\ntrans = ColumnTransformer([\n ('ohe', OneHotEncoder(categories='auto', handle_unknown='ignore'), soil_idx)\n ], remainder='passthrough')\n\net = ExtraTreesRegressor(n_estimators=500, n_jobs=-1, random_state=1234)\net = Pipeline([\n ('preproc', trans),\n ('regressor', et)])\n\nNow we can separate our response and predictor variables and train the model:\n\nX = training_df.loc[:, stack.names]\ny = training_df.loc[:, ['lead', 'cadmium', 'copper', 'zinc', 'om']]\net.fit(X, y)\n\nPipeline(steps=[('preproc',\n ColumnTransformer(remainder='passthrough',\n transformers=[('ohe',\n OneHotEncoder(handle_unknown='ignore'),\n [10])])),\n ('regressor',\n ExtraTreesRegressor(n_estimators=500, n_jobs=-1,\n random_state=1234))])In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org. 
Pipeline?Documentation for PipelineiFittedPipeline(steps=[('preproc',\n ColumnTransformer(remainder='passthrough',\n transformers=[('ohe',\n OneHotEncoder(handle_unknown='ignore'),\n [10])])),\n ('regressor',\n ExtraTreesRegressor(n_estimators=500, n_jobs=-1,\n random_state=1234))]) preproc: ColumnTransformer?Documentation for preproc: ColumnTransformerColumnTransformer(remainder='passthrough',\n transformers=[('ohe', OneHotEncoder(handle_unknown='ignore'),\n [10])]) ohe[10] OneHotEncoder?Documentation for OneHotEncoderOneHotEncoder(handle_unknown='ignore') remainder['chnl_dist', 'dem', 'dist', 'ffreq', 'landimg2', 'landimg3', 'landimg4', 'mrvbf', 'rsp', 'slope', 'twi', 'x_coordinates', 'y_coordinates', 'tl', 'tr', 'bl', 'br', 'c'] passthroughpassthrough ExtraTreesRegressor?Documentation for ExtraTreesRegressorExtraTreesRegressor(n_estimators=500, n_jobs=-1, random_state=1234) \n\n\nTo evaluate the performance of the model, we will use 10-fold cross validation:\n\nfrom sklearn.model_selection import cross_validate, KFold\n\nouter = KFold(n_splits=10, shuffle=True, random_state=1234)\nscores = cross_validate(et, X, y, scoring='neg_mean_squared_error', cv=10, n_jobs=1)\nrmse = np.sqrt(-scores['test_score']).mean()\n\nprint(\"Our RMSE score is {}\".format(rmse))\n\nOur RMSE score is 105.19227221271413",
+ "objectID": "reference/RasterLayer.html",
+ "href": "reference/RasterLayer.html",
+ "title": "RasterLayer",
+ "section": "",
+ "text": "RasterLayer(self, band)\nRepresents a single raster band derived from a single or multi-band raster dataset\nSimple wrapper around a rasterio.Band object with additional methods. Used because the Rasterio.Band.ds.read method reads all bands from a multi-band dataset, whereas the RasterLayer read method only reads a single band.\nMethods encapsulated in RasterLayer objects represent those that typically would only be applied to a single-band of a raster, i.e. sieve-clump, distance to non-NaN pixels, or arithmetic operations on individual layers.\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\n\n\n\n\nbidx\nint\nThe band index of the RasterLayer within the file dataset.\n\n\ndtype\nstr\nThe data type of the RasterLayer.\n\n\nds\nrasterio.rasterio.band\nThe underlying rasterio.band object.\n\n\nname\nstr\nA syntactically valid name for the RasterLayer.\n\n\nfile\nstr\nThe file path to the dataset.\n\n\nnodata\nany number\nThe number that is used to represent nodata pixels in the RasterLayer.\n\n\ndriver\nstr\nThe name of the GDAL format driver.\n\n\nmeta\ndict\nA python dict storing the RasterLayer metadata.\n\n\ntransform\naffine.Affine object\nThe affine transform parameters.\n\n\ncount\nint\nNumber of layers; always equal to 1.\n\n\nshape\ntuple\nShape of RasterLayer in (rows, columns)\n\n\nwidth, height\nint\nThe width (cols) and height (rows) of the dataset.\n\n\nbounds\nBoundingBox named tuple\nA named tuple with left, bottom, right and top coordinates of the dataset.\n\n\ncmap\nstr\nThe name of matplotlib map, or a custom matplotlib.cm.LinearSegmentedColormap or ListedColormap object.\n\n\nnorm\nmatplotlib.matplotlib.colors.matplotlib.colors.Normalize(opt)\nA matplotlib.colors.Normalize to apply to the RasterLayer. 
This overides the norm attribute of the RasterLayer.\n\n\n\n\n\n\n\n\n\nName\nDescription\n\n\n\n\nmax\nMaximum value.\n\n\nmean\nMean value\n\n\nmedian\nMedian value\n\n\nmin\nMinimum value.\n\n\nplot\nPlot a RasterLayer using matplotlib.pyplot.imshow\n\n\nread\nRead method for a single RasterLayer.\n\n\nstddev\nStandard deviation\n\n\nwrite\nWrite method for a single RasterLayer.\n\n\n\n\n\nRasterLayer.max(max_pixels=10000)\nMaximum value.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\nmax_pixels\nint\nNumber of pixels used to inform statistical estimate.\n10000\n\n\n\n\n\n\n\n\n\nType\nDescription\n\n\n\n\nnumpy.numpy.float32\nThe maximum value of the object’s pixels.\n\n\n\n\n\n\n\nRasterLayer.mean(max_pixels=10000)\nMean value\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\nmax_pixels\nint\nNumber of pixels used to inform statistical estimate.\n10000\n\n\n\n\n\n\n\n\n\nType\nDescription\n\n\n\n\nnumpy.numpy.float32\nThe mean value of the object’s pixels.\n\n\n\n\n\n\n\nRasterLayer.median(max_pixels=10000)\nMedian value\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\nmax_pixels\nint\nNumber of pixels used to inform statistical estimate.\n10000\n\n\n\n\n\n\n\n\n\nType\nDescription\n\n\n\n\nnumpy.numpy.float32\nThe medium value of the object’s pixels.\n\n\n\n\n\n\n\nRasterLayer.min(max_pixels=10000)\nMinimum value.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\nmax_pixels\nint\nNumber of pixels used to inform statistical estimate.\n10000\n\n\n\n\n\n\n\n\n\nType\nDescription\n\n\n\n\nnumpy.numpy.float32\nThe minimum value of the object\n\n\n\n\n\n\n\nRasterLayer.plot(cmap=None, norm=None, ax=None, cax=None, figsize=None, out_shape=(500, 500), categorical=None, legend=False, vmin=None, vmax=None, fig_kwds=None, legend_kwds=None)\nPlot a RasterLayer using matplotlib.pyplot.imshow\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncmap\nstr (default None)\nThe name of a colormap 
recognized by matplotlib. Overrides the cmap attribute of the RasterLayer.\nNone\n\n\nnorm\nmatplotlib.matplotlib.colors.matplotlib.colors.Normalize(opt)\nA matplotlib.colors.Normalize to apply to the RasterLayer. This overrides the norm attribute of the RasterLayer.\nNone\n\n\nax\nmatplotlib.pyplot.Artist (optional\naxes instance on which to draw to plot.\nNone)\n\n\ncax\nmatplotlib.pyplot.Artist (optional\naxes on which to draw the legend.\nNone)\n\n\nfigsize\ntuple of integers (optional\nSize of the matplotlib.figure.Figure. If the ax argument is given explicitly, figsize is ignored.\nNone)\n\n\nout_shape\ntuple\nNumber of rows, cols to read from the raster datasets for plotting.\n(500, 500)\n\n\ncategorical\nbool (optional\nif True then the raster values will be considered to represent discrete values, otherwise they are considered to represent continuous values. This overrides the RasterLayer ‘categorical’ attribute. Setting the argument categorical to True is ignored if the RasterLayer.categorical is already True.\nFalse)\n\n\nlegend\nbool (optional\nWhether to plot the legend.\nFalse)\n\n\nvmin\nscale (optional\nvmin and vmax define the data range that the colormap covers. By default, the colormap covers the complete value range of the supplied data. vmin, vmax are ignored if the norm parameter is used.\nNone)\n\n\nxmax\nscale (optional\nvmin and vmax define the data range that the colormap covers. By default, the colormap covers the complete value range of the supplied data. vmin, vmax are ignored if the norm parameter is used.\nNone)\n\n\nfig_kwds\ndict (optional\nAdditional arguments to pass to the matplotlib.pyplot.figure call when creating the figure object. 
Ignored if ax is passed to the plot function.\nNone)\n\n\nlegend_kwds\ndict (optional\nKeyword arguments to pass to matplotlib.pyplot.colorbar().\nNone)\n\n\n\n\n\n\n\n\n\nType\nDescription\n\n\n\n\nmatplotlib axes instance\n\n\n\n\n\n\n\n\nRasterLayer.read(**kwargs)\nRead method for a single RasterLayer.\nReads the pixel values from a RasterLayer into a ndarray that always will have two dimensions in the order of (rows, columns).\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\n**kwargs\nnamed arguments that can be passed to the the\nrasterio.DatasetReader.read method.\n{}\n\n\n\n\n\n\n\nRasterLayer.stddev(max_pixels=10000)\nStandard deviation\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\nmax_pixels\nint\nNumber of pixels used to inform statistical estimate.\n10000\n\n\n\n\n\n\n\n\n\nType\nDescription\n\n\n\n\nnumpy.numpy.float32\nThe standard deviation of the object’s pixels.\n\n\n\n\n\n\n\nRasterLayer.write(file_path, driver='GTiff', dtype=None, nodata=None, **kwargs)\nWrite method for a single RasterLayer.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\nfile_path\nstr(opt)\nFile path to save the dataset.\nrequired\n\n\ndriver\nstr\nGDAL-compatible driver used for the file format.\n'GTiff'\n\n\ndtype\nstr(opt)\nNumpy dtype used for the file. If omitted then the RasterLayer’s dtype is used.\nNone\n\n\nnodata\nany number (opt)\nA value used to represent the nodata pixels. If omitted then the RasterLayer’s nodata value is used (if assigned already).\nNone\n\n\nkwargs\nopt\nOptional named arguments to pass to the format drivers. For example can be compress=\"deflate\" to add compression.\n{}\n\n\n\n\n\n\n\n\n\nType\nDescription\n\n\n\n\npyspatialml.pyspatialml.RasterLayer",
"crumbs": [
- "Tutorials",
- "Multitarget regression",
- "Multi-Target Spatial Prediction using the Meuse Dataset"
+ "Reference",
+ "Raster datasets",
+ "RasterLayer"
]
},
{
- "objectID": "docs/multitarget-regression-soil-properties.html#feature-importances",
- "href": "docs/multitarget-regression-soil-properties.html#feature-importances",
- "title": "Multi-Target Spatial Prediction using the Meuse Dataset",
- "section": "Feature Importances",
- "text": "Feature Importances\n\nohe_names = deepcopy(list(stack.names))\nohe_names.insert(soil_idx[0], 'soil1')\nohe_names.insert(soil_idx[0], 'soil2')\nohe_names = np.array(ohe_names)\n\n\nmpl.style.use('ggplot')\n\nfimp = et.named_steps['regressor'].feature_importances_\n\nfig, ax = plt.subplots(figsize=(4, 6))\nax.barh(y=ohe_names[fimp.argsort()], width=fimp[fimp.argsort()])\nax.set_xlabel('Feature Importance Score')\nplt.show()",
+ "objectID": "reference/RasterLayer.html#attributes",
+ "href": "reference/RasterLayer.html#attributes",
+ "title": "RasterLayer",
+ "section": "",
+ "text": "Name\nType\nDescription\n\n\n\n\nbidx\nint\nThe band index of the RasterLayer within the file dataset.\n\n\ndtype\nstr\nThe data type of the RasterLayer.\n\n\nds\nrasterio.rasterio.band\nThe underlying rasterio.band object.\n\n\nname\nstr\nA syntactically valid name for the RasterLayer.\n\n\nfile\nstr\nThe file path to the dataset.\n\n\nnodata\nany number\nThe number that is used to represent nodata pixels in the RasterLayer.\n\n\ndriver\nstr\nThe name of the GDAL format driver.\n\n\nmeta\ndict\nA python dict storing the RasterLayer metadata.\n\n\ntransform\naffine.Affine object\nThe affine transform parameters.\n\n\ncount\nint\nNumber of layers; always equal to 1.\n\n\nshape\ntuple\nShape of RasterLayer in (rows, columns)\n\n\nwidth, height\nint\nThe width (cols) and height (rows) of the dataset.\n\n\nbounds\nBoundingBox named tuple\nA named tuple with left, bottom, right and top coordinates of the dataset.\n\n\ncmap\nstr\nThe name of matplotlib map, or a custom matplotlib.cm.LinearSegmentedColormap or ListedColormap object.\n\n\nnorm\nmatplotlib.matplotlib.colors.matplotlib.colors.Normalize(opt)\nA matplotlib.colors.Normalize to apply to the RasterLayer. This overides the norm attribute of the RasterLayer.",
"crumbs": [
- "Tutorials",
- "Multitarget regression",
- "Multi-Target Spatial Prediction using the Meuse Dataset"
+ "Reference",
+ "Raster datasets",
+ "RasterLayer"
]
},
{
- "objectID": "docs/multitarget-regression-soil-properties.html#prediction-on-the-raster-object",
- "href": "docs/multitarget-regression-soil-properties.html#prediction-on-the-raster-object",
- "title": "Multi-Target Spatial Prediction using the Meuse Dataset",
- "section": "Prediction on the Raster object",
- "text": "Prediction on the Raster object\n\npreds = stack.predict(et)\npreds.rename(\n {old: new for old, new in zip(preds.names, ['lead', 'cadmium', 'copper', 'zinc', 'om'])},\n in_place=True\n)\npreds.lead.cmap = 'rainbow'\npreds.cadmium.cmap = 'rainbow'\npreds.copper.cmap = 'rainbow'\npreds.zinc.cmap = 'rainbow'\npreds.om.cmap = 'rainbow'\n\n/Users/stevenpawley/Library/Caches/pypoetry/virtualenvs/pyspatialml-NqZ1tMUm-py3.11/lib/python3.11/site-packages/sklearn/base.py:493: UserWarning: X does not have valid feature names, but OneHotEncoder was fitted with feature names\n warnings.warn(\n/Users/stevenpawley/Library/Caches/pypoetry/virtualenvs/pyspatialml-NqZ1tMUm-py3.11/lib/python3.11/site-packages/sklearn/base.py:493: UserWarning: X does not have valid feature names, but OneHotEncoder was fitted with feature names\n warnings.warn(\n\n\nPlot the results:\n\npreds.plot(out_shape=(200, 200), title_fontsize=14, figsize=(10, 8))\nplt.show()",
+ "objectID": "reference/RasterLayer.html#methods",
+ "href": "reference/RasterLayer.html#methods",
+ "title": "RasterLayer",
+ "section": "",
+ "text": "Name\nDescription\n\n\n\n\nmax\nMaximum value.\n\n\nmean\nMean value\n\n\nmedian\nMedian value\n\n\nmin\nMinimum value.\n\n\nplot\nPlot a RasterLayer using matplotlib.pyplot.imshow\n\n\nread\nRead method for a single RasterLayer.\n\n\nstddev\nStandard deviation\n\n\nwrite\nWrite method for a single RasterLayer.\n\n\n\n\n\nRasterLayer.max(max_pixels=10000)\nMaximum value.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\nmax_pixels\nint\nNumber of pixels used to inform statistical estimate.\n10000\n\n\n\n\n\n\n\n\n\nType\nDescription\n\n\n\n\nnumpy.numpy.float32\nThe maximum value of the object’s pixels.\n\n\n\n\n\n\n\nRasterLayer.mean(max_pixels=10000)\nMean value\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\nmax_pixels\nint\nNumber of pixels used to inform statistical estimate.\n10000\n\n\n\n\n\n\n\n\n\nType\nDescription\n\n\n\n\nnumpy.numpy.float32\nThe mean value of the object’s pixels.\n\n\n\n\n\n\n\nRasterLayer.median(max_pixels=10000)\nMedian value\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\nmax_pixels\nint\nNumber of pixels used to inform statistical estimate.\n10000\n\n\n\n\n\n\n\n\n\nType\nDescription\n\n\n\n\nnumpy.numpy.float32\nThe median value of the object’s pixels.\n\n\n\n\n\n\n\nRasterLayer.min(max_pixels=10000)\nMinimum value.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\nmax_pixels\nint\nNumber of pixels used to inform statistical estimate.\n10000\n\n\n\n\n\n\n\n\n\nType\nDescription\n\n\n\n\nnumpy.numpy.float32\nThe minimum value of the object’s pixels.\n\n\n\n\n\n\n\nRasterLayer.plot(cmap=None, norm=None, ax=None, cax=None, figsize=None, out_shape=(500, 500), categorical=None, legend=False, vmin=None, vmax=None, fig_kwds=None, legend_kwds=None)\nPlot a RasterLayer using matplotlib.pyplot.imshow\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ncmap\nstr (default None)\nThe name of a colormap recognized by matplotlib. 
Overrides the cmap attribute of the RasterLayer.\nNone\n\n\nnorm\nmatplotlib.matplotlib.colors.matplotlib.colors.Normalize(opt)\nA matplotlib.colors.Normalize to apply to the RasterLayer. This overrides the norm attribute of the RasterLayer.\nNone\n\n\nax\nmatplotlib.pyplot.Artist (optional\naxes instance on which to draw the plot.\nNone)\n\n\ncax\nmatplotlib.pyplot.Artist (optional\naxes on which to draw the legend.\nNone)\n\n\nfigsize\ntuple of integers (optional\nSize of the matplotlib.figure.Figure. If the ax argument is given explicitly, figsize is ignored.\nNone)\n\n\nout_shape\ntuple\nNumber of rows, cols to read from the raster datasets for plotting.\n(500, 500)\n\n\ncategorical\nbool (optional\nif True then the raster values will be considered to represent discrete values, otherwise they are considered to represent continuous values. This overrides the RasterLayer ‘categorical’ attribute. Setting the argument categorical to True is ignored if the RasterLayer.categorical is already True.\nFalse)\n\n\nlegend\nbool (optional\nWhether to plot the legend.\nFalse)\n\n\nvmin\nscale (optional\nvmin and vmax define the data range that the colormap covers. By default, the colormap covers the complete value range of the supplied data. vmin, vmax are ignored if the norm parameter is used.\nNone)\n\n\nvmax\nscale (optional\nvmin and vmax define the data range that the colormap covers. By default, the colormap covers the complete value range of the supplied data. vmin, vmax are ignored if the norm parameter is used.\nNone)\n\n\nfig_kwds\ndict (optional\nAdditional arguments to pass to the matplotlib.pyplot.figure call when creating the figure object. 
Ignored if ax is passed to the plot function.\nNone)\n\n\nlegend_kwds\ndict (optional\nKeyword arguments to pass to matplotlib.pyplot.colorbar().\nNone)\n\n\n\n\n\n\n\n\n\nType\nDescription\n\n\n\n\nmatplotlib axes instance\n\n\n\n\n\n\n\n\nRasterLayer.read(**kwargs)\nRead method for a single RasterLayer.\nReads the pixel values from a RasterLayer into a ndarray that always will have two dimensions in the order of (rows, columns).\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\n**kwargs\nnamed arguments that can be passed to the\nrasterio.DatasetReader.read method.\n{}\n\n\n\n\n\n\n\nRasterLayer.stddev(max_pixels=10000)\nStandard deviation\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\nmax_pixels\nint\nNumber of pixels used to inform statistical estimate.\n10000\n\n\n\n\n\n\n\n\n\nType\nDescription\n\n\n\n\nnumpy.numpy.float32\nThe standard deviation of the object’s pixels.\n\n\n\n\n\n\n\nRasterLayer.write(file_path, driver='GTiff', dtype=None, nodata=None, **kwargs)\nWrite method for a single RasterLayer.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\nfile_path\nstr(opt)\nFile path to save the dataset.\nrequired\n\n\ndriver\nstr\nGDAL-compatible driver used for the file format.\n'GTiff'\n\n\ndtype\nstr(opt)\nNumpy dtype used for the file. If omitted then the RasterLayer’s dtype is used.\nNone\n\n\nnodata\nany number (opt)\nA value used to represent the nodata pixels. If omitted then the RasterLayer’s nodata value is used (if assigned already).\nNone\n\n\nkwargs\nopt\nOptional named arguments to pass to the format drivers. For example can be compress=\"deflate\" to add compression.\n{}\n\n\n\n\n\n\n\n\n\nType\nDescription\n\n\n\n\npyspatialml.pyspatialml.RasterLayer",
"crumbs": [
- "Tutorials",
- "Multitarget regression",
- "Multi-Target Spatial Prediction using the Meuse Dataset"
+ "Reference",
+ "Raster datasets",
+ "RasterLayer"
]
},
{
- "objectID": "docs/landcover.html",
- "href": "docs/landcover.html",
- "title": "Landcover classification",
+ "objectID": "reference/vector.html",
+ "href": "reference/vector.html",
+ "title": "vector",
"section": "",
- "text": "Landcover classification is a common task in remote sensing. This example demonstrates how to extract training data from a raster and vector data, train a classifier, and predict landcover classes on a raster.",
+ "text": "vector\n\n\n\n\n\nName\nDescription\n\n\n\n\nfilter_points\nFilter points in geodataframe using a minimum distance buffer.\n\n\nget_random_point_in_polygon\nGenerates random shapely Point geometry objects within a single\n\n\n\n\n\nvector.filter_points(gdf, min_dist=0, remove='first')\nFilter points in geodataframe using a minimum distance buffer.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ngdf\nGeopandas GeoDataFrame\nContaining point geometries.\nrequired\n\n\nmin_dist\n(int or float, optional(default=0))\nMinimum distance by which to filter out closely spaced points.\n0\n\n\nremove\n(str, optional(default=first))\nOptionally choose to remove ‘first’ occurrences or ‘last’ occurrences.\n'first'\n\n\n\n\n\n\n\n\n\nType\nDescription\n\n\n\n\n2d array-like\nNumpy array filtered coordinates\n\n\n\n\n\n\n\nvector.get_random_point_in_polygon(poly)\nGenerates random shapely Point geometry objects within a single shapely Polygon object.\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\npoly\nShapely Polygon object\n\nrequired\n\n\n\n\n\n\n\n\n\nType\nDescription\n\n\n\n\nShapely Point object",
"crumbs": [
- "Tutorials",
- "Landcover",
- "Landcover classification"
+ "Reference",
+ "Vector tools",
+ "vector"
]
},
{
- "objectID": "docs/landcover.html#data",
- "href": "docs/landcover.html#data",
- "title": "Landcover classification",
- "section": "Data",
- "text": "Data\nThe data used in this example is from the Landsat 7 ETM+ sensor, and represents an extract of data derived from the GRASS GIS North Carolina example dataset. The data consists of 6 bands (1, 2, 3, 4, 5, 7) and labelled pixels. The labelled pixels are used as training data for the classifier. The data is stored in the pyspatialml.datasets module.",
+ "objectID": "reference/vector.html#functions",
+ "href": "reference/vector.html#functions",
+ "title": "vector",
+ "section": "",
+ "text": "Name\nDescription\n\n\n\n\nfilter_points\nFilter points in geodataframe using a minimum distance buffer.\n\n\nget_random_point_in_polygon\nGenerates random shapely Point geometry objects within a single\n\n\n\n\n\nvector.filter_points(gdf, min_dist=0, remove='first')\nFilter points in geodataframe using a minimum distance buffer.\n\n\n\n\n\n\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\ngdf\nGeopandas GeoDataFrame\nContaining point geometries.\nrequired\n\n\nmin_dist\n(int or float, optional(default=0))\nMinimum distance by which to filter out closely spaced points.\n0\n\n\nremove\n(str, optional(default=first))\nOptionally choose to remove ‘first’ occurrences or ‘last’ occurrences.\n'first'\n\n\n\n\n\n\n\n\n\nType\nDescription\n\n\n\n\n2d array-like\nNumpy array filtered coordinates\n\n\n\n\n\n\n\nvector.get_random_point_in_polygon(poly)\nGenerates random shapely Point geometry objects within a single shapely Polygon object.\n\n\n\n\n\nName\nType\nDescription\nDefault\n\n\n\n\npoly\nShapely Polygon object\n\nrequired\n\n\n\n\n\n\n\n\n\nType\nDescription\n\n\n\n\nShapely Point object",
"crumbs": [
- "Tutorials",
- "Landcover",
- "Landcover classification"
+ "Reference",
+ "Vector tools",
+ "vector"
]
},
{
- "objectID": "docs/landcover.html#extraction-training-data",
- "href": "docs/landcover.html#extraction-training-data",
- "title": "Landcover classification",
- "section": "Extraction Training Data",
- "text": "Extraction Training Data\nLoad some training data in the form of polygons, points and labelled pixels in geopandas.GeoDataFrame objects. We will also generate some line geometries by converting the polygon boundaries into linestrings. All of these geometry types can be used to spatially query pixel values in a Raster object, however each GeoDataFrame must contain only one type of geometry (i.e. either shapely points, polygons or linestrings).\n\nfrom pyspatialml import Raster\nfrom pyspatialml.datasets import nc\nfrom copy import deepcopy\nimport os\nimport numpy as np\nimport tempfile\nimport geopandas\nimport rasterio.plot\nimport matplotlib.pyplot as plt\n\ntraining_py = geopandas.read_file(nc.polygons)\ntraining_pt = geopandas.read_file(nc.points)\ntraining_px = rasterio.open(nc.labelled_pixels)\ntraining_lines = deepcopy(training_py)\ntraining_lines['geometry'] = training_lines.geometry.boundary\n\nShow training data points and a single raster band using numpy and matplotlib:\n\npredictors = [nc.band1, nc.band2, nc.band3, nc.band4, nc.band5, nc.band7]\nstack = Raster(predictors)\n\nfig, ax = plt.subplots(figsize=(9, 9))\nstack.lsat7_2000_70.plot(ax=ax)\n\ntraining_py.plot(column=\"label\", ax=ax, legend=True)\nplt.show()\n\n\n\n\n\n\n\n\nPixel values in the Raster object can be spatially queried using the extract_vector and extract_raster methods. In addition, the extract_xy method can be used to query pixel values using a 2d array of x and y coordinates.\nThe extract_vector method accepts a Geopandas GeoDataFrame as the gdf argument. For GeoDataFrames containing shapely point geometries, the closest pixel to each point is sampled. For shapely polygon geometries, all pixels whose centres are inside the polygon are sampled. For shapely linestring geometries, every pixel touched by the line is sampled. For all geometry types, pixel values are queries for each geometry separately. 
This means that overlapping polygons or points that fall within the same pixel with cause the same pixel to be sampled multiple times.\nBy default, the extract functions return a Geopandas GeoDataFrame of point geometries and the DataFrame containing the extracted pixels, with the column names set by the names of the raster datasets in the Raster object. The user can also use the return_array=True argument, which instead of returning a DataFrame will return three masked numpy arrays (ids, X, xy) containing the geodataframe index positions, extracted pixel values, and the spatial coordinates of the sampled pixels. These arrays are masked arrays.\nThe extract_raster method can also be used to spatially query pixel values from a Raster object using another raster containing labelled pixels. This raster has to be spatially aligned with the Raster object. The values of the labelled pixels are returned along with the queried pixel values.\n\n# Extract data from rasters at the training point locations:\ndf_points = stack.extract_vector(training_pt)\ndf_polygons = stack.extract_vector(training_py)\ndf_lines = stack.extract_vector(training_lines)\n\nFor any vector features, a GeoDataFrame is returned containing the extracted pixel values. A pandas.MultiIndex is used to relate the pixels back to the original geometries, with the pixel_idx index referring to the index of each pixel, and the geometry_idx referring to the index of the original geometry in the supplied GeoDataFrame. The pixel values themselves are represented as shapely.geometry.Point objects. These will need to be joined back with the columns of the vector features to get the labelled classes. 
Here we will join the extracted pixels using the “id” column and the GeoDataFrame index of the vector features:\n\n# Join the extracted values with other columns from the training data\ndf_points[\"id\"] = training_pt[\"id\"].values\ndf_points = df_points.dropna()\ndf_points.head()\n\ndf_polygons = df_polygons.merge(\n right=training_py.loc[:, [\"label\", \"id\"]], \n left_on=\"geometry_idx\", \n right_on=\"index\",\n right_index=True\n)\n\nIf the training data is from labelled pixels in a raster, then the extracted data will contain a “value” column that contains the pixel labels:\n\ndf_raster = stack.extract_raster(training_px)",
+ "objectID": "docs/installation.html",
+ "href": "docs/installation.html",
+ "title": "Installation",
+ "section": "",
+ "text": "Pyspatialml is available on PyPI and can be installed in the usual manner with:\n\npip install Pyspatialml\n\nThe development version, which is more up-to-date with changes to the package especially during these earlier stages of development, can be installed directly via:\n\npip install git+https://github.com/stevenpawley/Pyspatialml",
"crumbs": [
- "Tutorials",
- "Landcover",
- "Landcover classification"
+ "Installation"
]
},
{
- "objectID": "docs/landcover.html#model-training",
- "href": "docs/landcover.html#model-training",
- "title": "Landcover classification",
- "section": "Model Training",
- "text": "Model Training\nNext we can train a logistic regression classifier:\n\nfrom sklearn.linear_model import LogisticRegressionCV\nfrom sklearn.preprocessing import StandardScaler\nfrom sklearn.pipeline import Pipeline\nfrom sklearn.model_selection import cross_validate\n\n# define the classifier with standardization of the input features in a\n# pipeline\nlr = Pipeline(\n [('scaling', StandardScaler()),\n ('classifier', LogisticRegressionCV(n_jobs=-1))])\n\n# remove NaNs from training data\ndf_polygons = df_polygons.dropna()\n\n# fit the classifier\nX = df_polygons.drop(columns=[\"id\", \"label\", \"geometry\"]).values\ny = df_polygons[\"id\"].values\nlr.fit(X, y)\n\nPipeline(steps=[('scaling', StandardScaler()),\n ('classifier', LogisticRegressionCV(n_jobs=-1))])In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org. Pipeline?Documentation for PipelineiFittedPipeline(steps=[('scaling', StandardScaler()),\n ('classifier', LogisticRegressionCV(n_jobs=-1))]) StandardScaler?Documentation for StandardScalerStandardScaler() LogisticRegressionCV?Documentation for LogisticRegressionCVLogisticRegressionCV(n_jobs=-1) \n\n\nAfter defining a classifier, a typical step consists of performing a cross-validation to evaluate the performance of the model. Scikit-learn provides the cross_validate function for this purpose. In comparison to non-spatial data, spatial data can be spatially correlated, which potentially can mean that geographically proximal samples may not represent truely independent samples if they are within the autocorrelation range of some of the predictors. 
This will lead to overly optimistic performance measures if samples in the training dataset / cross-validation partition are strongly spatially correlated with samples in the test dataset / cross-validation partition.\nIn this case, performing cross-validation using groups is useful, because these groups can represent spatial clusters of training samples, and samples from the same group will never occur in both the training and test partitions of a cross-validation. Here we can use the polygon indices as the groups, i.e. pixels within the same polygon will not be split into training and test partitions:\n\nscores = cross_validate(\n estimator=lr,\n X=X,\n y=y,\n groups=df_polygons.index.droplevel(\"pixel_idx\"),\n scoring=\"accuracy\",\n cv=3,\n n_jobs=1,\n)\nnp.round(scores['test_score'].mean(), 2)\n\n0.75",
+ "objectID": "docs/plotting.html",
+ "href": "docs/plotting.html",
+ "title": "Plotting",
+ "section": "",
+ "text": "Both Raster and RasterLayer objects include basic plotting methods. The plot method for a RasterLayer object produces a single raster plot using the matplotlib.pyplot.imshow method.\nFor convenience, plot settings such as color ramps and stretches can also be set for each RasterLayer using the RasterLayer.cmap attribute, which supports matplotlib cmaps, and the RasterLayer.norm attribute to associate a matplotlib.colors.Normalize stretch with each RasterLayer:\nTo plot a single RasterLayer:\n\nfrom pyspatialml import Raster\nfrom pyspatialml.datasets import nc\nimport matplotlib.pyplot as plt\n\nstack = Raster([nc.band1, nc.band2, nc.band3, nc.band4, nc.band5, nc.band7])\n\n# set RasterLayer color table\nstack.lsat7_2000_10.cmap = \"plasma\"\n\n# plot a single layer using an existing axis\nfig, ax = plt.subplots()\nstack.lsat7_2000_10.plot(ax=ax)\nplt.show()\n\n\n\n\n\n\n\n\nFor RasterLayers that represent categorical data types, e.g. land cover, then the RasterLayer.categorical=True attribute will cause the cmap to be converted to a discrete scale.\nThe default plot method for a Raster object produces a raster-matrix plot of the individual RasterLayers. By default this plot preserves the plotting attributes of the individual rasters:\nPlot all RasterLayers in a Raster object:\n\nstack.lsat7_2000_10.cmap = \"Blues\"\nstack.lsat7_2000_20.cmap = \"Greens\"\nstack.lsat7_2000_30.cmap = \"Reds\"\nstack.lsat7_2000_40.cmap = \"RdPu\"\nstack.lsat7_2000_50.cmap = \"autumn\"\nstack.lsat7_2000_70.cmap = \"hot\"\n\nstack.plot(\n title_fontsize=8,\n label_fontsize=6,\n legend_fontsize=6,\n names=[\"B1\", \"B2\", \"B3\", \"B4\", \"B5\", \"B7\"],\n fig_kwds={\"figsize\": (8, 4)},\n subplots_kwds={\"wspace\": 0.3}\n)\nplt.show()\n\n\n\n\n\n\n\n\nThe Raster.plot method also provides cmap and norm arguments that can be used to override the settings of the individual RasterLayers. 
Additional settings can be passed to control plot layout using the fig_kwds, legend_kwds and subplots_kwds arguments.",
"crumbs": [
- "Tutorials",
- "Landcover",
- "Landcover classification"
+ "Guide",
+ "Geoprocessing",
+ "Plotting"
]
},
{
- "objectID": "docs/landcover.html#raster-prediction",
- "href": "docs/landcover.html#raster-prediction",
- "title": "Landcover classification",
- "section": "Raster Prediction",
- "text": "Raster Prediction\nPrediction on the Raster object is performed using the predict method. The estimator is the only required argument. If the file_path argument is not specified then the result is automatically written to a temporary file. The predict method returns an rasterio.io.DatasetReader object which is open.\n\n# prediction\nresult = stack.predict(estimator=lr, dtype='int16', nodata=0)\nresult_probs = stack.predict_proba(estimator=lr)\n\n# plot classification result\nresult.iloc[0].cmap = \"Dark2\"\nresult.iloc[0].categorical = True\n\nresult.plot()\nplt.show()\n\n\n\n\n\n\n\n\nThe predict_proba method can be used to output class probabilities as a multi-band raster (a band for each class probability). In the latter case, indexes can also be supplied if you only want to output the probabilities for a particular class, or list of classes, by supplying the indices of those classes:\n\nresult_probs.plot()\nplt.show()\n\n/Users/stevenpawley/Library/Caches/pypoetry/virtualenvs/pyspatialml-NqZ1tMUm-py3.11/lib/python3.11/site-packages/matplotlib/image.py:499: RuntimeWarning: overflow encountered in divide\n A_scaled /= ((a_max - a_min) / frac)",
+ "objectID": "docs/sampling.html",
+ "href": "docs/sampling.html",
+ "title": "Random Sampling",
+ "section": "",
+ "text": "For many spatial models, it is common to take a random sample of the predictors to represent a single class (i.e. an environmental background or pseudo-absences in a binary classification model). The sample function is supplied in the sampling module for this purpose:\n\nfrom pyspatialml import Raster\nimport pyspatialml.datasets.nc as nc\nimport matplotlib.pyplot as plt\n\npredictors = [nc.band1, nc.band2, nc.band3, nc.band4, nc.band5, nc.band7]\nstack = Raster(predictors)\n\n# extract training data using a random sample\ndf_rand = stack.sample(size=1000, random_state=1)\ndf_rand.plot()",
"crumbs": [
- "Tutorials",
- "Landcover",
- "Landcover classification"
+ "Guide",
+ "Geoprocessing",
+ "Random Sampling"
]
},
{
- "objectID": "docs/quickstart.html",
- "href": "docs/quickstart.html",
- "title": "Quick start",
+ "objectID": "docs/sampling.html#random-uniform-sampling",
+ "href": "docs/sampling.html#random-uniform-sampling",
+ "title": "Random Sampling",
"section": "",
- "text": "We are going to use a set of Landsat 7 bands contained within the nc example data:\n\nfrom pyspatialml import Raster\nimport pyspatialml.datasets.nc as nc\nimport matplotlib.pyplot as plt\n\npredictors = [nc.band1, nc.band2, nc.band3, nc.band4, nc.band5, nc.band7]\n\nThese raster datasets are aligned in terms of their extent and coordinate reference systems. We can ‘stack’ these into a Raster class so that we can perform machine learning related operations on the set of rasters:\n\nstack = Raster(predictors)\n\nWhen a Raster object is created, the names to each layer are automatically created based on syntactically-correct versions of the file basenames:\n\nstack.names\n\ndict_keys(['lsat7_2000_10', 'lsat7_2000_20', 'lsat7_2000_30', 'lsat7_2000_40', 'lsat7_2000_50', 'lsat7_2000_70'])\n\n\nColor ramps and matplotlib.colors.Normalize objects can be assigned to each RasterLayer in the object using the cmap and norm attributes for convenient in plotting:\n\nstack.lsat7_2000_10.cmap = \"Blues\"\nstack.lsat7_2000_20.cmap = \"Greens\"\nstack.lsat7_2000_30.cmap = \"Reds\"\nstack.lsat7_2000_40.cmap = \"RdPu\"\nstack.lsat7_2000_50.cmap = \"autumn\"\nstack.lsat7_2000_70.cmap = \"hot\"\n\nstack.plot(\n title_fontsize=8,\n label_fontsize=6,\n legend_fontsize=6,\n names=[\"B1\", \"B2\", \"B3\", \"B4\", \"B5\", \"B7\"],\n fig_kwds={\"figsize\": (8, 4)},\n subplots_kwds={\"wspace\": 0.3}\n)\nplt.show()",
+ "text": "For many spatial models, it is common to take a random sample of the predictors to represent a single class (i.e. an environmental background or pseudo-absences in a binary classification model). The sample function is supplied in the sampling module for this purpose:\n\nfrom pyspatialml import Raster\nimport pyspatialml.datasets.nc as nc\nimport matplotlib.pyplot as plt\n\npredictors = [nc.band1, nc.band2, nc.band3, nc.band4, nc.band5, nc.band7]\nstack = Raster(predictors)\n\n# extract training data using a random sample\ndf_rand = stack.sample(size=1000, random_state=1)\ndf_rand.plot()",
"crumbs": [
"Guide",
"Geoprocessing",
- "Quick start"
+ "Random Sampling"
]
},
{
- "objectID": "docs/quickstart.html#initiating-a-raster-object",
- "href": "docs/quickstart.html#initiating-a-raster-object",
- "title": "Quick start",
- "section": "",
- "text": "We are going to use a set of Landsat 7 bands contained within the nc example data:\n\nfrom pyspatialml import Raster\nimport pyspatialml.datasets.nc as nc\nimport matplotlib.pyplot as plt\n\npredictors = [nc.band1, nc.band2, nc.band3, nc.band4, nc.band5, nc.band7]\n\nThese raster datasets are aligned in terms of their extent and coordinate reference systems. We can ‘stack’ these into a Raster class so that we can perform machine learning related operations on the set of rasters:\n\nstack = Raster(predictors)\n\nWhen a Raster object is created, the names to each layer are automatically created based on syntactically-correct versions of the file basenames:\n\nstack.names\n\ndict_keys(['lsat7_2000_10', 'lsat7_2000_20', 'lsat7_2000_30', 'lsat7_2000_40', 'lsat7_2000_50', 'lsat7_2000_70'])\n\n\nColor ramps and matplotlib.colors.Normalize objects can be assigned to each RasterLayer in the object using the cmap and norm attributes for convenient in plotting:\n\nstack.lsat7_2000_10.cmap = \"Blues\"\nstack.lsat7_2000_20.cmap = \"Greens\"\nstack.lsat7_2000_30.cmap = \"Reds\"\nstack.lsat7_2000_40.cmap = \"RdPu\"\nstack.lsat7_2000_50.cmap = \"autumn\"\nstack.lsat7_2000_70.cmap = \"hot\"\n\nstack.plot(\n title_fontsize=8,\n label_fontsize=6,\n legend_fontsize=6,\n names=[\"B1\", \"B2\", \"B3\", \"B4\", \"B5\", \"B7\"],\n fig_kwds={\"figsize\": (8, 4)},\n subplots_kwds={\"wspace\": 0.3}\n)\nplt.show()",
+ "objectID": "docs/sampling.html#stratified-random-sampling",
+ "href": "docs/sampling.html#stratified-random-sampling",
+ "title": "Random Sampling",
+ "section": "Stratified Random Sampling",
+ "text": "Stratified Random Sampling\nThe sample function also enables stratified random sampling based on passing a categorical raster dataset to the strata argument. The categorical raster should spatially overlap with the dataset to be sampled, but it does not need to be of the same grid resolution. This raster should be passed as a opened rasterio dataset:\n\nstrata = Raster(nc.strata)\ndf_strata = stack.sample(size=5, strata=strata, random_state=1)\ndf_strata = df_strata.dropna()\ndf_strata\n\n\n\n\n\n\n\n\n\nlsat7_2000_10\nlsat7_2000_20\nlsat7_2000_30\nlsat7_2000_40\nlsat7_2000_50\nlsat7_2000_70\ngeometry\n\n\n\n\n0\n96.0\n78.0\n88.0\n49.0\n71.0\n63.0\nPOINT (641093.250 225135.750)\n\n\n1\n113.0\n103.0\n122.0\n66.0\n136.0\n110.0\nPOINT (640979.250 222342.750)\n\n\n3\n82.0\n66.0\n67.0\n64.0\n76.0\n52.0\nPOINT (640095.750 225848.250)\n\n\n4\n99.0\n88.0\n95.0\n56.0\n98.0\n78.0\nPOINT (637559.250 226788.750)\n\n\n5\n81.0\n69.0\n76.0\n73.0\n118.0\n72.0\nPOINT (635621.250 218324.250)\n\n\n10\n91.0\n78.0\n81.0\n77.0\n97.0\n73.0\nPOINT (634709.250 221943.750)\n\n\n11\n72.0\n61.0\n51.0\n104.0\n91.0\n47.0\nPOINT (639269.250 220005.750)\n\n\n12\n86.0\n75.0\n78.0\n73.0\n87.0\n60.0\nPOINT (639326.250 224964.750)\n\n\n13\n71.0\n53.0\n48.0\n59.0\n78.0\n46.0\nPOINT (635222.250 218951.250)\n\n\n15\n76.0\n59.0\n63.0\n65.0\n114.0\n64.0\nPOINT (633027.750 218580.750)\n\n\n17\n75.0\n61.0\n55.0\n70.0\n74.0\n43.0\nPOINT (633369.750 219435.750)\n\n\n18\n78.0\n66.0\n69.0\n69.0\n110.0\n72.0\nPOINT (633198.750 225506.250)\n\n\n19\n68.0\n52.0\n40.0\n79.0\n58.0\n30.0\nPOINT (637986.750 222998.250)\n\n\n20\n70.0\n55.0\n52.0\n62.0\n79.0\n47.0\nPOINT (635649.750 217440.750)\n\n\n22\n71.0\n53.0\n48.0\n64.0\n77.0\n42.0\nPOINT (635564.250 222713.250)\n\n\n23\n72.0\n53.0\n51.0\n58.0\n82.0\n51.0\nPOINT (633056.250 218324.250)\n\n\n26\n81.0\n78.0\n79.0\n34.0\n41.0\n28.0\nPOINT (639297.750 223625.250)\n\n\n27\n73.0\n57.0\n51.0\n16.0\n14.0\n10.0\nPOINT (635364.750 
224736.750)\n\n\n28\n73.0\n57.0\n52.0\n55.0\n57.0\n40.0\nPOINT (635535.750 223311.750)\n\n\n30\n138.0\n120.0\n132.0\n65.0\n129.0\n126.0\nPOINT (634196.250 226190.250)\n\n\n31\n72.0\n60.0\n47.0\n69.0\n82.0\n46.0\nPOINT (639810.750 219749.250)\n\n\n32\n132.0\n122.0\n140.0\n73.0\n171.0\n176.0\nPOINT (640352.250 218238.750)\n\n\n33\n170.0\n157.0\n176.0\n80.0\n182.0\n183.0\nPOINT (639924.750 219692.250)\n\n\n34\n115.0\n98.0\n106.0\n60.0\n110.0\n102.0\nPOINT (639953.250 219578.250)",
"crumbs": [
"Guide",
"Geoprocessing",
- "Quick start"
+ "Random Sampling"
]
},
{
- "objectID": "docs/quickstart.html#subsetting-and-indexing",
- "href": "docs/quickstart.html#subsetting-and-indexing",
- "title": "Quick start",
- "section": "Subsetting and Indexing",
- "text": "Subsetting and Indexing\nIndexing of Raster objects is provided by several methods:\nThe Raster[keys] method enables key-based indexing using a name of a RasterLayer, or a list of names. Direct subsetting of a Raster object instance returns a RasterLayer if only a single label is used, otherwise it always returns a new Raster object containing only the selected layers.\nThe Raster.iloc[int, list, tuple, slice] method allows a Raster object instance to be subset using integer-based indexing or slicing. The iloc method returns a RasterLayer object if only a single index is used, otherwise it always returns a new Raster object containing only the selected layers.\nSubsetting of a Raster object instance can also occur by using attribute names in the form of Raster.name_of_layer. Because only a single RasterLayer can be subset at one time using this approach, a RasterLayer object is always returned.\nExamples of methods to subset a Raster object:\n\n# subset based on position\nsingle_layer = stack.iloc[0]\n\n# subset using a slice\nnew_raster_obj = stack.iloc[0:3]\n\n# subset using labels\nsingle_layer = stack['lsat7_2000_10']\nsingle_layer = stack.lsat7_2000_10\n\n# list or tuple of keys\nnew_raster_obj = stack[('lsat7_2000_10', 'lsat7_2000_20')]\n\nIterate through RasterLayers individually:\n\nfor name, layer in stack.items():\n print(name, layer)\n\nlsat7_2000_10 <pyspatialml.rasterlayer.RasterLayer object at 0x17fb6b650>\nlsat7_2000_20 <pyspatialml.rasterlayer.RasterLayer object at 0x11e5d21d0>\nlsat7_2000_30 <pyspatialml.rasterlayer.RasterLayer object at 0x16d168bd0>\nlsat7_2000_40 <pyspatialml.rasterlayer.RasterLayer object at 0x17f80cc90>\nlsat7_2000_50 <pyspatialml.rasterlayer.RasterLayer object at 0x17fb6a110>\nlsat7_2000_70 <pyspatialml.rasterlayer.RasterLayer object at 0x17fd92510>\n\n\nReplace a RasterLayer with another:\n\nstack.iloc[0] = Raster(nc.band7).iloc[0]\n\nstack.iloc[0].plot()\nplt.show()",
+ "objectID": "docs/transformers.html",
+ "href": "docs/transformers.html",
+ "title": "Transformers",
+ "section": "",
+ "text": "The transformers module contains classes that are used for spatial feature engineering.",
"crumbs": [
"Guide",
"Geoprocessing",
- "Quick start"
+ "Transformers"
]
},
{
- "objectID": "docs/quickstart.html#appending-and-dropping-layers",
- "href": "docs/quickstart.html#appending-and-dropping-layers",
- "title": "Quick start",
- "section": "Appending and Dropping Layers",
- "text": "Appending and Dropping Layers\nAppend layers from another Raster to the stack. Duplicate names are automatically given a suffix.\n\nstack.append(Raster(nc.band7), in_place=True)\nstack.names\n\ndict_keys(['lsat7_2000_10', 'lsat7_2000_20', 'lsat7_2000_30', 'lsat7_2000_40', 'lsat7_2000_50', 'lsat7_2000_70_1', 'lsat7_2000_70_2'])\n\n\nRename RasterLayers using a dict of old_name : new_name pairs:\n\nstack.names\nstack.rename({'lsat7_2000_30': 'new_name'}, in_place=True)\nstack.names\nstack.new_name\nstack['new_name']\n\n<pyspatialml.rasterlayer.RasterLayer at 0x16d168bd0>\n\n\nDrop a RasterLayer:\n\nstack.names\nstack.drop(labels='lsat7_2000_70_1', in_place=True)\nstack.names\n\ndict_keys(['lsat7_2000_10', 'lsat7_2000_20', 'new_name', 'lsat7_2000_40', 'lsat7_2000_50', 'lsat7_2000_70_2'])",
+ "objectID": "docs/transformers.html#spatial-lag-transformer",
+ "href": "docs/transformers.html#spatial-lag-transformer",
+ "title": "Transformers",
+ "section": "Spatial Lag Transformer",
+ "text": "Spatial Lag Transformer\nA transformer to create spatial lag variables by using a weighted mean/mode of the values of the K-neighboring observations. The weighted mean/mode of the surrounding observations are appended as a new feature to the right-most column in the training data. The measure parameter should be set to ‘mode’ for classification, and ‘mean’ for regression.\nKNNTransformer(\n n_neighbors=7,\n weights=\"distance\",\n measure=\"mean\",\n radius=1.0,\n algorithm=\"auto\",\n leaf_size=30,\n metric=\"minkowski\",\n p=2,\n normalize=True,\n metric_params=None,\n kernel_params=None,\n n_jobs=1\n)",
"crumbs": [
"Guide",
"Geoprocessing",
- "Quick start"
+ "Transformers"
]
},
{
- "objectID": "docs/quickstart.html#integration-with-pandas",
- "href": "docs/quickstart.html#integration-with-pandas",
- "title": "Quick start",
- "section": "Integration with Pandas",
- "text": "Integration with Pandas\nData from a Raster object can converted into a Pandas.DataDrame, with each pixel representing by a row, and columns reflecting the x, y coordinates and the values of each RasterLayer in the Raster object:\n\nimport pandas as pd\n\ndf = stack.to_pandas(max_pixels=50000, resampling='nearest')\ndf.head()\n\n\n\n\n\n\n\n\n\nx\ny\nlsat7_2000_10\nlsat7_2000_20\nnew_name\nlsat7_2000_40\nlsat7_2000_50\nlsat7_2000_70_2\n\n\n\n\n0\n630534.000000\n228114.0\nNaN\nNaN\nNaN\nNaN\nNaN\nNaN\n\n\n1\n630562.558402\n228114.0\nNaN\nNaN\nNaN\nNaN\nNaN\nNaN\n\n\n2\n630591.116803\n228114.0\nNaN\nNaN\nNaN\nNaN\nNaN\nNaN\n\n\n3\n630619.675205\n228114.0\nNaN\nNaN\nNaN\nNaN\nNaN\nNaN\n\n\n4\n630648.233607\n228114.0\nNaN\nNaN\nNaN\nNaN\nNaN\nNaN\n\n\n\n\n\n\n\n\nThe original raster is up-sampled based on max_pixels and the resampling method, which uses all of resampling methods available in the underlying rasterio library for decimated reads.",
+ "objectID": "docs/transformers.html#geodisttransformer",
+ "href": "docs/transformers.html#geodisttransformer",
+ "title": "Transformers",
+ "section": "GeoDistTransformer",
+ "text": "GeoDistTransformer\nA common spatial feature engineering task is to create new features that describe the proximity to some reference locations. The GeoDistTransformer can be used to add these features as part of a machine learning pipeline.\nGeoDistTransformer(refs, log=False)\nWhere refs are an array of coordinates of reference locations in (m, n-dimensional) order, such as {n_locations, x_coordinates, y_coordinates, …} for as many dimensions as required. For example to calculate distances to a single x,y,z location:\nrefs = [-57.345, -110.134, 1012]\nAnd to calculate distances to three x,y reference locations:\nrefs = [\n [-57.345, -110.134],\n [-56.345, -109.123],\n [-58.534, -112.123]\n]\nThe supplied array has to have at least x,y coordinates with a (1, 2) shape for a single location.",
"crumbs": [
"Guide",
"Geoprocessing",
- "Quick start"
+ "Transformers"
]
},
{
- "objectID": "docs/quickstart.html#saving-a-raster-to-file",
- "href": "docs/quickstart.html#saving-a-raster-to-file",
- "title": "Quick start",
- "section": "Saving a Raster to File",
- "text": "Saving a Raster to File\nSave a Raster:\n\nimport tempfile\n\ntmp_tif = tempfile.NamedTemporaryFile().name + '.tif'\nnewstack = stack.write(file_path=tmp_tif, nodata=-9999)\nnewstack.new_name.read()\nnewstack = None",
+ "objectID": "docs/usage.html",
+ "href": "docs/usage.html",
+ "title": "Usage",
+ "section": "",
+ "text": "The main approach to working with raster datasets in Pyspatialml is through the Raster class. The Raster object takes a list of GDAL-supported raster datasets and references them as part of a single Raster object, which can be used to perform operations on the raster datasets as a whole. The Raster object is a thin wrapper around the rasterio library, which is a Python library for reading and writing raster datasets. The individual bands within the datasets are represented internally as RasterLayer objects. This allows for retaining metadata about each raster dataset and adding or removing raster datasets from the stack without making physical changes to the disk.\nNote that in order to initiate a Raster object, the underlying raster datasets must be spatially aligned in terms of their extent, resolution, and coordinate reference system - Raster objects do not perform any resampling or reprojection of the underlying datasets. Functions within the preprocessing module can be used to align raster datasets before creating a Raster object.\n\n\nThe most common approach of initiating a Raster object is from an existing raster dataset, or a list of raster datasets. Alternatively, a Raster object can also be initiated from a 3D numpy array:\n\nRaster(src=[raster1.tif, raster2.tif, raster3.tif]) creates a Raster object from existing file-based GDAL-supported datasets, or a single raster dataset. The file-based datasets can contain single or multiple bands.\nRaster(src=new_numpy_array, crs=crs, transform=transform) creates a Raster object from a 3D numpy array (band, row, column). The crs and transform arguments are optional but are required to provide coordinate reference system information to the Raster object. 
The crs argument has to be represented by a rasterio crs.CRS object, and the transform parameter requires an affine.Affine object.\n\nRasters can also be initiated directly from a rasterio.Band object(s), or from a list of RasterLayer objects (see below).\n\n\n\nGenerally, Pyspatialml intends users to work with the Raster object. However, internally, the Raster object is composed of RasterLayer objects, which represent individual bands of a raster dataset. RasterLayers are based on a rasterio.Band object with some additional attributes and methods. However, unlike the rasterio.Band.ds.read method which reads all bands within a multi-band dataset, the RasterLayer read method always refers to a single band.\nMethods contained within RasterLayer objects are specifically designed to be applied to individual bands of a raster. These methods include operations such as sieve-clump, distance to non-NaN pixels, and arithmetic operations on individual layers.",
"crumbs": [
- "Guide",
- "Geoprocessing",
- "Quick start"
+ "Usage"
]
},
{
- "objectID": "index.html",
- "href": "index.html",
- "title": "Overview",
+ "objectID": "docs/usage.html#the-raster-class",
+ "href": "docs/usage.html#the-raster-class",
+ "title": "Usage",
"section": "",
- "text": "Pyspatialml is a Python package for applying scikit-learn machine learning models to raster-based datasets. It is inspired by the famous raster package in the R statistical programming language which has been extensively used for applying statistical and machine learning models to geospatial raster datasets.\nPyspatialml includes functions and classes for working with multiple raster datasets and applying typical machine learning workflows including raster data manipulation, feature engineering on raster datasets, extraction of training data, and application of the predict or predict_proba methods of scikit-learn estimator objects to a stack of raster datasets.\nPyspatialml is built upon the rasterio Python package which performs all of the heavy lifting and is designed to work with the geopandas package for related raster-vector data geoprocessing operations."
+ "text": "The main approach to working with raster datasets in Pyspatialml is through the Raster class. The Raster object takes a list of GDAL-supported raster datasets and references them as part of a single Raster object, which can be used to perform operations on the raster datasets as a whole. The Raster object is a thin wrapper around the rasterio library, which is a Python library for reading and writing raster datasets. The individual bands within the datasets are represented internally as RasterLayer objects. This allows for retaining metadata about each raster dataset and adding or removing raster datasets from the stack without making physical changes to the disk.\nNote that in order to initiate a Raster object, the underlying raster datasets must be spatially aligned in terms of their extent, resolution, and coordinate reference system - Raster objects do not perform any resampling or reprojection of the underlying datasets. Functions within the preprocessing module can be used to align raster datasets before creating a Raster object.\n\n\nThe most common approach of initiating a Raster object is from an existing raster dataset, or a list of raster datasets. Alternatively, a Raster object can also be initiated from a 3D numpy array:\n\nRaster(src=[raster1.tif, raster2.tif, raster3.tif]) creates a Raster object from existing file-based GDAL-supported datasets, or a single raster dataset. The file-based datasets can contain single or multiple bands.\nRaster(src=new_numpy_array, crs=crs, transform=transform) creates a Raster object from a 3D numpy array (band, row, column). The crs and transform arguments are optional but are required to provide coordinate reference system information to the Raster object. 
The crs argument has to be represented by a rasterio crs.CRS object, and the transform parameter requires an affine.Affine object.\n\nRasters can also be initiated directly from a rasterio.Band object(s), or from a list of RasterLayer objects (see below).\n\n\n\nGenerally, Pyspatialml intends users to work with the Raster object. However, internally, the Raster object is composed of RasterLayer objects, which represent individual bands of a raster dataset. RasterLayers are based on a rasterio.Band object with some additional attributes and methods. However, unlike the rasterio.Band.ds.read method which reads all bands within a multi-band dataset, the RasterLayer read method always refers to a single band.\nMethods contained within RasterLayer objects are specifically designed to be applied to individual bands of a raster. These methods include operations such as sieve-clump, distance to non-NaN pixels, and arithmetic operations on individual layers.",
+ "crumbs": [
+ "Usage"
+ ]
},
{
- "objectID": "index.html#purpose",
- "href": "index.html#purpose",
- "title": "Overview",
- "section": "Purpose",
- "text": "Purpose\nA supervised machine-learning workflow as applied to spatial raster data typically involves several steps:\n\nUsing vector features or labelled pixels to extract training data from a stack of raster-based predictors (e.g. spectral bands, terrain derivatives, or climate grids). The training data represent locations when some property/state/concentration is already established, and might comprise point locations of arsenic concentrations, or labelled pixels with integer-encoded values that correspond to known landcover types.\nDeveloping a machine learning classification or regression model on the training data. Pyspatialml is designed to use scikit-learn compatible api’s for this purpose.\nApplying the fitted machine learning model to make predictions on all of the pixels in the stack of raster data.\n\nPyspatialml is designed to make it easy to develop spatial prediction models on stacks of 2D raster datasets that are held on disk. Unlike using python’s numpy module directly where raster datasets need to be held in memory, the majority of functions within pyspatialml work with raster datasets that are stored on disk and allow processing operations to be performed on datasets that are too large to be loaded into memory.\nPyspatialml is designed to make it easy to work with typical raster data stacks consisting of multiple 2D grids such as different spectal bands, maps etc. However, it’s purpose is not to work with multidimensional datasets, i.e. those that have more than 3 dimensions such as spacetime cubes of multiband data. The xarray package can provide a structure for this type of data."
+ "objectID": "docs/usage.html#principles-of-working-with-rasters",
+ "href": "docs/usage.html#principles-of-working-with-rasters",
+ "title": "Usage",
+ "section": "Principles of working with Rasters",
+ "text": "Principles of working with Rasters\nMethods that are applied to Raster objects are generally designed to be applied to the entire stack of raster datasets. For example, the crop method will crop all raster datasets in the stack to a common extent, and the mask method will apply a mask to all raster datasets in the stack. These methods always return a new Raster object, and do not modify the original Raster object by default. Subsetting of individual bands uses the same principles as the pandas library, where the loc method is used to subset bands based on their names, and the iloc method is used to subset bands based on their index. Also similarly to pandas, subsetting a single band will return the object itself, in this case, a RasterLayer object, while subsetting multiple bands will return a new Raster object.\nMethods that apply to individual RasterLayers are mostly related to extracting or summarizing metadata from the individual bands. For other methods that users may want to apply to individual bands, it is recommended to work with rasterio directly.",
+ "crumbs": [
+ "Usage"
+ ]
}
]
\ No newline at end of file
diff --git a/sitemap.xml b/sitemap.xml
index 2af2835..a5ce6b8 100644
--- a/sitemap.xml
+++ b/sitemap.xml
@@ -1,67 +1,63 @@
- https://stevenpawley.github.io/Pyspatialml/examples/Example 1 - Multitarget Regression of Soil Properties.html
- 2024-06-02T02:51:01.293Z
-
-
- https://stevenpawley.github.io/Pyspatialml/docs/usage.html
- 2024-06-02T02:51:01.285Z
+ https://stevenpawley.github.io/Pyspatialml/index.html
+ 2024-06-02T03:15:48.775Z
- https://stevenpawley.github.io/Pyspatialml/docs/transformers.html
- 2024-06-02T02:51:01.285Z
+ https://stevenpawley.github.io/Pyspatialml/docs/quickstart.html
+ 2024-06-02T03:15:48.775Z
- https://stevenpawley.github.io/Pyspatialml/docs/sampling.html
- 2024-06-02T02:51:01.285Z
+ https://stevenpawley.github.io/Pyspatialml/docs/landcover.html
+ 2024-06-02T03:15:48.775Z
- https://stevenpawley.github.io/Pyspatialml/docs/plotting.html
- 2024-06-02T02:51:01.285Z
+ https://stevenpawley.github.io/Pyspatialml/docs/multitarget-regression-soil-properties.html
+ 2024-06-02T03:15:48.775Z
- https://stevenpawley.github.io/Pyspatialml/docs/installation.html
- 2024-06-02T02:51:01.285Z
+ https://stevenpawley.github.io/Pyspatialml/docs/spatial-features.html
+ 2024-06-02T03:15:48.775Z
- https://stevenpawley.github.io/Pyspatialml/reference/vector.html
- 2024-06-02T02:51:01.337Z
+ https://stevenpawley.github.io/Pyspatialml/reference/Raster.html
+ 2024-06-02T03:15:48.819Z
- https://stevenpawley.github.io/Pyspatialml/reference/RasterLayer.html
- 2024-06-02T02:51:01.337Z
+ https://stevenpawley.github.io/Pyspatialml/reference/index.html
+ 2024-06-02T03:15:48.819Z
https://stevenpawley.github.io/Pyspatialml/reference/preprocessing.html
- 2024-06-02T02:51:01.337Z
+ 2024-06-02T03:15:48.819Z
- https://stevenpawley.github.io/Pyspatialml/reference/index.html
- 2024-06-02T02:51:01.337Z
+ https://stevenpawley.github.io/Pyspatialml/reference/RasterLayer.html
+ 2024-06-02T03:15:48.819Z
- https://stevenpawley.github.io/Pyspatialml/reference/Raster.html
- 2024-06-02T02:51:01.337Z
+ https://stevenpawley.github.io/Pyspatialml/reference/vector.html
+ 2024-06-02T03:15:48.819Z
- https://stevenpawley.github.io/Pyspatialml/docs/spatial-features.html
- 2024-06-02T02:51:01.285Z
+ https://stevenpawley.github.io/Pyspatialml/docs/installation.html
+ 2024-06-02T03:15:48.775Z
- https://stevenpawley.github.io/Pyspatialml/docs/multitarget-regression-soil-properties.html
- 2024-06-02T02:51:01.285Z
+ https://stevenpawley.github.io/Pyspatialml/docs/plotting.html
+ 2024-06-02T03:15:48.775Z
- https://stevenpawley.github.io/Pyspatialml/docs/landcover.html
- 2024-06-02T02:51:01.285Z
+ https://stevenpawley.github.io/Pyspatialml/docs/sampling.html
+ 2024-06-02T03:15:48.775Z
- https://stevenpawley.github.io/Pyspatialml/docs/quickstart.html
- 2024-06-02T02:51:01.285Z
+ https://stevenpawley.github.io/Pyspatialml/docs/transformers.html
+ 2024-06-02T03:15:48.775Z
- https://stevenpawley.github.io/Pyspatialml/index.html
- 2024-06-02T02:51:01.293Z
+ https://stevenpawley.github.io/Pyspatialml/docs/usage.html
+ 2024-06-02T03:15:48.775Z