Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

addition to work with NGBoost #24

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
184 changes: 184 additions & 0 deletions pyspatialml/raster.py
Original file line number Diff line number Diff line change
Expand Up @@ -1004,6 +1004,7 @@ def predict(

return new_raster


def _predfun(self, img, estimator):
"""Prediction function for classification or regression response.

Expand Down Expand Up @@ -1046,6 +1047,189 @@ def _predfun(self, img, estimator):

return result_cla

#################### Hack for use with NGBoost
### eg. result2dist = stack.predict_dist(estimator=ngb_unpickled, dtype='float32', nodata=-9999, file_path = r'F:\Depth Modelling\NGBOOST-RESULTS\CrystallineBasementTestDist.tif')
### https://github.com/stanfordmlgroup/ngboost

def predict_dist(
self,
estimator,
file_path=None,
driver="GTiff",
dtype=None,
nodata=None,
as_df=False,
n_jobs=-1,
progress=False,
):
"""Apply prediction of a scikit learn model to a Raster.

The model can represent any scikit learn model or compatible api with a `fit`
and `predict` method. These can consist of classification or regression models.
Multi-class classifications and multi-target regressions are also supported.

Parameters
----------
estimator : estimator object implementing 'fit'
The object to use to fit the data.

file_path : str (optional, default None)
Path to a GeoTiff raster for the prediction results. If not specified then
the output is written to a temporary file.

driver : str (default 'GTiff')
Named of GDAL-supported driver for file export

dtype : str (optional, default None)
Optionally specify a numpy compatible data type when saving to file. If not
specified, a data type is set based on the data types of the prediction.

nodata : any number (optional, default None)
Nodata value for file export. If not specified then the nodata value is
derived from the minimum permissible value for the given data type.

as_df : bool (default is False)
Whether to read the raster data via pandas before prediction. This can be
useful if transformers are being used as part of a pipeline and you want
to refer to column names rather than indices.

n_jobs : int (default -1)
Number of processing cores to use for parallel execution. Default is
n_jobs=1. -1 is all cores; -2 is all cores -1.

progress : bool (default False)
Show progress bar for prediction.

Returns
-------
pyspatial.Raster
Raster object containing prediction results as a RasterLayers. For
classification and regression models, the Raster will contain a single
RasterLayer, unless the model is multi-class or multi-target. Layers
are named automatically as `pred_raw_n` with n = 1, 2, 3 ..n.
"""
file_path, tfile = _file_path_tempfile(file_path)
n_jobs = _get_num_workers(n_jobs)

# determine output count for multi output cases
window = Window(0, 0, self.width, 1)

if as_df is False:
img = self.read(masked=True, window=window)
n_features, rows, cols = img.shape[0], img.shape[1], img.shape[2]
n_samples = rows * cols
flat_pixels = img.transpose(1, 2, 0).reshape((n_samples, n_features))
flat_pixels = flat_pixels.filled(0)
else:
flat_pixels = self.read(masked=False, as_df=True)
flat_pixels = flat_pixels.fillna(0)

result = estimator.predict(flat_pixels)

if result.ndim > 1:
n_outputs = result.shape[result.ndim - 1]
else:
n_outputs = 1

indexes = np.arange(0, n_outputs)

# chose prediction function
if len(indexes) == 1:
predfun = partial(self._predfun_dist, estimator=estimator)
else:
predfun = partial(self._predfun_dist_multioutput, estimator=estimator)

if dtype is None:
dtype = result.dtype

if nodata is None:
nodata = _get_nodata(dtype)

if progress is True:
disable_tqdm = False
else:
disable_tqdm = True

# open output file with updated metadata
meta = deepcopy(self.meta)
meta.update(driver=driver, count=len(indexes), dtype=dtype, nodata=nodata)

with rasterio.open(file_path, "w", **meta) as dst:
windows = [window for window in self.block_shapes(*self._block_shape)]

# generator gets raster arrays for each window
data_gen = ((window, self.read(window=window, masked=True, as_df=as_df)) for window in windows)

with concurrent.futures.ThreadPoolExecutor(max_workers=n_jobs) as executor:
for window, result, pbar in zip(windows, executor.map(predfun, data_gen), tqdm(windows, disable=disable_tqdm)):
result = np.ma.filled(result, fill_value=nodata)
dst.write(result[indexes, :, :].astype(dtype), window=window)

# generate layer names
prefix = "pred_raw_"
names = [prefix + str(i) for i in range(len(indexes))]

# create new raster object
new_raster = self._new_raster(file_path, names)

if tfile is not None:
for layer in new_raster.iloc:
layer._close = tfile.close

return new_raster

def _predfun_dist(self, img, estimator):
"""Prediction function for classification or regression response.

Parameters
----
img : tuple (window, numpy.ndarray)
A window object, and a 3d ndarray of raster data with the dimensions in
order of (band, rows, columns).

estimator : estimator object implementing 'fit'
The object to use to fit the data.

Returns
-------
numpy.ndarray
2d numpy array representing a single band raster containing the
classification or regression result.
"""
window, img = img

if not isinstance(img, pd.DataFrame):
# reshape each image block matrix into a 2D matrix
# first reorder into rows, cols, bands(transpose)
# then resample into 2D array (rows=sample_n, cols=band_values)
n_features, rows, cols = img.shape[0], img.shape[1], img.shape[2]
n_samples = rows * cols
flat_pixels = img.transpose(1, 2, 0).reshape((n_samples, n_features))

# create mask for NaN values and replace with number
flat_pixels_mask = flat_pixels.mask.copy()
flat_pixels = flat_pixels.filled(0)

else:
flat_pixels = img
flat_pixels_mask = pd.isna(flat_pixels).values
flat_pixels = flat_pixels.fillna(0)
flat_pixels = flat_pixels.values

# predict and replace mask
result_cla = estimator.pred_dist(flat_pixels)
result_cla = result_cla.params['scale']
result_cla = np.ma.masked_array(
data=result_cla, mask=flat_pixels_mask.any(axis=1)
)

# reshape the prediction from a 1D into 3D array [band, row, col]
result_cla = result_cla.reshape((1, window.height, window.width))

return result_cla

####################

@staticmethod
def _probfun(img, estimator):
"""Class probabilities function.
Expand Down