From becbea80ca66cff8f8fe018ca19e572848305550 Mon Sep 17 00:00:00 2001 From: Michael Johns Date: Sun, 21 Jan 2024 10:13:32 -0500 Subject: [PATCH 1/9] Initial refresh to existing raster docs. --- docs/source/api/raster-functions.rst | 1179 ++++++----------- python/mosaic/api/raster.py | 444 ++++--- .../expressions/raster/RST_BandMetaData.scala | 2 +- .../expressions/raster/RST_BoundingBox.scala | 2 +- .../mosaic/expressions/raster/RST_Clip.scala | 4 +- .../expressions/raster/RST_CombineAvg.scala | 2 +- .../raster/RST_CombineAvgAgg.scala | 2 +- .../expressions/raster/RST_DerivedBand.scala | 2 +- .../raster/RST_DerivedBandAgg.scala | 2 +- .../expressions/raster/RST_FromBands.scala | 2 +- .../expressions/raster/RST_FromContent.scala | 4 +- .../expressions/raster/RST_FromFile.scala | 2 +- .../expressions/raster/RST_GeoReference.scala | 2 +- .../expressions/raster/RST_GetNoData.scala | 5 +- .../raster/RST_GetSubdataset.scala | 2 +- .../expressions/raster/RST_Height.scala | 2 +- .../expressions/raster/RST_InitNoData.scala | 2 +- .../expressions/raster/RST_IsEmpty.scala | 2 +- .../expressions/raster/RST_MapAlgebra.scala | 2 +- .../expressions/raster/RST_MemSize.scala | 2 +- .../mosaic/expressions/raster/RST_Merge.scala | 2 +- .../expressions/raster/RST_MergeAgg.scala | 2 +- .../expressions/raster/RST_MetaData.scala | 2 +- .../mosaic/expressions/raster/RST_NDVI.scala | 2 +- .../expressions/raster/RST_NumBands.scala | 2 +- .../expressions/raster/RST_PixelHeight.scala | 2 +- .../expressions/raster/RST_PixelWidth.scala | 2 +- .../raster/RST_RasterToGridAvg.scala | 2 +- .../raster/RST_RasterToGridCount.scala | 2 +- .../raster/RST_RasterToGridMax.scala | 2 +- .../raster/RST_RasterToGridMedian.scala | 2 +- .../raster/RST_RasterToGridMin.scala | 2 +- .../raster/RST_RasterToWorldCoord.scala | 2 +- .../raster/RST_RasterToWorldCoordX.scala | 2 +- .../raster/RST_RasterToWorldCoordY.scala | 2 +- .../expressions/raster/RST_ReTile.scala | 2 +- .../expressions/raster/RST_Rotation.scala | 
2 +- .../mosaic/expressions/raster/RST_SRID.scala | 2 +- .../expressions/raster/RST_ScaleX.scala | 2 +- .../expressions/raster/RST_ScaleY.scala | 2 +- .../expressions/raster/RST_SetNoData.scala | 2 +- .../mosaic/expressions/raster/RST_SkewX.scala | 2 +- .../mosaic/expressions/raster/RST_SkewY.scala | 2 +- .../expressions/raster/RST_Subdatasets.scala | 2 +- .../expressions/raster/RST_Subdivide.scala | 5 +- .../expressions/raster/RST_Summary.scala | 2 +- .../expressions/raster/RST_Tessellate.scala | 2 +- .../raster/RST_ToOverlappingTiles.scala | 2 +- .../expressions/raster/RST_TryOpen.scala | 2 +- .../expressions/raster/RST_UpperLeftX.scala | 2 +- .../expressions/raster/RST_UpperLeftY.scala | 2 +- .../mosaic/expressions/raster/RST_Width.scala | 2 +- .../raster/RST_WorldToRasterCoord.scala | 2 +- .../raster/RST_WorldToRasterCoordX.scala | 2 +- .../raster/RST_WorldToRasterCoordY.scala | 2 +- 55 files changed, 684 insertions(+), 1055 deletions(-) diff --git a/docs/source/api/raster-functions.rst b/docs/source/api/raster-functions.rst index 19a8cc42c..27442fec2 100644 --- a/docs/source/api/raster-functions.rst +++ b/docs/source/api/raster-functions.rst @@ -11,12 +11,15 @@ Mainly raster to grid functions, which are useful for reprojecting the raster da This is useful for performing spatial joins between raster data and vector data. Mosaic also provides a scalable retiling function that can be used to retile raster data in case of bottlenecking due to large files. All raster functions respect the \"rst\_\" prefix naming convention. -In versions <= 0.3.11 mosaic was operating using either string paths or byte arrays. -In versions > 0.3.11 mosaic is operating using tile objects only. Tile objects are created using rst_fromfile(path_to_raster) function. +Mosaic is operating using raster tile objects only since 0.3.11. Tile objects are created using functions such as rst_fromfile(path_to_raster) +or rst_fromcontent(raster_bin). 
If you use spark.read.format("gdal") tiles are automatically generated for you. +Also, scala does not have a df.display method while python does. In practice you would most often call display(df) in +scala for a prettier output, but for brevity, we write df.show in scala. -.. note:: For mosaic versions > 0.3.11 please do not use setup_gdal call. There is no longer a need for shared objects to be copied around. - Please use the updated init_script.sh script to install GDAL on your cluster. See :doc:`Install and Enable GDAL with Mosaic ` for more details. +.. note:: For mosaic versions > 0.4.0 you can use the revamped setup_gdal function or new setup_fuse_install. + These functions will configure an init script in your preferred Workspace, Volume, or DBFS location to install GDAL on your cluster. + See :doc:`Install and Enable GDAL with Mosaic ` for more details. rst_bandmetadata **************** @@ -37,8 +40,6 @@ rst_bandmetadata .. tabs:: .. code-tab:: py - df = spark.read.format("gdal").option("extensions", "nc")\ - .load("dbfs:/FileStore/geospatial/mosaic/sample_raster_data/gdal-netcdf-coral") df.select(mos.rst_bandmetadata("tile", F.lit(1))).limit(1).display() +--------------------------------------------------------------------------------------+ | rst_bandmetadata(tile, 1) | @@ -55,10 +56,7 @@ rst_bandmetadata .. code-tab:: scala - val df = spark.read - .format("gdal").option("extensions", "nc") - .load("dbfs:/FileStore/geospatial/mosaic/sample_raster_data/binary/netcdf-coral") - df.select(rst_bandmetadata(col("tile"), lit(1))).limit(1).show(false) + df.select(rst_bandmetadata(col("tile"), lit(1))).limit(1).show +--------------------------------------------------------------------------------------+ | rst_bandmetadata(tile, 1) | +--------------------------------------------------------------------------------------+ @@ -74,10 +72,7 @@ rst_bandmetadata .. 
code-tab:: sql - CREATE TABLE IF NOT EXISTS TABLE coral_netcdf - USING gdal - OPTIONS (extension "nc", path "dbfs:/FileStore/geospatial/mosaic/sample_raster_data/binary/netcdf-coral") - SELECT rst_bandmetadata(tile, 1) FROM coral_netcdf LIMIT 1 + SELECT rst_bandmetadata(tile, 1) FROM table LIMIT 1 +--------------------------------------------------------------------------------------+ | rst_bandmetadata(tile, 1) | +--------------------------------------------------------------------------------------+ @@ -94,7 +89,7 @@ rst_bandmetadata rst_boundingbox *************** -.. function:: rst_boundingbox(raster) +.. function:: rst_boundingbox(tile) Returns the bounding box of the raster as a polygon geometry. @@ -107,8 +102,6 @@ rst_boundingbox .. tabs:: .. code-tab:: py - df = spark.read.format("gdal").option("extensions", "nc")\ - .load("dbfs:/FileStore/geospatial/mosaic/sample_raster_data/gdal-netcdf-coral") df.select(mos.rst_boundingbox("tile")).limit(1).display() +------------------------------------------------------------------+ | rst_boundingbox(tile) | @@ -118,10 +111,7 @@ rst_boundingbox .. code-tab:: scala - val df = spark.read - .format("gdal").option("extensions", "nc") - .load("dbfs:/FileStore/geospatial/mosaic/sample_raster_data/binary/netcdf-coral") - df.select(rst_boundingbox(col("tile"))).limit(1).show(false) + df.select(rst_boundingbox(col("tile"))).limit(1).show +------------------------------------------------------------------+ | rst_boundingbox(tile) | +------------------------------------------------------------------+ @@ -130,10 +120,7 @@ rst_boundingbox .. 
code-tab:: sql - CREATE TABLE IF NOT EXISTS TABLE coral_netcdf - USING gdal - OPTIONS (extension "nc", path "dbfs:/FileStore/geospatial/mosaic/sample_raster_data/binary/netcdf-coral") - SELECT rst_boundingbox(tile) FROM coral_netcdf LIMIT 1 + SELECT rst_boundingbox(tile) FROM table LIMIT 1 +------------------------------------------------------------------+ | rst_boundingbox(tile) | +------------------------------------------------------------------+ @@ -143,9 +130,9 @@ rst_boundingbox rst_clip ******** -.. function:: rst_clip(raster, geometry) +.. function:: rst_clip(tile, geometry) - Clips the raster to the geometry. + Clips the raster tile to the supported geometry (WKB, WKT, GeoJSON). The geometry is expected to be in the same coordinate reference system as the raster. The geometry is expected to be a polygon or a multipolygon. The output raster will have the same extent as the input geometry. @@ -165,8 +152,6 @@ rst_clip .. tabs:: .. code-tab:: py - df = spark.read.format("gdal").option("extensions", "nc")\ - .load("dbfs:/FileStore/geospatial/mosaic/sample_raster_data/gdal-netcdf-coral") df.select(mos.rst_clip("tile", F.lit("POLYGON((0 0, 0 10, 10 10, 10 0, 0 0))"))).limit(1).display() +----------------------------------------------------------------------------------------------------------------+ | rst_clip(tile, POLYGON ((0 0, 0 10, 10 10, 10 0, 0 0))) | @@ -176,10 +161,7 @@ rst_clip .. 
code-tab:: scala - val df = spark.read - .format("gdal").option("extensions", "nc") - .load("dbfs:/FileStore/geospatial/mosaic/sample_raster_data/binary/netcdf-coral") - df.select(rst_clip(col("tile"), lit("POLYGON((0 0, 0 10, 10 10, 10 0, 0 0))"))).limit(1).show(false) + df.select(rst_clip(col("tile"), lit("POLYGON((0 0, 0 10, 10 10, 10 0, 0 0))"))).limit(1).show +----------------------------------------------------------------------------------------------------------------+ | rst_clip(tile, POLYGON ((0 0, 0 10, 10 10, 10 0, 0 0))) | +-----------------------------------------------------------------------------------------------------------------+ @@ -188,10 +170,7 @@ rst_clip .. code-tab:: sql - CREATE TABLE IF NOT EXISTS TABLE coral_netcdf - USING gdal - OPTIONS (extension "nc", path "dbfs:/FileStore/geospatial/mosaic/sample_raster_data/binary/netcdf-coral") - SELECT rst_clip(tile, "POLYGON((0 0, 0 10, 10 10, 10 0, 0 0))") FROM coral_netcdf LIMIT 1 + SELECT rst_clip(tile, "POLYGON((0 0, 0 10, 10 10, 10 0, 0 0))") FROM table LIMIT 1 +----------------------------------------------------------------------------------------------------------------+ | rst_clip(tile, POLYGON ((0 0, 0 10, 10 10, 10 0, 0 0))) | +----------------------------------------------------------------------------------------------------------------+ @@ -201,9 +180,9 @@ rst_clip rst_combineavg ************** -.. function:: rst_combineavg(rasters) +.. function:: rst_combineavg(tiles) - Combines a collection of rasters by averaging the pixel values. + Combines a collection of raster tiles by averaging the pixel values. The rasters must have the same extent, number of bands, and pixel type. The rasters must have the same pixel size and coordinate reference system. The output raster will have the same extent as the input rasters. @@ -212,7 +191,7 @@ rst_combineavg The output raster will have the same pixel size as the input rasters. 
The output raster will have the same coordinate reference system as the input rasters. - :param tile: A column containing an array of raster tiles. + :param tiles: A column containing an array of raster tiles. :type col: Column (ArrayType(RasterTileType)) :rtype: Column: RasterTileType @@ -221,40 +200,31 @@ rst_combineavg .. tabs:: .. code-tab:: py - df = spark.read.format("gdal").option("extensions", "nc")\ - .load("dbfs:/FileStore/geospatial/mosaic/sample_raster_data/gdal-netcdf-coral")\ - .groupBy().agg(F.collect_list("tile").alias("tile")) - df.select(mos.rst_combineavg("tile")).limit(1).display() + df\ + .select(F.array("tile1","tile2","tile3").alias("tiles"))\ + .select(mos.rst_combineavg("tiles")).limit(1).display() +----------------------------------------------------------------------------------------------------------------+ - | rst_combineavg(tile) | + | rst_combineavg(tiles) | +----------------------------------------------------------------------------------------------------------------+ | {index_id: 593308294097928191, raster: [00 01 10 ... 00], parentPath: "dbfs:/path_to_file", driver: "NetCDF" } | +----------------------------------------------------------------------------------------------------------------+ .. code-tab:: scala - val df = spark.read - .format("gdal").option("extensions", "nc") - .load("dbfs:/FileStore/geospatial/mosaic/sample_raster_data/binary/netcdf-coral") - .groupBy().agg(collect_list(col("tile")).as("tile")) - df.select(rst_combineavg(col("tile"))).limit(1).show(false) + df + .select(array("tile1","tile2","tile3").as("tiles")) + .select(rst_combineavg(col("tiles"))).limit(1).show +----------------------------------------------------------------------------------------------------------------+ - | rst_combineavg(tile) | + | rst_combineavg(tiles) | +----------------------------------------------------------------------------------------------------------------+ | {index_id: 593308294097928191, raster: [00 01 10 ... 
00], parentPath: "dbfs:/path_to_file", driver: "NetCDF" } | +----------------------------------------------------------------------------------------------------------------+ .. code-tab:: sql - CREATE TABLE IF NOT EXISTS TABLE coral_netcdf - USING gdal - OPTIONS (extension "nc", path "dbfs:/FileStore/geospatial/mosaic/sample_raster_data/binary/netcdf-coral") - WITH grouped as ( - SELECT collect_list(tile) as tile FROM coral_netcdf - ) - SELECT rst_combineavg(tile) FROM grouped LIMIT 1 + SELECT rst_combineavg(array(tile1,tile2,tile3)) FROM table LIMIT 1 +----------------------------------------------------------------------------------------------------------------+ - | rst_combineavg(tile) | + | rst_combineavg(array(tile1,tile2,tile3)) | +----------------------------------------------------------------------------------------------------------------+ | {index_id: 593308294097928191, raster: [00 01 10 ... 00], parentPath: "dbfs:/path_to_file", driver: "NetCDF" } | +----------------------------------------------------------------------------------------------------------------+ @@ -262,9 +232,9 @@ rst_combineavg rst_combineavgagg ***************** -.. function:: rst_combineavgagg(rasters) +.. function:: rst_combineavgagg(tile) - Combines a group by statement over rasters by averaging the pixel values. + Combines a group by statement over aggregated raster tiles by averaging the pixel values. The rasters must have the same extent, number of bands, and pixel type. The rasters must have the same pixel size and coordinate reference system. The output raster will have the same extent as the input rasters. @@ -273,7 +243,7 @@ rst_combineavgagg The output raster will have the same pixel size as the input rasters. The output raster will have the same coordinate reference system as the input rasters. - :param tile: A column containing raster tiles. + :param tile: A grouped column containing raster tiles. 
:type col: Column (ArrayType(RasterTileType)) :rtype: Column: RasterTileType @@ -282,9 +252,8 @@ rst_combineavgagg .. tabs:: .. code-tab:: py - df = spark.read.format("gdal").option("extensions", "nc")\ - .load("dbfs:/FileStore/geospatial/mosaic/sample_raster_data/gdal-netcdf-coral")\ - df.groupBy().agg(mos.rst_combineavgagg("tile")).limit(1).display() + df.groupBy()\ + .agg(mos.rst_combineavgagg("tile")).limit(1).display() +----------------------------------------------------------------------------------------------------------------+ | rst_combineavgagg(tile) | +----------------------------------------------------------------------------------------------------------------+ @@ -293,10 +262,8 @@ rst_combineavgagg .. code-tab:: scala - val df = spark.read - .format("gdal").option("extensions", "nc") - .load("dbfs:/FileStore/geospatial/mosaic/sample_raster_data/binary/netcdf-coral") - df.groupBy().agg(rst_combineavgagg(col("tile"))).limit(1).show(false) + df.groupBy() + .agg(rst_combineavgagg(col("tile"))).limit(1).show +----------------------------------------------------------------------------------------------------------------+ | rst_combineavgagg(tile) | +----------------------------------------------------------------------------------------------------------------+ @@ -305,11 +272,8 @@ rst_combineavgagg .. code-tab:: sql - CREATE TABLE IF NOT EXISTS TABLE coral_netcdf - USING gdal - OPTIONS (extension "nc", path "dbfs:/FileStore/geospatial/mosaic/sample_raster_data/binary/netcdf-coral") SELECT rst_combineavgagg(tile) - FROM coral_netcdf + FROM table GROUP BY 1 +----------------------------------------------------------------------------------------------------------------+ | rst_combineavgagg(tile) | @@ -320,9 +284,9 @@ rst_combineavgagg rst_frombands ************** -.. function:: rst_frombands(rasters) +.. function:: rst_frombands(tiles) - Combines a collection of rasters into a single raster. 
+ Combines a collection of raster tiles of different bands into a single raster. The rasters must have the same extent. The rasters must have the same pixel coordinate reference system. The output raster will have the same extent as the input rasters. @@ -331,7 +295,7 @@ rst_frombands The output raster will have the same pixel size as the highest resolution input rasters. The output raster will have the same coordinate reference system as the input rasters. - :param tile: A column containing an array of raster tiles. + :param tiles: A column containing an array of raster tiles. :type col: Column (ArrayType(RasterTileType)) :rtype: Column: RasterTileType @@ -340,40 +304,30 @@ rst_frombands .. tabs:: .. code-tab:: py - df = spark.read.format("gdal").option("extensions", "nc")\ - .load("dbfs:/FileStore/geospatial/mosaic/sample_raster_data/gdal-netcdf-coral")\ - .groupBy().agg(F.collect_list("tile").alias("tile")) - df.select(mos.rst_frombands("tile")).limit(1).display() + df.select(F.array("tile1", "tile2", "tile3").alias("tiles"))\ + .select(mos.rst_frombands("tiles")).limit(1).display() +----------------------------------------------------------------------------------------------------------------+ - | rst_frombands(tile) | + | rst_frombands(tiles) | +----------------------------------------------------------------------------------------------------------------+ | {index_id: 593308294097928191, raster: [00 01 10 ... 00], parentPath: "dbfs:/path_to_file", driver: "NetCDF" } | +----------------------------------------------------------------------------------------------------------------+ .. 
code-tab:: scala - val df = spark.read - .format("gdal").option("extensions", "nc") - .load("dbfs:/FileStore/geospatial/mosaic/sample_raster_data/binary/netcdf-coral") - .groupBy().agg(collect_list(col("tile")).as("tile")) - df.select(rst_frombands(col("tile"))).limit(1).show(false) + df + .select(array("tile1", "tile2", "tile3").as("tiles")) + .select(rst_frombands(col("tiles"))).limit(1).show +----------------------------------------------------------------------------------------------------------------+ - | rst_frombands(tile) | + | rst_frombands(tiles) | +----------------------------------------------------------------------------------------------------------------+ | {index_id: 593308294097928191, raster: [00 01 10 ... 00], parentPath: "dbfs:/path_to_file", driver: "NetCDF" } | +----------------------------------------------------------------------------------------------------------------+ .. code-tab:: sql - CREATE TABLE IF NOT EXISTS TABLE coral_netcdf - USING gdal - OPTIONS (extension "nc", path "dbfs:/FileStore/geospatial/mosaic/sample_raster_data/binary/netcdf-coral") - WITH grouped as ( - SELECT collect_list(tile) as tile FROM coral_netcdf - ) - SELECT rst_frombands(tile) FROM grouped LIMIT 1 + SELECT rst_frombands(array(tile1,tile2,tile3)) FROM table LIMIT 1 +----------------------------------------------------------------------------------------------------------------+ - | rst_frombands(tile) | + | rst_frombands(array(tile1,tile2,tile3)) | +----------------------------------------------------------------------------------------------------------------+ | {index_id: 593308294097928191, raster: [00 01 10 ... 00], parentPath: "dbfs:/path_to_file", driver: "NetCDF" } | +----------------------------------------------------------------------------------------------------------------+ @@ -388,8 +342,8 @@ rst_fromfile The file path must be a valid path to a raster file. The file path must be a path to a file that GDAL can read. 
If the size_in_MB parameter is specified, the raster will be split into tiles of the specified size. - If the size_in_MB parameter is not specified, the raster will not be split into tiles. - If the size_in_Mb < 0 the raster wont be split into tiles. + If the size_in_MB parameter is not specified, or if size_in_MB < 0, the raster will only be split if + it exceeds Integer.MAX_VALUE. In that case, the split will be at a threshold of 64MB. :param path: A column containing the path to a raster file. :type col: Column (StringType) @@ -403,7 +357,8 @@ rst_fromfile .. code-tab:: py df = spark.read.format("binaryFile")\ - .load("dbfs:/FileStore/geospatial/mosaic/sample_raster_data/binary/netcdf-coral") + .load("dbfs:/FileStore/geospatial/mosaic/sample_raster_data/binary/netcdf-coral")\ + .drop("content") df.select(mos.rst_fromfile("path")).limit(1).display() +----------------------------------------------------------------------------------------------------------------+ | rst_fromfile(path) | @@ -416,6 +371,7 @@ rst_fromfile val df = spark.read .format("binaryFile") .load("dbfs:/FileStore/geospatial/mosaic/sample_raster_data/binary/netcdf-coral") + .drop("content") df.select(rst_fromfile(col("path"))).limit(1).show(false) +----------------------------------------------------------------------------------------------------------------+ | rst_fromfile(path) | @@ -432,13 +388,15 @@ rst_fromfile +----------------------------------------------------------------------------------------------------------------+ | rst_fromfile(path) | +----------------------------------------------------------------------------------------------------------------+ + | {index_id: 593308294097928191, raster: [00 01 10 ... 00], parentPath: "dbfs:/path_to_file", driver: "NetCDF" } | + +----------------------------------------------------------------------------------------------------------------+ rst_georeference **************** -.. function:: rst_georeference(raster) +.. 
function:: rst_georeference(raster_tile) - Returns GeoTransform of the raster as a GT array of doubles. + Returns GeoTransform of the raster tile as a GT array of doubles. GT(0) x-coordinate of the upper-left corner of the upper-left pixel. GT(1) w-e pixel resolution / pixel width. GT(2) row rotation (typically zero). @@ -455,11 +413,9 @@ rst_georeference .. tabs:: .. code-tab:: py - df = spark.read.format("binaryFile").option("extensions", "nc")\ - .load("dbfs:/FileStore/geospatial/mosaic/sample_raster_data/binary/netcdf-coral") - df.select(mos.rst_georeference("path")).limit(1).display() + df.select(mos.rst_georeference("tile")).limit(1).display() +--------------------------------------------------------------------------------------------+ - | rst_georeference(path) | + | rst_georeference(tile) | +--------------------------------------------------------------------------------------------+ | {"scaleY": -0.049999999152053956, "skewX": 0, "skewY": 0, "upperLeftY": 89.99999847369712, | | "upperLeftX": -180.00000610436345, "scaleX": 0.050000001695656514} | @@ -467,12 +423,9 @@ rst_georeference .. code-tab:: scala - val df = spark.read - .format("binaryFile").option("extensions", "nc") - .load("dbfs:/FileStore/geospatial/mosaic/sample_raster_data/binary/netcdf-coral") - df.select(rst_georeference(col("path"))).limit(1).show() + df.select(rst_georeference(col("tile"))).limit(1).show +--------------------------------------------------------------------------------------------+ - | rst_georeference(path) | + | rst_georeference(tile) | +--------------------------------------------------------------------------------------------+ | {"scaleY": -0.049999999152053956, "skewX": 0, "skewY": 0, "upperLeftY": 89.99999847369712, | | "upperLeftX": -180.00000610436345, "scaleX": 0.050000001695656514} | @@ -480,12 +433,9 @@ rst_georeference .. 
code-tab:: sql - CREATE TABLE IF NOT EXISTS TABLE coral_netcdf - USING gdal - OPTIONS (extensions "nc", path "dbfs:/FileStore/geospatial/mosaic/sample_raster_data/binary/netcdf-coral") - SELECT rst_georeference(path) FROM coral_netcdf LIMIT 1 + SELECT rst_georeference(tile) FROM table LIMIT 1 +--------------------------------------------------------------------------------------------+ - | rst_georeference(path) | + | rst_georeference(tile) | +--------------------------------------------------------------------------------------------+ | {"scaleY": -0.049999999152053956, "skewX": 0, "skewY": 0, "upperLeftY": 89.99999847369712, | | "upperLeftX": -180.00000610436345, "scaleX": 0.050000001695656514} | @@ -494,9 +444,9 @@ rst_georeference rest_getnodata ************** -.. function:: rst_getnodata(raster) +.. function:: rst_getnodata(tile) - Returns the nodata value of the raster bands. + Returns the nodata value of the raster tile bands. :param tile: A column containing the raster tile. For < 0.3.11 string representing the path to a raster file or byte array. :type col: Column (RasterTileType) @@ -507,35 +457,27 @@ rest_getnodata .. tabs:: .. code-tab:: py - df = spark.read.format("binaryFile").option("extensions", "nc")\ - .load("dbfs:/FileStore/geospatial/mosaic/sample_raster_data/binary/netcdf-coral") - df.select(mos.rst_getnodata("path")).limit(1).display() + df.select(mos.rst_getnodata("tile")).limit(1).display() +---------------------+ - | rst_getnodata(path) | + | rst_getnodata(tile) | +---------------------+ | [0.0, -9999.0, ...] | +---------------------+ .. code-tab:: scala - val df = spark.read - .format("binaryFile").option("extensions", "nc") - .load("dbfs:/FileStore/geospatial/mosaic/sample_raster_data/binary/netcdf-coral") - df.select(rst_getnodata(col("path"))).limit(1).show() + df.select(rst_getnodata(col("tile"))).limit(1).show +---------------------+ - | rst_getnodata(path) | + | rst_getnodata(tile) | +---------------------+ | [0.0, -9999.0, ...] 
| +---------------------+ .. code-tab:: sql - CREATE TABLE IF NOT EXISTS TABLE coral_netcdf - USING gdal - OPTIONS (extensions "nc", path "dbfs:/FileStore/geospatial/mosaic/sample_raster_data/binary/netcdf-coral") - SELECT rst_getnodata(path) FROM coral_netcdf LIMIT 1 + SELECT rst_getnodata(tile) FROM table LIMIT 1 +---------------------+ - | rst_getnodata(path) | + | rst_getnodata(tile) | +---------------------+ | [0.0, -9999.0, ...] | +---------------------+ @@ -543,9 +485,9 @@ rest_getnodata rst_getsubdataset ***************** -.. function:: rst_getsubdataset(raster, name) +.. function:: rst_getsubdataset(tile, name) - Returns the subdataset of the raster with a given name. + Returns the subdataset of the raster tile with a given name. The subdataset name must be a string. The name is not a full path. The name is the last identifier in the subdataset path (FORMAT:PATH:NAME). The subdataset name must be a valid subdataset name for the raster. @@ -561,35 +503,27 @@ rst_getsubdataset .. tabs:: .. code-tab:: py - df = spark.read.format("binaryFile").option("extensions", "nc")\ - .load("dbfs:/FileStore/geospatial/mosaic/sample_raster_data/binary/netcdf-coral") - df.select(mos.rst_getsubdataset("path", "sst")).limit(1).display() + df.select(mos.rst_getsubdataset("tile", "sst")).limit(1).display() +----------------------------------------------------------------------------------------------------------------+ - | rst_getsubdataset(path, sst) | + | rst_getsubdataset(tile, sst) | +----------------------------------------------------------------------------------------------------------------+ | {index_id: 593308294097928191, raster: [00 01 10 ... 00], parentPath: "dbfs:/path_to_file", driver: "NetCDF" } | +----------------------------------------------------------------------------------------------------------------+ .. 
code-tab:: scala - val df = spark.read - .format("binaryFile").option("extensions", "nc") - .load("dbfs:/FileStore/geospatial/mosaic/sample_raster_data/binary/netcdf-coral") - df.select(rst_getsubdataset(col("path"), lit("sst"))).limit(1).show(false) + df.select(rst_getsubdataset(col("tile"), lit("sst"))).limit(1).show +----------------------------------------------------------------------------------------------------------------+ - | rst_getsubdataset(path, sst) | + | rst_getsubdataset(tile, sst) | +----------------------------------------------------------------------------------------------------------------+ | {index_id: 593308294097928191, raster: [00 01 10 ... 00], parentPath: "dbfs:/path_to_file", driver: "NetCDF" } | +----------------------------------------------------------------------------------------------------------------+ .. code-tab:: sql - CREATE TABLE IF NOT EXISTS TABLE coral_netcdf - USING gdal - OPTIONS (extensions "nc", path "dbfs:/FileStore/geospatial/mosaic/sample_raster_data/binary/netcdf-coral") - SELECT rst_getsubdataset(path, "sst") FROM coral_netcdf LIMIT 1 + SELECT rst_getsubdataset(tile, "sst") FROM table LIMIT 1 +----------------------------------------------------------------------------------------------------------------+ - | rst_getsubdataset(path, sst) | + | rst_getsubdataset(tile, sst) | +----------------------------------------------------------------------------------------------------------------+ | {index_id: 593308294097928191, raster: [00 01 10 ... 00], parentPath: "dbfs:/path_to_file", driver: "NetCDF" } | +----------------------------------------------------------------------------------------------------------------+ @@ -597,9 +531,9 @@ rst_getsubdataset rst_height ********** -.. function:: rst_height(raster) +.. function:: rst_height(tile) - Returns the height of the raster in pixels. + Returns the height of the raster tile in pixels. :param tile: A column containing the raster tile. 
For < 0.3.11 string representing the path to a raster file or byte array. :type col: Column (RasterTileType) @@ -610,11 +544,9 @@ rst_height .. tabs:: .. code-tab:: py - df = spark.read.format("binaryFile").option("extensions", "nc")\ - .load("dbfs:/FileStore/geospatial/mosaic/sample_raster_data/binary/netcdf-coral") - df.select(mos.rst_height('path')).show() + df.select(mos.rst_height('tile')).display() +--------------------+ - | rst_height(path) | + | rst_height(tile) | +--------------------+ | 3600 | | 3600 | @@ -622,12 +554,9 @@ rst_height .. code-tab:: scala - val df = spark.read - .format("binaryFile").option("extensions", "nc") - .load("dbfs:/FileStore/geospatial/mosaic/sample_raster_data/binary/netcdf-coral") - df.select(rst_height(col("path"))).show() + df.select(rst_height(col("tile"))).show +--------------------+ - | rst_height(path) | + | rst_height(tile) | +--------------------+ |3600 | |3600 | @@ -635,12 +564,9 @@ rst_height .. code-tab:: sql - CREATE TABLE IF NOT EXISTS TABLE coral_netcdf - USING gdal - OPTIONS (extensions "nc", path "dbfs:/FileStore/geospatial/mosaic/sample_raster_data/binary/netcdf-coral") - SELECT rst_height(path) FROM coral_netcdf + SELECT rst_height(tile) FROM table +--------------------+ - | rst_height(path) | + | rst_height(tile) | +--------------------+ |3600 | |3600 | @@ -649,9 +575,9 @@ rst_height rst_initnodata ************** -.. function:: rst_initnodata(raster) +.. function:: rst_initnodata(tile) - Initializes the nodata value of the raster bands. + Initializes the nodata value of the raster tile bands. The nodata value will be set to default values for the pixel type of the raster bands. The output raster will have the same extent as the input raster. The default nodata value for ByteType is 0. @@ -671,35 +597,27 @@ rst_initnodata .. tabs:: .. 
code-tab:: py - df = spark.read.format("binaryFile").option("extensions", "nc")\ - .load("dbfs:/FileStore/geospatial/mosaic/sample_raster_data/binary/netcdf-coral") - df.select(mos.rst_initnodata("path")).limit(1).display() + df.select(mos.rst_initnodata("tile")).limit(1).display() +----------------------------------------------------------------------------------------------------------------+ - | rst_initnodata(path) | + | rst_initnodata(tile) | +----------------------------------------------------------------------------------------------------------------+ | {index_id: 593308294097928191, raster: [00 01 10 ... 00], parentPath: "dbfs:/path_to_file", driver: "NetCDF" } | +----------------------------------------------------------------------------------------------------------------+ .. code-tab:: scala - val df = spark.read - .format("binaryFile").option("extensions", "nc") - .load("dbfs:/FileStore/geospatial/mosaic/sample_raster_data/binary/netcdf-coral") - df.select(rst_initnodata(col("path"))).limit(1).show(false) + df.select(rst_initnodata(col("tile"))).limit(1).show +----------------------------------------------------------------------------------------------------------------+ - | rst_initnodata(path) | + | rst_initnodata(tile) | +----------------------------------------------------------------------------------------------------------------+ | {index_id: 593308294097928191, raster: [00 01 10 ... 00], parentPath: "dbfs:/path_to_file", driver: "NetCDF" } | +----------------------------------------------------------------------------------------------------------------+ .. 
code-tab:: sql - CREATE TABLE IF NOT EXISTS TABLE coral_netcdf - USING gdal - OPTIONS (extensions "nc", path "dbfs:/FileStore/geospatial/mosaic/sample_raster_data/binary/netcdf-coral") - SELECT rst_initnodata(path) FROM coral_netcdf LIMIT 1 + SELECT rst_initnodata(tile) FROM table LIMIT 1 +----------------------------------------------------------------------------------------------------------------+ - | rst_initnodata(path) | + | rst_initnodata(tile) | +----------------------------------------------------------------------------------------------------------------+ | {index_id: 593308294097928191, raster: [00 01 10 ... 00], parentPath: "dbfs:/path_to_file", driver: "NetCDF" } | +----------------------------------------------------------------------------------------------------------------+ @@ -707,9 +625,9 @@ rst_initnodata rst_isempty ************* -.. function:: rst_isempty(raster) +.. function:: rst_isempty(tile) - Returns true if the raster is empty. + Returns true if the raster tile is empty. :param tile: A column containing the raster tile. For < 0.3.11 string representing the path to a raster file or byte array. :type col: Column (RasterTileType) @@ -720,11 +638,9 @@ rst_isempty .. tabs:: .. code-tab:: py - df = spark.read.format("binaryFile").option("extensions", "nc")\ - .load("dbfs:/FileStore/geospatial/mosaic/sample_raster_data/binary/netcdf-coral") - df.select(mos.rst_isempty('path')).show() + df.select(mos.rst_isempty('tile')).display() +--------------------+ - | rst_height(path) | + | rst_isempty(tile) | +--------------------+ |false | |false | @@ -732,12 +648,9 @@ rst_isempty ..
code-tab:: scala - val df = spark.read - .format("binaryFile").option("extensions", "nc") - .load("dbfs:/FileStore/geospatial/mosaic/sample_raster_data/binary/netcdf-coral") - df.select(rst_isempty(col("path"))).show() + df.select(rst_isempty(col("tile"))).show +--------------------+ - | rst_height(path) | + | rst_isempty(tile) | +--------------------+ |false | |false | @@ -745,12 +658,9 @@ rst_isempty .. code-tab:: sql - CREATE TABLE IF NOT EXISTS TABLE coral_netcdf - USING gdal - OPTIONS (extensions "nc", path "dbfs:/FileStore/geospatial/mosaic/sample_raster_data/binary/netcdf-coral") - SELECT rst_isempty(path) FROM coral_netcdf + SELECT rst_isempty(tile) FROM table +--------------------+ - | rst_height(path) | + | rst_isempty(tile) | +--------------------+ |false | |false | @@ -759,9 +669,9 @@ rst_isempty rst_memsize ************* -.. function:: rst_memsize(raster) +.. function:: rst_memsize(tile) - Returns size of the raster in bytes. + Returns size of the raster tile in bytes. :param tile: A column containing the raster tile. For < 0.3.11 string representing the path to a raster file or byte array. :type col: Column (RasterTileType) @@ -772,11 +682,9 @@ rst_memsize .. tabs:: .. code-tab:: py - df = spark.read.format("binaryFile").option("extensions", "nc")\ - .load("dbfs:/FileStore/geospatial/mosaic/sample_raster_data/binary/netcdf-coral") - df.select(mos.rst_memsize('path')).show() + df.select(mos.rst_memsize('tile')).display() +--------------------+ - | rst_height(path) | + | rst_memsize(tile) | +--------------------+ |730260 | |730260 | @@ -784,12 +692,9 @@ rst_memsize ..
code-tab:: scala - val df = spark.read - .format("binaryFile").option("extensions", "nc") - .load("dbfs:/FileStore/geospatial/mosaic/sample_raster_data/binary/netcdf-coral") - df.select(rst_memsize(col("path"))).show() + df.select(rst_memsize(col("tile"))).show +--------------------+ - | rst_height(path) | + | rst_memsize(tile) | +--------------------+ |730260 | |730260 | @@ -797,12 +702,9 @@ rst_memsize .. code-tab:: sql - CREATE TABLE IF NOT EXISTS TABLE coral_netcdf - USING gdal - OPTIONS (extensions "nc", path "dbfs:/FileStore/geospatial/mosaic/sample_raster_data/binary/netcdf-coral") - SELECT rst_memsize(path) FROM coral_netcdf + SELECT rst_memsize(tile) FROM table +--------------------+ - | rst_height(path) | + | rst_memsize(tile) | +--------------------+ |730260 | |730260 | @@ -811,9 +713,9 @@ rst_memsize rst_merge ********* -.. function:: rst_merge(rasters) +.. function:: rst_merge(tiles) - Combines a collection of rasters into a single raster. + Combines a collection of raster tiles into a single raster. The rasters do not need to have the same extent. The rasters must have the same coordinate reference system. The rasters are combined using gdalwarp. @@ -834,40 +736,29 @@ rst_merge .. tabs:: .. code-tab:: py - df = spark.read.format("gdal").option("extensions", "nc")\ - .load("dbfs:/FileStore/geospatial/mosaic/sample_raster_data/gdal-netcdf-coral")\ - .groupBy().agg(F.collect_list("tile").alias("tile")) - df.select(mos.rst_merge("tile")).limit(1).display() + df.select(F.array("tile1", "tile2", "tile3").alias("tiles"))\ + .select(mos.rst_merge("tiles")).limit(1).display() +----------------------------------------------------------------------------------------------------------------+ - | rst_merge(tile) | + | rst_merge(tiles) | +----------------------------------------------------------------------------------------------------------------+ | {index_id: 593308294097928191, raster: [00 01 10 ...
00], parentPath: "dbfs:/path_to_file", driver: "NetCDF" } | +----------------------------------------------------------------------------------------------------------------+ .. code-tab:: scala - val df = spark.read - .format("gdal").option("extensions", "nc") - .load("dbfs:/FileStore/geospatial/mosaic/sample_raster_data/binary/netcdf-coral") - .groupBy().agg(collect_list(col("tile")).as("tile")) - df.select(rst_merge(col("tile"))).limit(1).show(false) + df.select(array("tile1", "tile2", "tile3").as("tiles")) + .select(rst_merge(col("tiles"))).limit(1).show +----------------------------------------------------------------------------------------------------------------+ - | rst_merge(tile) | + | rst_merge(tiles) | +----------------------------------------------------------------------------------------------------------------+ | {index_id: 593308294097928191, raster: [00 01 10 ... 00], parentPath: "dbfs:/path_to_file", driver: "NetCDF" } | +----------------------------------------------------------------------------------------------------------------+ .. code-tab:: sql - CREATE TABLE IF NOT EXISTS TABLE coral_netcdf - USING gdal - OPTIONS (extension "nc", path "dbfs:/FileStore/geospatial/mosaic/sample_raster_data/binary/netcdf-coral") - WITH grouped as ( - SELECT collect_list(tile) as tile FROM coral_netcdf - ) - SELECT rst_merge(tile) FROM grouped LIMIT 1 + SELECT rst_merge(array(tile1,tile2,tile3)) FROM table LIMIT 1 +----------------------------------------------------------------------------------------------------------------+ - | rst_merge(tile) | + | rst_merge(array(tile1,tile2,tile3)) | +----------------------------------------------------------------------------------------------------------------+ | {index_id: 593308294097928191, raster: [00 01 10 ... 
00], parentPath: "dbfs:/path_to_file", driver: "NetCDF" } | +----------------------------------------------------------------------------------------------------------------+ @@ -875,9 +766,9 @@ rst_merge rst_mergeagg ************ -.. function:: rst_mergeagg(rasters) +.. function:: rst_mergeagg(tiles) - Combines a collection of rasters into a single raster. + Combines a grouped aggregate of raster tiles into a single raster. The rasters do not need to have the same extent. The rasters must have the same coordinate reference system. The rasters are combined using gdalwarp. @@ -901,9 +792,8 @@ rst_mergeagg .. tabs:: .. code-tab:: py - df = spark.read.format("gdal").option("extensions", "nc")\ - .load("dbfs:/FileStore/geospatial/mosaic/sample_raster_data/gdal-netcdf-coral") - df.select(mos.rst_mergeagg("tile")).limit(1).display() + df.groupBy("date")\ + .agg(mos.rst_mergeagg("tile")).limit(1).display() +----------------------------------------------------------------------------------------------------------------+ | rst_mergeagg(tile) | +----------------------------------------------------------------------------------------------------------------+ @@ -912,10 +802,8 @@ rst_mergeagg .. code-tab:: scala - val df = spark.read - .format("gdal").option("extensions", "nc") - .load("dbfs:/FileStore/geospatial/mosaic/sample_raster_data/binary/netcdf-coral") - df.select(rst_mergeagg(col("tile"))).limit(1).show(false) + df.groupBy("date") + .agg(rst_mergeagg(col("tile"))).limit(1).show +----------------------------------------------------------------------------------------------------------------+ | rst_mergeagg(tile) | +----------------------------------------------------------------------------------------------------------------+ @@ -924,10 +812,9 @@ rst_mergeagg .. 
code-tab:: sql - CREATE TABLE IF NOT EXISTS TABLE coral_netcdf - USING gdal - OPTIONS (extension "nc", path "dbfs:/FileStore/geospatial/mosaic/sample_raster_data/binary/netcdf-coral") - SELECT rst_mergeagg(tile) FROM coral_netcdf LIMIT 1 + SELECT rst_mergeagg(tile) + FROM table + GROUP BY date +----------------------------------------------------------------------------------------------------------------+ | rst_mergeagg(tile) | +----------------------------------------------------------------------------------------------------------------+ @@ -937,9 +824,9 @@ rst_mergeagg rst_metadata ************* -.. function:: rst_metadata(raster) +.. function:: rst_metadata(tile) - Extract the metadata describing the raster. + Extract the metadata describing the raster tile. Metadata is return as a map of key value pairs. :param tile: A column containing the raster tile. For < 0.3.11 string representing the path to a raster file or byte array. @@ -951,11 +838,9 @@ rst_metadata .. tabs:: .. code-tab:: py - df = spark.read.format("binaryFile").option("extensions", "nc")\ - .load("dbfs:/FileStore/geospatial/mosaic/sample_raster_data/binary/netcdf-coral") - df.select(mos.rst_metadata('path')).show() + df.select(mos.rst_metadata('tile')).display() +--------------------------------------------------------------------------------------------------------------------+ - | rst_metadata(path) | + | rst_metadata(tile) | +--------------------------------------------------------------------------------------------------------------------+ | {"NC_GLOBAL#publisher_url": "https://coralreefwatch.noaa.gov", "NC_GLOBAL#geospatial_lat_units": "degrees_north", | | "NC_GLOBAL#platform_vocabulary": "NOAA NODC Ocean Archive System Platforms", "NC_GLOBAL#creator_type": "group", | @@ -971,12 +856,9 @@ rst_metadata .. 
code-tab:: scala - val df = spark.read - .format("binaryFile").option("extensions", "nc") - .load("dbfs:/FileStore/geospatial/mosaic/sample_raster_data/binary/netcdf-coral") - df.select(rst_metadata(col("path"))).show() + df.select(rst_metadata(col("tile"))).show +--------------------------------------------------------------------------------------------------------------------+ - | rst_metadata(path) | + | rst_metadata(tile) | +--------------------------------------------------------------------------------------------------------------------+ | {"NC_GLOBAL#publisher_url": "https://coralreefwatch.noaa.gov", "NC_GLOBAL#geospatial_lat_units": "degrees_north", | | "NC_GLOBAL#platform_vocabulary": "NOAA NODC Ocean Archive System Platforms", "NC_GLOBAL#creator_type": "group", | @@ -992,12 +874,9 @@ rst_metadata .. code-tab:: sql - CREATE TABLE IF NOT EXISTS TABLE coral_netcdf - USING gdal - OPTIONS (extensions "nc", path "dbfs:/FileStore/geospatial/mosaic/sample_raster_data/binary/netcdf-coral") - SELECT rst_metadata(path) FROM coral_netcdf LIMIT 1 + SELECT rst_metadata(tile) FROM table LIMIT 1 +--------------------------------------------------------------------------------------------------------------------+ - | rst_metadata(path) | + | rst_metadata(tile) | +--------------------------------------------------------------------------------------------------------------------+ | {"NC_GLOBAL#publisher_url": "https://coralreefwatch.noaa.gov", "NC_GLOBAL#geospatial_lat_units": "degrees_north", | | "NC_GLOBAL#platform_vocabulary": "NOAA NODC Ocean Archive System Platforms", "NC_GLOBAL#creator_type": "group", | @@ -1014,7 +893,7 @@ rst_metadata rst_ndvi ******** -.. function:: rst_ndvi(raster, red_band, nir_band) +.. function:: rst_ndvi(tile, red_band_num, nir_band_num) Calculates the Normalized Difference Vegetation Index (NDVI) for a raster. The NDVI is calculated using the formula: (NIR - RED) / (NIR + RED). 
@@ -1025,9 +904,9 @@ rst_ndvi :param tile: A column containing the raster tile. :type col: Column (RasterTileType) - :param red_band: A column containing the band number of the red band. + :param red_band_num: A column containing the band number of the red band. :type col: Column (IntegerType) - :param nir_band: A column containing the band number of the near infrared band. + :param nir_band_num: A column containing the band number of the near infrared band. :type col: Column (IntegerType) :rtype: Column: RasterTileType @@ -1036,35 +915,27 @@ rst_ndvi .. tabs:: .. code-tab:: py - df = spark.read.format("binaryFile").option("extensions", "nc")\ - .load("dbfs:/FileStore/geospatial/mosaic/sample_raster_data/binary/netcdf-coral") - df.select(mos.rst_ndvi("path", 1, 2)).limit(1).display() + df.select(mos.rst_ndvi("tile", 1, 2)).limit(1).display() +----------------------------------------------------------------------------------------------------------------+ - | rst_ndvi(path, 1, 2) | + | rst_ndvi(tile, 1, 2) | +----------------------------------------------------------------------------------------------------------------+ | {index_id: 593308294097928191, raster: [00 01 10 ... 00], parentPath: "dbfs:/path_to_file", driver: "NetCDF" } | +----------------------------------------------------------------------------------------------------------------+ .. 
code-tab:: scala - val df = spark.read - .format("binaryFile").option("extensions", "nc") - .load("dbfs:/FileStore/geospatial/mosaic/sample_raster_data/binary/netcdf-coral") - df.select(rst_ndvi(col("path"), lit(1), lit(2))).limit(1).show(false) + df.select(rst_ndvi(col("tile"), lit(1), lit(2))).limit(1).show +----------------------------------------------------------------------------------------------------------------+ - | rst_ndvi(path, 1, 2) | + | rst_ndvi(tile, 1, 2) | +----------------------------------------------------------------------------------------------------------------+ | {index_id: 593308294097928191, raster: [00 01 10 ... 00], parentPath: "dbfs:/path_to_file", driver: "NetCDF" } | +----------------------------------------------------------------------------------------------------------------+ .. code-tab:: sql - CREATE TABLE IF NOT EXISTS TABLE coral_netcdf - USING gdal - OPTIONS (extensions "nc", path "dbfs:/FileStore/geospatial/mosaic/sample_raster_data/binary/netcdf-coral") - SELECT rst_ndvi(path, 1, 2) FROM coral_netcdf LIMIT 1 + SELECT rst_ndvi(tile, 1, 2) FROM table LIMIT 1 +----------------------------------------------------------------------------------------------------------------+ - | rst_ndvi(path, 1, 2) | + | rst_ndvi(tile, 1, 2) | +----------------------------------------------------------------------------------------------------------------+ | {index_id: 593308294097928191, raster: [00 01 10 ... 00], parentPath: "dbfs:/path_to_file", driver: "NetCDF" } | +----------------------------------------------------------------------------------------------------------------+ @@ -1072,9 +943,9 @@ rst_ndvi rst_numbands ************* -.. function:: rst_numbands(raster) +.. function:: rst_numbands(tile) - Returns number of bands in the raster. + Returns number of bands in the raster tile. :param tile: A column containing the raster tile. For < 0.3.11 string representing the path to a raster file or byte array. 
:type col: Column (RasterTileType) @@ -1085,11 +956,9 @@ rst_numbands .. tabs:: .. code-tab:: py - df = spark.read.format("binaryFile").option("extensions", "nc")\ - .load("dbfs:/FileStore/geospatial/mosaic/sample_raster_data/binary/netcdf-coral") - df.select(mos.rst_numbands('path')).show() + df.select(mos.rst_numbands('tile')).display() +---------------------+ - | rst_numbands(path) | + | rst_numbands(tile) | +---------------------+ | 1 | | 1 | @@ -1097,12 +966,9 @@ rst_numbands .. code-tab:: scala - val df = spark.read - .format("binaryFile").option("extensions", "nc") - .load("dbfs:/FileStore/geospatial/mosaic/sample_raster_data/binary/netcdf-coral") - df.select(rst_metadata(col("path"))).show() + df.select(rst_numbands(col("tile"))).show +---------------------+ - | rst_numbands(path) | + | rst_numbands(tile) | +---------------------+ | 1 | | 1 | @@ -1110,12 +976,9 @@ rst_numbands .. code-tab:: sql - CREATE TABLE IF NOT EXISTS TABLE coral_netcdf - USING gdal - OPTIONS (extensions "nc", path "dbfs:/FileStore/geospatial/mosaic/sample_raster_data/binary/netcdf-coral") - SELECT rst_metadata(path) + SELECT rst_numbands(tile) FROM table +---------------------+ - | rst_numbands(path) | + | rst_numbands(tile) | +---------------------+ | 1 | | 1 | @@ -1124,9 +987,9 @@ rst_numbands rst_pixelheight *************** -.. function:: rst_pixelheight(raster) +.. function:: rst_pixelheight(tile) - Returns the height of the pixel in the raster derived via GeoTransform. + Returns the height of the pixel in the raster tile derived via GeoTransform. :param tile: A column containing the raster tile. For < 0.3.11 string representing the path to a raster file or byte array. :type col: Column (RasterTileType) @@ -1137,11 +1000,9 @@ rst_pixelheight .. tabs:: ..
code-tab:: py - df = spark.read.format("binaryFile").option("extensions", "nc")\ - .load("dbfs:/FileStore/geospatial/mosaic/sample_raster_data/binary/netcdf-coral") - df.select(mos.rst_pixelheight('path')).show() + df.select(mos.rst_pixelheight('tile')).display() +-----------------------+ - | rst_pixelheight(path) | + | rst_pixelheight(tile) | +-----------------------+ | 1 | | 1 | @@ -1149,12 +1010,9 @@ rst_pixelheight .. code-tab:: scala - val df = spark.read - .format("binaryFile").option("extensions", "nc") - .load("dbfs:/FileStore/geospatial/mosaic/sample_raster_data/binary/netcdf-coral") - df.select(rst_pixelheight(col("path"))).show() + df.select(rst_pixelheight(col("tile"))).show +-----------------------+ - | rst_pixelheight(path) | + | rst_pixelheight(tile) | +-----------------------+ | 1 | | 1 | @@ -1162,12 +1020,9 @@ rst_pixelheight .. code-tab:: sql - CREATE TABLE IF NOT EXISTS TABLE coral_netcdf - USING gdal - OPTIONS (extensions "nc", path "dbfs:/FileStore/geospatial/mosaic/sample_raster_data/binary/netcdf-coral") - SELECT rst_pixelheight(path) + SELECT rst_pixelheight(tile) FROM table +-----------------------+ - | rst_pixelheight(path) | + | rst_pixelheight(tile) | +-----------------------+ | 1 | | 1 | @@ -1176,9 +1031,9 @@ rst_pixelheight rst_pixelwidth ************** -.. function:: rst_pixelwidth(raster) +.. function:: rst_pixelwidth(tile) - Returns the width of the pixel in the raster derived via GeoTransform. + Returns the width of the pixel in the raster tile derived via GeoTransform. :param tile: A column containing the raster tile. For < 0.3.11 string representing the path to a raster file or byte array. :type col: Column (RasterTileType) @@ -1189,11 +1044,9 @@ rst_pixelwidth .. tabs:: .. 
code-tab:: py - df = spark.read.format("binaryFile").option("extensions", "nc")\ - .load("dbfs:/FileStore/geospatial/mosaic/sample_raster_data/binary/netcdf-coral") - df.select(mos.rst_pixelwidth('path')).show() + df.select(mos.rst_pixelwidth('tile')).display() +---------------------+ - | rst_pixelwidth(path)| + | rst_pixelwidth(tile)| +---------------------+ | 1 | | 1 | @@ -1201,12 +1054,9 @@ rst_pixelwidth .. code-tab:: scala - val df = spark.read - .format("binaryFile").option("extensions", "nc") - .load("dbfs:/FileStore/geospatial/mosaic/sample_raster_data/binary/netcdf-coral") - df.select(rst_pixelwidth(col("path"))).show() + df.select(rst_pixelwidth(col("tile"))).show +---------------------+ - | rst_pixelwidth(path)| + | rst_pixelwidth(tile)| +---------------------+ | 1 | | 1 | @@ -1214,12 +1064,9 @@ rst_pixelwidth .. code-tab:: sql - CREATE TABLE IF NOT EXISTS TABLE coral_netcdf - USING gdal - OPTIONS (extensions "nc", path "dbfs:/FileStore/geospatial/mosaic/sample_raster_data/binary/netcdf-coral") - SELECT rst_pixelwidth(path) + SELECT rst_pixelwidth(tile) FROM table +---------------------+ - | rst_pixelwidth(path)| + | rst_pixelwidth(tile)| +---------------------+ | 1 | | 1 | @@ -1228,7 +1075,7 @@ rst_pixelwidth rst_rastertogridavg ******************* -.. function:: rst_rastertogridavg(raster, resolution) +.. function:: rst_rastertogridavg(tile, resolution) The result is a 2D array of cells, where each cell is a struct of (cellID, value). For getting the output of cellID->value pairs, please use explode() function twice. @@ -1246,11 +1093,9 @@ rst_rastertogridavg .. tabs:: .. 
code-tab:: py - df = spark.read.format("binaryFile").option("extensions", "nc")\ - .load("dbfs:/FileStore/geospatial/mosaic/sample_raster_data/binary/netcdf-coral") - df.select(mos.rst_rastertogridavg('path', F.lit(3))).show() + df.select(mos.rst_rastertogridavg('tile', F.lit(3))).display() +--------------------------------------------------------------------------------------------------------------------+ - | rst_rastertogridavg(path, 3) | + | rst_rastertogridavg(tile, 3) | +--------------------------------------------------------------------------------------------------------------------+ | [[{"cellID": "593176490141548543", "measure": 0}, {"cellID": "593386771740360703", "measure": 1.2037735849056603}, | | {"cellID": "593308294097928191", "measure": 0}, {"cellID": "593825202001936383", "measure": 0}, | @@ -1263,12 +1108,9 @@ rst_rastertogridavg .. code-tab:: scala - val df = spark.read - .format("binaryFile").option("extensions", "nc") - .load("dbfs:/FileStore/geospatial/mosaic/sample_raster_data/binary/netcdf-coral") - df.select(rst_rastertogridavg(col("path"), lit(3))).show() + df.select(rst_rastertogridavg(col("tile"), lit(3))).show +--------------------------------------------------------------------------------------------------------------------+ - | rst_rastertogridavg(path, 3) | + | rst_rastertogridavg(tile, 3) | +--------------------------------------------------------------------------------------------------------------------+ | [[{"cellID": "593176490141548543", "measure": 0}, {"cellID": "593386771740360703", "measure": 1.2037735849056603}, | | {"cellID": "593308294097928191", "measure": 0}, {"cellID": "593825202001936383", "measure": 0}, | @@ -1281,12 +1123,9 @@ rst_rastertogridavg .. 
code-tab:: sql - CREATE TABLE IF NOT EXISTS TABLE coral_netcdf - USING gdal - OPTIONS (extensions "nc", path "dbfs:/FileStore/geospatial/mosaic/sample_raster_data/binary/netcdf-coral") - SELECT rst_rastertogridavg(path, 3) + SELECT rst_rastertogridavg(tile, 3) FROM table +--------------------------------------------------------------------------------------------------------------------+ - | rst_rastertogridavg(path, 3) | + | rst_rastertogridavg(tile, 3) | +--------------------------------------------------------------------------------------------------------------------+ | [[{"cellID": "593176490141548543", "measure": 0}, {"cellID": "593386771740360703", "measure": 1.2037735849056603}, | | {"cellID": "593308294097928191", "measure": 0}, {"cellID": "593825202001936383", "measure": 0}, | @@ -1300,12 +1139,12 @@ rst_rastertogridavg .. figure:: ../images/rst_rastertogridavg/h3.png :figclass: doc-figure - Fig 1. RST_RasterToGridAvg(raster, 3) + Fig 1. RST_RasterToGridAvg(tile, 3) rst_rastertogridcount ********************* -.. function:: rst_rastertogridcount(raster, resolution) +.. function:: rst_rastertogridcount(tile, resolution) The result is a 2D array of cells, where each cell is a struct of (cellID, value). For getting the output of cellID->value pairs, please use explode() function twice. @@ -1323,11 +1162,9 @@ rst_rastertogridcount .. tabs:: .. 
code-tab:: py - df = spark.read.format("binaryFile").option("extensions", "nc")\ - .load("dbfs:/FileStore/geospatial/mosaic/sample_raster_data/binary/netcdf-coral") - df.select(mos.rst_rastertogridcount('path', F.lit(3))).show() + df.select(mos.rst_rastertogridcount('tile', F.lit(3))).display() +------------------------------------------------------------------------------------------------------------------+ - | rst_rastertogridcount(path, 3) | + | rst_rastertogridcount(tile, 3) | +------------------------------------------------------------------------------------------------------------------+ | [[{"cellID": "593176490141548543", "measure": 0}, {"cellID": "593386771740360703", "measure": 1}, | | {"cellID": "593308294097928191", "measure": 0}, {"cellID": "593825202001936383", "measure": 0}, | @@ -1340,12 +1177,9 @@ rst_rastertogridcount .. code-tab:: scala - val df = spark.read - .format("binaryFile").option("extensions", "nc") - .load("dbfs:/FileStore/geospatial/mosaic/sample_raster_data/binary/netcdf-coral") - df.select(rst_rastertogridcount(col("path"), lit(3))).show() + df.select(rst_rastertogridcount(col("tile"), lit(3))).show +------------------------------------------------------------------------------------------------------------------+ - | rst_rastertogridcount(path, 3) | + | rst_rastertogridcount(tile, 3) | +------------------------------------------------------------------------------------------------------------------+ | [[{"cellID": "593176490141548543", "measure": 0}, {"cellID": "593386771740360703", "measure": 1}, | | {"cellID": "593308294097928191", "measure": 0}, {"cellID": "593825202001936383", "measure": 0}, | @@ -1358,12 +1192,9 @@ rst_rastertogridcount .. 
code-tab:: sql - CREATE TABLE IF NOT EXISTS TABLE coral_netcdf - USING gdal - OPTIONS (extensions "nc", path "dbfs:/FileStore/geospatial/mosaic/sample_raster_data/binary/netcdf-coral") - SELECT rst_rastertogridcount(path, 3) + SELECT rst_rastertogridcount(tile, 3) FROM table +------------------------------------------------------------------------------------------------------------------+ - | rst_rastertogridcount(path, 3) | + | rst_rastertogridcount(tile, 3) | +------------------------------------------------------------------------------------------------------------------+ | [[{"cellID": "593176490141548543", "measure": 0}, {"cellID": "593386771740360703", "measure": 1}, | | {"cellID": "593308294097928191", "measure": 0}, {"cellID": "593825202001936383", "measure": 0}, | @@ -1377,12 +1208,12 @@ rst_rastertogridcount .. figure:: ../images/rst_rastertogridavg/h3.png :figclass: doc-figure - Fig 2. RST_RasterToGridCount(raster, 3) + Fig 2. RST_RasterToGridCount(tile, 3) rst_rastertogridmax ******************* -.. function:: rst_rastertogridmax(raster, resolution) +.. function:: rst_rastertogridmax(tile, resolution) The result is a 2D array of cells, where each cell is a struct of (cellID, value). For getting the output of cellID->value pairs, please use explode() function twice. @@ -1400,11 +1231,9 @@ rst_rastertogridmax .. tabs:: .. 
code-tab:: py - df = spark.read.format("binaryFile").option("extensions", "nc")\ - .load("dbfs:/FileStore/geospatial/mosaic/sample_raster_data/binary/netcdf-coral") - df.select(mos.rst_rastertogridmax('path', F.lit(3))).show() + df.select(mos.rst_rastertogridmax('tile', F.lit(3))).display() +--------------------------------------------------------------------------------------------------------------------+ - | rst_rastertogridmax(path, 3) | + | rst_rastertogridmax(tile, 3) | +--------------------------------------------------------------------------------------------------------------------+ | [[{"cellID": "593176490141548543", "measure": 0}, {"cellID": "593386771740360703", "measure": 1.2037735849056603}, | | {"cellID": "593308294097928191", "measure": 0}, {"cellID": "593825202001936383", "measure": 0}, | @@ -1417,12 +1246,9 @@ rst_rastertogridmax .. code-tab:: scala - val df = spark.read - .format("binaryFile").option("extensions", "nc") - .load("dbfs:/FileStore/geospatial/mosaic/sample_raster_data/binary/netcdf-coral") - df.select(rst_rastertogridmax(col("path"), lit(3))).show() + df.select(rst_rastertogridmax(col("tile"), lit(3))).show +--------------------------------------------------------------------------------------------------------------------+ - | rst_rastertogridmax(path, 3) | + | rst_rastertogridmax(tile, 3) | +--------------------------------------------------------------------------------------------------------------------+ | [[{"cellID": "593176490141548543", "measure": 0}, {"cellID": "593386771740360703", "measure": 1.2037735849056603}, | | {"cellID": "593308294097928191", "measure": 0}, {"cellID": "593825202001936383", "measure": 0}, | @@ -1435,12 +1261,9 @@ rst_rastertogridmax .. 
code-tab:: sql - CREATE TABLE IF NOT EXISTS TABLE coral_netcdf - USING gdal - OPTIONS (extensions "nc", path "dbfs:/FileStore/geospatial/mosaic/sample_raster_data/binary/netcdf-coral") - SELECT rst_rastertogridmax(path, 3) + SELECT rst_rastertogridmax(tile, 3) FROM table +--------------------------------------------------------------------------------------------------------------------+ - | rst_rastertogridmax(path, 3) | + | rst_rastertogridmax(tile, 3) | +--------------------------------------------------------------------------------------------------------------------+ | [[{"cellID": "593176490141548543", "measure": 0}, {"cellID": "593386771740360703", "measure": 1.2037735849056603}, | | {"cellID": "593308294097928191", "measure": 0}, {"cellID": "593825202001936383", "measure": 0}, | @@ -1454,12 +1277,12 @@ rst_rastertogridmax .. figure:: ../images/rst_rastertogridavg/h3.png :figclass: doc-figure - Fig 3. RST_RasterToGridMax(raster, 3) + Fig 3. RST_RasterToGridMax(tile, 3) rst_rastertogridmedian ********************** -.. function:: rst_rastertogridmedian(raster, resolution) +.. function:: rst_rastertogridmedian(tile, resolution) The result is a 2D array of cells, where each cell is a struct of (cellID, value). For getting the output of cellID->value pairs, please use explode() function twice. @@ -1477,11 +1300,9 @@ rst_rastertogridmedian .. tabs:: .. 
code-tab:: py - df = spark.read.format("binaryFile").option("extensions", "nc")\ - .load("dbfs:/FileStore/geospatial/mosaic/sample_raster_data/binary/netcdf-coral") - df.select(mos.rst_rastertogridmedian('path', F.lit(3))).show() + df.select(mos.rst_rastertogridmedian('tile', F.lit(3))).display() +--------------------------------------------------------------------------------------------------------------------+ - | rst_rastertogridmedian(path, 3) | + | rst_rastertogridmedian(tile, 3) | +--------------------------------------------------------------------------------------------------------------------+ | [[{"cellID": "593176490141548543", "measure": 0}, {"cellID": "593386771740360703", "measure": 1.2037735849056603}, | | {"cellID": "593308294097928191", "measure": 0}, {"cellID": "593825202001936383", "measure": 0}, | @@ -1494,12 +1315,9 @@ rst_rastertogridmedian .. code-tab:: scala - val df = spark.read - .format("binaryFile").option("extensions", "nc") - .load("dbfs:/FileStore/geospatial/mosaic/sample_raster_data/binary/netcdf-coral") - df.select(rst_rastertogridmedian(col("path"), lit(3))).show() + df.select(rst_rastertogridmedian(col("tile"), lit(3))).show +--------------------------------------------------------------------------------------------------------------------+ - | rst_rastertogridmedian(path, 3) | + | rst_rastertogridmedian(tile, 3) | +--------------------------------------------------------------------------------------------------------------------+ | [[{"cellID": "593176490141548543", "measure": 0}, {"cellID": "593386771740360703", "measure": 1.2037735849056603}, | | {"cellID": "593308294097928191", "measure": 0}, {"cellID": "593825202001936383", "measure": 0}, | @@ -1512,12 +1330,9 @@ rst_rastertogridmedian .. 
code-tab:: sql - CREATE TABLE IF NOT EXISTS TABLE coral_netcdf - USING gdal - OPTIONS (extensions "nc", path "dbfs:/FileStore/geospatial/mosaic/sample_raster_data/binary/netcdf-coral") - SELECT rst_rastertogridmax(path, 3) + SELECT rst_rastertogridmedian(tile, 3) FROM table +--------------------------------------------------------------------------------------------------------------------+ - | rst_rastertogridmedian(path, 3) | + | rst_rastertogridmedian(tile, 3) | +--------------------------------------------------------------------------------------------------------------------+ | [[{"cellID": "593176490141548543", "measure": 0}, {"cellID": "593386771740360703", "measure": 1.2037735849056603}, | | {"cellID": "593308294097928191", "measure": 0}, {"cellID": "593825202001936383", "measure": 0}, | @@ -1531,12 +1346,12 @@ rst_rastertogridmedian .. figure:: ../images/rst_rastertogridavg/h3.png :figclass: doc-figure - Fig 4. RST_RasterToGridMedian(raster, 3) + Fig 4. RST_RasterToGridMedian(tile, 3) rst_rastertogridmin ******************* -.. function:: rst_rastertogridmin(raster, resolution) +.. function:: rst_rastertogridmin(tile, resolution) The result is a 2D array of cells, where each cell is a struct of (cellID, value). For getting the output of cellID->value pairs, please use explode() function twice. @@ -1554,11 +1369,9 @@ rst_rastertogridmin .. tabs:: .. 
code-tab:: py - df = spark.read.format("binaryFile").option("extensions", "nc")\ - .load("dbfs:/FileStore/geospatial/mosaic/sample_raster_data/binary/netcdf-coral") - df.select(mos.rst_rastertogridmin('path', F.lit(3))).show() + df.select(mos.rst_rastertogridmin('tile', F.lit(3))).display() +--------------------------------------------------------------------------------------------------------------------+ - | rst_rastertogridmin(path, 3) | + | rst_rastertogridmin(tile, 3) | +--------------------------------------------------------------------------------------------------------------------+ | [[{"cellID": "593176490141548543", "measure": 0}, {"cellID": "593386771740360703", "measure": 1.2037735849056603}, | | {"cellID": "593308294097928191", "measure": 0}, {"cellID": "593825202001936383", "measure": 0}, | @@ -1571,12 +1384,9 @@ rst_rastertogridmin .. code-tab:: scala - val df = spark.read - .format("binaryFile").option("extensions", "nc") - .load("dbfs:/FileStore/geospatial/mosaic/sample_raster_data/binary/netcdf-coral") - df.select(rst_rastertogridmin(col("path"), lit(3))).show() + df.select(rst_rastertogridmin(col("tile"), lit(3))).show +--------------------------------------------------------------------------------------------------------------------+ - | rst_rastertogridmin(path, 3) | + | rst_rastertogridmin(tile, 3) | +--------------------------------------------------------------------------------------------------------------------+ | [[{"cellID": "593176490141548543", "measure": 0}, {"cellID": "593386771740360703", "measure": 1.2037735849056603}, | | {"cellID": "593308294097928191", "measure": 0}, {"cellID": "593825202001936383", "measure": 0}, | @@ -1589,12 +1399,9 @@ rst_rastertogridmin .. 
code-tab:: sql - CREATE TABLE IF NOT EXISTS TABLE coral_netcdf - USING gdal - OPTIONS (extensions "nc", path "dbfs:/FileStore/geospatial/mosaic/sample_raster_data/binary/netcdf-coral") - SELECT rst_rastertogridmin(path, 3) + SELECT rst_rastertogridmin(tile, 3) FROM table +--------------------------------------------------------------------------------------------------------------------+ - | rst_rastertogridmin(path, 3) | + | rst_rastertogridmin(tile, 3) | +--------------------------------------------------------------------------------------------------------------------+ | [[{"cellID": "593176490141548543", "measure": 0}, {"cellID": "593386771740360703", "measure": 1.2037735849056603}, | | {"cellID": "593308294097928191", "measure": 0}, {"cellID": "593825202001936383", "measure": 0}, | @@ -1608,14 +1415,14 @@ rst_rastertogridmin .. figure:: ../images/rst_rastertogridavg/h3.png :figclass: doc-figure - Fig 4. RST_RasterToGridMin(raster, 3) + Fig 5. RST_RasterToGridMin(tile, 3) rst_rastertoworldcoord ********************** -.. function:: rst_rastertoworldcoord(raster, x, y) +.. function:: rst_rastertoworldcoord(tile, x, y) - Computes the world coordinates of the raster pixel at the given x and y coordinates. + Computes the world coordinates of the raster tile at the given x and y pixel coordinates. The result is a WKT point geometry. The coordinates are computed using the GeoTransform of the raster to respect the projection. @@ -1632,35 +1439,27 @@ rst_rastertoworldcoord .. tabs:: .. 
code-tab:: py - df = spark.read.format("binaryFile").option("extensions", "nc")\ - .load("dbfs:/FileStore/geospatial/mosaic/sample_raster_data/binary/netcdf-coral") - df.select(mos.rst_rastertoworldcoord('path', F.lit(3), F.lit(3))).show() + df.select(mos.rst_rastertoworldcoord('tile', F.lit(3), F.lit(3))).display() +------------------------------------------------------------------------------------------------------------------+ - | rst_rastertoworldcoord(path, 3, 3) | + | rst_rastertoworldcoord(tile, 3, 3) | +------------------------------------------------------------------------------------------------------------------+ |POINT (-179.85000609927647 89.84999847624096) | +------------------------------------------------------------------------------------------------------------------+ .. code-tab:: scala - val df = spark.read - .format("binaryFile").option("extensions", "nc") - .load("dbfs:/FileStore/geospatial/mosaic/sample_raster_data/binary/netcdf-coral") - df.select(rst_rastertoworldcoord(col("path"), lit(3), lit(3))).show() + df.select(rst_rastertoworldcoord(col("tile"), lit(3), lit(3))).show +------------------------------------------------------------------------------------------------------------------+ - | rst_rastertoworldcoord(path, 3, 3) | + | rst_rastertoworldcoord(tile, 3, 3) | +------------------------------------------------------------------------------------------------------------------+ |POINT (-179.85000609927647 89.84999847624096) | +------------------------------------------------------------------------------------------------------------------+ .. 
code-tab:: sql - CREATE TABLE IF NOT EXISTS TABLE coral_netcdf - USING gdal - OPTIONS (extensions "nc", path "dbfs:/FileStore/geospatial/mosaic/sample_raster_data/binary/netcdf-coral") - SELECT rst_rastertoworldcoord(path, 3, 3) + SELECT rst_rastertoworldcoord(tile, 3, 3) FROM table +------------------------------------------------------------------------------------------------------------------+ - | rst_rastertoworldcoord(path, 3, 3) | + | rst_rastertoworldcoord(tile, 3, 3) | +------------------------------------------------------------------------------------------------------------------+ |POINT (-179.85000609927647 89.84999847624096) | +------------------------------------------------------------------------------------------------------------------+ @@ -1668,9 +1467,9 @@ rst_rastertoworldcoord rst_rastertoworldcoordx ********************** -.. function:: rst_rastertoworldcoord(raster, x, y) +.. function:: rst_rastertoworldcoordx(tile, x, y) - Computes the world coordinates of the raster pixel at the given x and y coordinates. + Computes the world coordinates of the raster tile at the given x and y pixel coordinates. The result is the X coordinate of the point after applying the GeoTransform of the raster. :param tile: A column containing the raster tile. For < 0.3.11 string representing the path to a raster file or byte array. @@ -1679,42 +1478,34 @@ rst_rastertoworldcoordx :type col: Column (IntegerType) :param y: y coordinate of the pixel. :type col: Column (IntegerType) - :rtype: Column: StringType + :rtype: Column: DoubleType :example: .. tabs:: .. 
code-tab:: py - df = spark.read.format("binaryFile").option("extensions", "nc")\ - .load("dbfs:/FileStore/geospatial/mosaic/sample_raster_data/binary/netcdf-coral") - df.select(mos.rst_rastertoworldcoordx('path', F.lit(3), F.lit(3))).show() + df.select(mos.rst_rastertoworldcoordx('tile', F.lit(3), F.lit(3))).display() +------------------------------------------------------------------------------------------------------------------+ - | rst_rastertoworldcoordx(path, 3, 3) | + | rst_rastertoworldcoordx(tile, 3, 3) | +------------------------------------------------------------------------------------------------------------------+ | -179.85000609927647 | +------------------------------------------------------------------------------------------------------------------+ .. code-tab:: scala - val df = spark.read - .format("binaryFile").option("extensions", "nc") - .load("dbfs:/FileStore/geospatial/mosaic/sample_raster_data/binary/netcdf-coral") - df.select(rst_rastertoworldcoordx(col("path"), lit(3), lit(3))).show() + df.select(rst_rastertoworldcoordx(col("tile"), lit(3), lit(3))).show +------------------------------------------------------------------------------------------------------------------+ - | rst_rastertoworldcoordx(path, 3, 3) | + | rst_rastertoworldcoordx(tile, 3, 3) | +------------------------------------------------------------------------------------------------------------------+ | -179.85000609927647 | +------------------------------------------------------------------------------------------------------------------+ .. 
code-tab:: sql - CREATE TABLE IF NOT EXISTS TABLE coral_netcdf - USING gdal - OPTIONS (extensions "nc", path "dbfs:/FileStore/geospatial/mosaic/sample_raster_data/binary/netcdf-coral") - SELECT rst_rastertoworldcoordx(path, 3, 3) + SELECT rst_rastertoworldcoordx(tile, 3, 3) FROM table +------------------------------------------------------------------------------------------------------------------+ - | rst_rastertoworldcoordx(path, 3, 3) | + | rst_rastertoworldcoordx(tile, 3, 3) | +------------------------------------------------------------------------------------------------------------------+ | -179.85000609927647 | +------------------------------------------------------------------------------------------------------------------+ @@ -1722,9 +1513,9 @@ rst_rastertoworldcoordx rst_rastertoworldcoordy ********************** -.. function:: rst_rastertoworldcoordy(raster, x, y) +.. function:: rst_rastertoworldcoordy(tile, x, y) - Computes the world coordinates of the raster pixel at the given x and y coordinates. + Computes the world coordinates of the raster tile at the given x and y pixel coordinates. The result is the X coordinate of the point after applying the GeoTransform of the raster. :param tile: A column containing the raster tile. For < 0.3.11 string representing the path to a raster file or byte array. @@ -1733,42 +1524,34 @@ rst_rastertoworldcoordy :type col: Column (IntegerType) :param y: y coordinate of the pixel. :type col: Column (IntegerType) - :rtype: Column: StringType + :rtype: Column: DoubleType :example: .. tabs:: .. 
code-tab:: py - df = spark.read.format("binaryFile").option("extensions", "nc")\ - .load("dbfs:/FileStore/geospatial/mosaic/sample_raster_data/binary/netcdf-coral") - df.select(mos.rst_rastertoworldcoordy('path', F.lit(3), F.lit(3))).show() + df.select(mos.rst_rastertoworldcoordy('tile', F.lit(3), F.lit(3))).display() +------------------------------------------------------------------------------------------------------------------+ - | rst_rastertoworldcoordy(path, 3, 3) | + | rst_rastertoworldcoordy(tile, 3, 3) | +------------------------------------------------------------------------------------------------------------------+ | 89.84999847624096 | +------------------------------------------------------------------------------------------------------------------+ .. code-tab:: scala - val df = spark.read - .format("binaryFile").option("extensions", "nc") - .load("dbfs:/FileStore/geospatial/mosaic/sample_raster_data/binary/netcdf-coral") - df.select(rst_rastertoworldcoordy(col("path"), lit(3), lit(3))).show() + df.select(rst_rastertoworldcoordy(col("tile"), lit(3), lit(3))).show +------------------------------------------------------------------------------------------------------------------+ - | rst_rastertoworldcoordy(path, 3, 3) | + | rst_rastertoworldcoordy(tile, 3, 3) | +------------------------------------------------------------------------------------------------------------------+ | 89.84999847624096 | +------------------------------------------------------------------------------------------------------------------+ .. 
code-tab:: sql - CREATE TABLE IF NOT EXISTS TABLE coral_netcdf - USING gdal - OPTIONS (extensions "nc", path "dbfs:/FileStore/geospatial/mosaic/sample_raster_data/binary/netcdf-coral") - SELECT rst_rastertoworldcoordy(path, 3, 3) + SELECT rst_rastertoworldcoordy(tile, 3, 3) FROM table +------------------------------------------------------------------------------------------------------------------+ - | rst_rastertoworldcoordy(path, 3, 3) | + | rst_rastertoworldcoordy(tile, 3, 3) | +------------------------------------------------------------------------------------------------------------------+ | 89.84999847624096 | +------------------------------------------------------------------------------------------------------------------+ @@ -1776,9 +1559,9 @@ rst_rastertoworldcoordy rst_retile ********************** -.. function:: rst_retile(raster, width, height) +.. function:: rst_retile(tile, width, height) - Retiles the raster to the given tile size. The result is a collection of new raster files. + Retiles the raster tile to the given size. The result is a collection of new raster tiles. The new rasters are stored in the checkpoint directory. The results are the paths to the new rasters. The result set is automatically exploded. @@ -1789,18 +1572,16 @@ rst_retile :type col: Column (IntegerType) :param height: The height of the tiles. :type col: Column (IntegerType) - :rtype: Column: StringType + :rtype: Column: (RasterTileType) :example: .. tabs:: .. 
code-tab:: py - df = spark.read.format("binaryFile").option("extensions", "nc")\ - .load("dbfs:/FileStore/geospatial/mosaic/sample_raster_data/binary/netcdf-coral") - df.select(mos.rst_retile('path', F.lit(300), F.lit(300))).show() + df.select(mos.rst_retile('tile', F.lit(300), F.lit(300))).display() +------------------------------------------------------------------------------------------------------------------+ - | rst_retile(path, 300, 300) | + | rst_retile(tile, 300, 300) | +------------------------------------------------------------------------------------------------------------------+ | {index_id: 593308294097928191, raster: [00 01 10 ... 00], parentPath: "dbfs:/path_to_file", driver: "NetCDF" } | | {index_id: 593308294097928192, raster: [00 01 10 ... 00], parentPath: "dbfs:/path_to_file", driver: "NetCDF" } | @@ -1808,12 +1589,9 @@ rst_retile .. code-tab:: scala - val df = spark.read - .format("binaryFile").option("extensions", "nc") - .load("dbfs:/FileStore/geospatial/mosaic/sample_raster_data/binary/netcdf-coral") - df.select(rst_retile(col("path"), lit(300), lit(300))).show() + df.select(rst_retile(col("tile"), lit(300), lit(300))).show +------------------------------------------------------------------------------------------------------------------+ - | rst_retile(path, 300, 300) | + | rst_retile(tile, 300, 300) | +------------------------------------------------------------------------------------------------------------------+ | {index_id: 593308294097928191, raster: [00 01 10 ... 00], parentPath: "dbfs:/path_to_file", driver: "NetCDF" } | | {index_id: 593308294097928192, raster: [00 01 10 ... 00], parentPath: "dbfs:/path_to_file", driver: "NetCDF" } | @@ -1821,12 +1599,9 @@ rst_retile .. 
code-tab:: sql - CREATE TABLE IF NOT EXISTS TABLE coral_netcdf - USING gdal - OPTIONS (extensions "nc", path "dbfs:/FileStore/geospatial/mosaic/sample_raster_data/binary/netcdf-coral") - SELECT rst_retile(path, 300, 300) + SELECT rst_retile(tile, 300, 300) FROM table +------------------------------------------------------------------------------------------------------------------+ - | rst_retile(path, 300, 300) | + | rst_retile(tile, 300, 300) | +------------------------------------------------------------------------------------------------------------------+ | {index_id: 593308294097928191, raster: [00 01 10 ... 00], parentPath: "dbfs:/path_to_file", driver: "NetCDF" } | | {index_id: 593308294097928192, raster: [00 01 10 ... 00], parentPath: "dbfs:/path_to_file", driver: "NetCDF" } | @@ -1835,9 +1610,9 @@ rst_retile rst_rotation ********************** -.. function:: rst_rotation(raster) +.. function:: rst_rotation(tile) - Computes the rotation of the raster in degrees. + Computes the rotation of the raster tile in degrees. The rotation is the angle between the X axis and the North axis. The rotation is computed using the GeoTransform of the raster. @@ -1850,11 +1625,9 @@ rst_rotation .. tabs:: .. code-tab:: py - df = spark.read.format("binaryFile").option("extensions", "nc")\ - .load("dbfs:/FileStore/geospatial/mosaic/sample_raster_data/binary/netcdf-coral") - df.select(mos.rst_rotation('path').show() + df.select(mos.rst_rotation('tile')).display() +------------------------------------------------------------------------------------------------------------------+ - | rst_rotation(path) | + | rst_rotation(tile) | +------------------------------------------------------------------------------------------------------------------+ | 1.2 | | 21.2 | @@ -1862,12 +1635,9 @@ rst_rotation .. 
code-tab:: scala - val df = spark.read - .format("binaryFile").option("extensions", "nc") - .load("dbfs:/FileStore/geospatial/mosaic/sample_raster_data/binary/netcdf-coral") - df.select(rst_rotation(col("path"))).show() + df.select(rst_rotation(col("tile"))).show +------------------------------------------------------------------------------------------------------------------+ - | rst_rotation(path) | + | rst_rotation(tile) | +------------------------------------------------------------------------------------------------------------------+ | 1.2 | | 21.2 | @@ -1875,12 +1645,9 @@ rst_rotation .. code-tab:: sql - CREATE TABLE IF NOT EXISTS TABLE coral_netcdf - USING gdal - OPTIONS (extensions "nc", path "dbfs:/FileStore/geospatial/mosaic/sample_raster_data/binary/netcdf-coral") - SELECT rst_rotation(path) + SELECT rst_rotation(tile) FROM table +------------------------------------------------------------------------------------------------------------------+ - | rst_rotation(path) | + | rst_rotation(tile) | +------------------------------------------------------------------------------------------------------------------+ | 1.2 | | 21.2 | @@ -1889,9 +1656,9 @@ rst_rotation rst_scalex ********************** -.. function:: rst_scalex(raster) +.. function:: rst_scalex(tile) - Computes the scale of the raster in the X direction. + Computes the scale of the raster tile in the X direction. :param tile: A column containing the raster tile. For < 0.3.11 string representing the path to a raster file or byte array. :type col: Column (RasterTileType) @@ -1902,35 +1669,27 @@ rst_scalex .. tabs:: .. 
code-tab:: py - df = spark.read.format("binaryFile").option("extensions", "nc")\ - .load("dbfs:/FileStore/geospatial/mosaic/sample_raster_data/binary/netcdf-coral") - df.select(mos.rst_scalex('path')).show() + df.select(mos.rst_scalex('tile')).display() +------------------------------------------------------------------------------------------------------------------+ - | rst_scalex(path) | + | rst_scalex(tile) | +------------------------------------------------------------------------------------------------------------------+ | 1.2 | +------------------------------------------------------------------------------------------------------------------+ .. code-tab:: scala - val df = spark.read - .format("binaryFile").option("extensions", "nc") - .load("dbfs:/FileStore/geospatial/mosaic/sample_raster_data/binary/netcdf-coral") - df.select(rst_scalex(col("path"))).show() + df.select(rst_scalex(col("tile"))).show +------------------------------------------------------------------------------------------------------------------+ - | rst_scalex(path) | + | rst_scalex(tile) | +------------------------------------------------------------------------------------------------------------------+ | 1.2 | +------------------------------------------------------------------------------------------------------------------+ .. 
code-tab:: sql - CREATE TABLE IF NOT EXISTS TABLE coral_netcdf - USING gdal - OPTIONS (extensions "nc", path "dbfs:/FileStore/geospatial/mosaic/sample_raster_data/binary/netcdf-coral") - SELECT rst_scalex(path) + SELECT rst_scalex(tile) FROM table +------------------------------------------------------------------------------------------------------------------+ - | rst_scalex(path) | + | rst_scalex(tile) | +------------------------------------------------------------------------------------------------------------------+ | 1.2 | +------------------------------------------------------------------------------------------------------------------+ @@ -1938,9 +1697,9 @@ rst_scalex rst_scaley ********************** -.. function:: rst_scaley(raster) +.. function:: rst_scaley(tile) - Computes the scale of the raster in the Y direction. + Computes the scale of the raster tile in the Y direction. :param tile: A column containing the raster tile. For < 0.3.11 string representing the path to a raster file or byte array. :type col: Column (RasterTileType) @@ -1951,9 +1710,7 @@ rst_scaley .. tabs:: .. code-tab:: py - df = spark.read.format("binaryFile").option("extensions", "nc")\ - .load("dbfs:/FileStore/geospatial/mosaic/sample_raster_data/binary/netcdf-coral") - df.select(mos.rst_scaley('path')).show() + df.select(mos.rst_scaley('tile')).display() +------------------------------------------------------------------------------------------------------------------+ | rst_scaley(path) | +------------------------------------------------------------------------------------------------------------------+ @@ -1962,24 +1719,18 @@ rst_scaley .. 
code-tab:: scala - val df = spark.read - .format("binaryFile").option("extensions", "nc") - .load("dbfs:/FileStore/geospatial/mosaic/sample_raster_data/binary/netcdf-coral") - df.select(rst_scaley(col("path"))).show() + df.select(rst_scaley(col("tile"))).show +------------------------------------------------------------------------------------------------------------------+ - | rst_scaley(path) | + | rst_scaley(tile) | +------------------------------------------------------------------------------------------------------------------+ | 1.2 | +------------------------------------------------------------------------------------------------------------------+ .. code-tab:: sql - CREATE TABLE IF NOT EXISTS TABLE coral_netcdf - USING gdal - OPTIONS (extensions "nc", path "dbfs:/FileStore/geospatial/mosaic/sample_raster_data/binary/netcdf-coral") - SELECT rst_scaley(path) + SELECT rst_scaley(tile) FROM table +------------------------------------------------------------------------------------------------------------------+ - | rst_scaley(path) | + | rst_scaley(tile) | +------------------------------------------------------------------------------------------------------------------+ | 1.2 | +------------------------------------------------------------------------------------------------------------------+ @@ -1987,10 +1738,10 @@ rst_scaley rst_setnodata ********************** -.. function:: rst_setnodata(raster, nodata) +.. function:: rst_setnodata(tile, nodata) - Sets the nodata value of the raster. - The result is a new raster with the nodata value set. + Sets the nodata value of the raster tile. + The result is a new raster tile with the nodata value set. The same nodata value is set for all bands of the raster if a single value is passed. If an array of values is passed, the nodata value is set for each band of the raster. @@ -1998,7 +1749,7 @@ rst_setnodata :type col: Column (RasterTileType) :param nodata: The nodata value to set. 
:type col: Column (DoubleType) / ArrayType(DoubleType) - :rtype: Column: StringType + :rtype: Column: (RasterTileType) :example: @@ -2006,11 +1757,9 @@ rst_setnodata .. code-tab:: py - df = spark.read.format("binaryFile").option("extensions", "tif")\ - .load("dbfs:/FileStore/geospatial/mosaic/sample_raster_data/binary/tif") - df.select(mos.rst_setnodata('path', F.lit(0))).show() + df.select(mos.rst_setnodata('tile', F.lit(0))).display() +------------------------------------------------------------------------------------------------------------------+ - | rst_setnodata(path, 0) | + | rst_setnodata(tile, 0) | +------------------------------------------------------------------------------------------------------------------+ | {index_id: 593308294097928191, raster: [00 01 10 ... 00], parentPath: "dbfs:/path_to_file", driver: "GTiff" } | | {index_id: 593308294097928192, raster: [00 01 10 ... 00], parentPath: "dbfs:/path_to_file", driver: "GTiff" } | @@ -2018,12 +1767,9 @@ rst_setnodata .. code-tab:: scala - val df = spark.read - .format("binaryFile").option("extensions", "tif") - .load("dbfs:/FileStore/geospatial/mosaic/sample_raster_data/binary/tif") - df.select(rst_setnodata(col("path"), lit(0))).show() + df.select(rst_setnodata(col("tile"), lit(0))).show +------------------------------------------------------------------------------------------------------------------+ - | rst_setnodata(path, 0) | + | rst_setnodata(tile, 0) | +------------------------------------------------------------------------------------------------------------------+ | {index_id: 593308294097928191, raster: [00 01 10 ... 00], parentPath: "dbfs:/path_to_file", driver: "GTiff" } | | {index_id: 593308294097928192, raster: [00 01 10 ... 00], parentPath: "dbfs:/path_to_file", driver: "GTiff" } | @@ -2031,12 +1777,9 @@ rst_setnodata .. 
code-tab:: sql - CREATE TABLE IF NOT EXISTS TABLE coral_tif - USING gdal - OPTIONS (extensions "tif", path "dbfs:/FileStore/geospatial/mosaic/sample_raster_data/binary/tif") - SELECT rst_setnodata(path, 0) + SELECT rst_setnodata(tile, 0) FROM table +------------------------------------------------------------------------------------------------------------------+ - | rst_setnodata(path, 0) | + | rst_setnodata(tile, 0) | +------------------------------------------------------------------------------------------------------------------+ | {index_id: 593308294097928191, raster: [00 01 10 ... 00], parentPath: "dbfs:/path_to_file", driver: "GTiff" } | | {index_id: 593308294097928192, raster: [00 01 10 ... 00], parentPath: "dbfs:/path_to_file", driver: "GTiff" } | @@ -2045,9 +1788,9 @@ rst_setnodata rst_skewx ********************** -.. function:: rst_skewx(raster) +.. function:: rst_skewx(tile) - Computes the skew of the raster in the X direction. + Computes the skew of the raster tile in the X direction. :param tile: A column containing the raster tile. For < 0.3.11 string representing the path to a raster file or byte array.A column containing the path to a raster file. :type col: Column (RasterTileType) @@ -2058,35 +1801,27 @@ rst_skewx .. tabs:: .. code-tab:: py - df = spark.read.format("binaryFile").option("extensions", "nc")\ - .load("dbfs:/FileStore/geospatial/mosaic/sample_raster_data/binary/netcdf-coral") - df.select(mos.rst_skewx('path')).show() + df.select(mos.rst_skewx('tile')).display() +------------------------------------------------------------------------------------------------------------------+ - | rst_skewx(path) | + | rst_skewx(tile) | +------------------------------------------------------------------------------------------------------------------+ | 1.2 | +------------------------------------------------------------------------------------------------------------------+ .. 
code-tab:: scala - val df = spark.read - .format("binaryFile").option("extensions", "nc") - .load("dbfs:/FileStore/geospatial/mosaic/sample_raster_data/binary/netcdf-coral") - df.select(rst_skewx(col("path"))).show() + df.select(rst_skewx(col("tile"))).show +------------------------------------------------------------------------------------------------------------------+ - | rst_skewx(path) | + | rst_skewx(tile) | +------------------------------------------------------------------------------------------------------------------+ | 1.2 | +------------------------------------------------------------------------------------------------------------------+ .. code-tab:: sql - CREATE TABLE IF NOT EXISTS TABLE coral_netcdf - USING gdal - OPTIONS (extensions "nc", path "dbfs:/FileStore/geospatial/mosaic/sample_raster_data/binary/netcdf-coral") - SELECT rst_skewx(path) + SELECT rst_skewx(tile) FROM table +------------------------------------------------------------------------------------------------------------------+ - | rst_skewx(path) | + | rst_skewx(tile) | +------------------------------------------------------------------------------------------------------------------+ | 1.2 | +------------------------------------------------------------------------------------------------------------------+ @@ -2094,9 +1829,9 @@ rst_skewx rst_skewy ********************** -.. function:: rst_skewx(raster) +.. function:: rst_skewy(tile) - Computes the skew of the raster in the Y direction. + Computes the skew of the raster tile in the Y direction. :param tile: A column containing the raster tile. For < 0.3.11 string representing the path to a raster file or byte array.A column containing the path to a raster file. :type col: Column (RasterTileType) @@ -2107,35 +1842,27 @@ rst_skewy .. tabs:: .. 
code-tab:: py - df = spark.read.format("binaryFile").option("extensions", "nc")\ - .load("dbfs:/FileStore/geospatial/mosaic/sample_raster_data/binary/netcdf-coral") - df.select(mos.rst_skewy('path')).show() + df.select(mos.rst_skewy('tile')).display() +------------------------------------------------------------------------------------------------------------------+ - | rst_skewy(path) | + | rst_skewy(tile) | +------------------------------------------------------------------------------------------------------------------+ | 1.2 | +------------------------------------------------------------------------------------------------------------------+ .. code-tab:: scala - val df = spark.read - .format("binaryFile").option("extensions", "nc") - .load("dbfs:/FileStore/geospatial/mosaic/sample_raster_data/binary/netcdf-coral") - df.select(rst_skewy(col("path"))).show() + df.select(rst_skewy(col("tile"))).show +------------------------------------------------------------------------------------------------------------------+ - | rst_skewy(path) | + | rst_skewy(tile) | +------------------------------------------------------------------------------------------------------------------+ | 1.2 | +------------------------------------------------------------------------------------------------------------------+ .. 
code-tab:: sql - CREATE TABLE IF NOT EXISTS TABLE coral_netcdf - USING gdal - OPTIONS (extensions "nc", path "dbfs:/FileStore/geospatial/mosaic/sample_raster_data/binary/netcdf-coral") - SELECT rst_skewy(path) + SELECT rst_skewy(tile) FROM table +------------------------------------------------------------------------------------------------------------------+ - | rst_skewy(path) | + | rst_skewy(tile) | +------------------------------------------------------------------------------------------------------------------+ | 1.2 | +------------------------------------------------------------------------------------------------------------------+ @@ -2143,9 +1870,9 @@ rst_skewy rst_srid ********************** -.. function:: rst_srid(raster) +.. function:: rst_srid(tile) - Computes the SRID of the raster. + Computes the SRID of the raster tile. The SRID is the EPSG code of the raster. .. note:: For complex CRS definition the EPSG code may default to 0. @@ -2159,35 +1886,27 @@ rst_srid .. tabs:: .. code-tab:: py - df = spark.read.format("binaryFile").option("extensions", "nc")\ - .load("dbfs:/FileStore/geospatial/mosaic/sample_raster_data/binary/netcdf-coral") - df.select(mos.rst_srid('path')).show() + df.select(mos.rst_srid('tile')).display() +------------------------------------------------------------------------------------------------------------------+ - | rst_srid(path) | + | rst_srid(tile) | +------------------------------------------------------------------------------------------------------------------+ | 9122 | +------------------------------------------------------------------------------------------------------------------+ .. 
code-tab:: scala - val df = spark.read - .format("binaryFile").option("extensions", "nc") - .load("dbfs:/FileStore/geospatial/mosaic/sample_raster_data/binary/netcdf-coral") - df.select(rst_srid(col("path"))).show() + df.select(rst_srid(col("tile"))).show +------------------------------------------------------------------------------------------------------------------+ - | rst_srid(path) | + | rst_srid(tile) | +------------------------------------------------------------------------------------------------------------------+ | 9122 | +------------------------------------------------------------------------------------------------------------------+ .. code-tab:: sql - CREATE TABLE IF NOT EXISTS TABLE coral_netcdf - USING gdal - OPTIONS (extensions "nc", path "dbfs:/FileStore/geospatial/mosaic/sample_raster_data/binary/netcdf-coral") - SELECT rst_srid(path) + SELECT rst_srid(tile) FROM table +------------------------------------------------------------------------------------------------------------------+ - | rst_srid(path) | + | rst_srid(tile) | +------------------------------------------------------------------------------------------------------------------+ | 9122 | +------------------------------------------------------------------------------------------------------------------+ @@ -2195,9 +1914,9 @@ rst_srid rst_subdatasets ********************** -.. function:: rst_subdatasets(raster) +.. function:: rst_subdatasets(tile) - Computes the subdatasets of the raster. + Computes the subdatasets of the raster tile. The subdatasets are the paths to the subdatasets of the raster. The result is a map of the subdataset path to the subdatasets and the description of the subdatasets. @@ -2210,11 +1929,9 @@ rst_subdatasets .. tabs:: .. 
code-tab:: py - df = spark.read.format("binaryFile").option("extensions", "nc")\ - .load("dbfs:/FileStore/geospatial/mosaic/sample_raster_data/binary/netcdf-coral") - df.select(mos.rst_subdatasets('path')).show() + df.select(mos.rst_subdatasets('tile')).display() +--------------------------------------------------------------------------------------------------------------------+ - | rst_subdatasets(path) | + | rst_subdatasets(tile) | +--------------------------------------------------------------------------------------------------------------------+ | {"NETCDF:\"/dbfs/FileStore/geospatial/mosaic/sample_raster_data/binary/netcdf-coral/ct5km_baa_max_7d_v3_1_2022010 | | 6-1.nc\":bleaching_alert_area": "[1x3600x7200] N/A (8-bit unsigned integer)", "NETCDF:\"/dbfs/FileStore/geospatial | @@ -2224,12 +1941,9 @@ rst_subdatasets .. code-tab:: scala - val df = spark.read - .format("binaryFile").option("extensions", "nc") - .load("dbfs:/FileStore/geospatial/mosaic/sample_raster_data/binary/netcdf-coral") - df.select(rst_subdatasets(col("path"))).show() + df.select(rst_subdatasets(col("tile"))).show +--------------------------------------------------------------------------------------------------------------------+ - | rst_subdatasets(path) | + | rst_subdatasets(tile) | +--------------------------------------------------------------------------------------------------------------------+ | {"NETCDF:\"/dbfs/FileStore/geospatial/mosaic/sample_raster_data/binary/netcdf-coral/ct5km_baa_max_7d_v3_1_2022010 | | 6-1.nc\":bleaching_alert_area": "[1x3600x7200] N/A (8-bit unsigned integer)", "NETCDF:\"/dbfs/FileStore/geospatial | @@ -2239,12 +1953,9 @@ rst_subdatasets .. 
code-tab:: sql - CREATE TABLE IF NOT EXISTS TABLE coral_netcdf - USING gdal - OPTIONS (extensions "nc", path "dbfs:/FileStore/geospatial/mosaic/sample_raster_data/binary/netcdf-coral") - SELECT rst_subdatasets(path) + SELECT rst_subdatasets(tile) FROM table +--------------------------------------------------------------------------------------------------------------------+ - | rst_subdatasets(path) | + | rst_subdatasets(tile) | +--------------------------------------------------------------------------------------------------------------------+ | {"NETCDF:\"/dbfs/FileStore/geospatial/mosaic/sample_raster_data/binary/netcdf-coral/ct5km_baa_max_7d_v3_1_2022010 | | 6-1.nc\":bleaching_alert_area": "[1x3600x7200] N/A (8-bit unsigned integer)", "NETCDF:\"/dbfs/FileStore/geospatial | @@ -2255,9 +1966,9 @@ rst_subdatasets rst_subdivide ********************** -.. function:: rst_subdivide(raster, sizeInMB) +.. function:: rst_subdivide(tile, sizeInMB) - Subdivides the raster to the given tile size in MB. The result is a collection of new raster files. + Subdivides the raster tile to the given tile size in MB. The result is a collection of new raster tiles. The tiles are split until the expected size of a tile is < sizeInMB. The tile is always split in 4 tiles. This ensures that the tiles are always split in the same way. The aspect ratio of the tiles is preserved. @@ -2275,11 +1986,9 @@ rst_subdivide .. 
code-tab:: py - df = spark.read.format("binaryFile").option("extensions", "tif")\ - .load("dbfs:/FileStore/geospatial/mosaic/sample_raster_data/binary/tif") - df.select(mos.rst_subdivide('path', F.lit(10))).show() + df.select(mos.rst_subdivide('tile', F.lit(10))).display() +------------------------------------------------------------------------------------------------------------------+ - | rst_subdivide(path, 10) | + | rst_subdivide(tile, 10) | +------------------------------------------------------------------------------------------------------------------+ | {index_id: 593308294097928191, raster: [00 01 10 ... 00], parentPath: "dbfs:/path_to_file", driver: "GTiff" } | | {index_id: 593308294097928192, raster: [00 01 10 ... 00], parentPath: "dbfs:/path_to_file", driver: "GTiff" } | @@ -2287,12 +1996,9 @@ rst_subdivide .. code-tab:: scala - val df = spark.read - .format("binaryFile").option("extensions", "tif") - .load("dbfs:/FileStore/geospatial/mosaic/sample_raster_data/binary/tif") - df.select(rst_subdivide(col("path"), lit(10))).show() + df.select(rst_subdivide(col("tile"), lit(10))).show +------------------------------------------------------------------------------------------------------------------+ - | rst_subdivide(path, 10) | + | rst_subdivide(tile, 10) | +------------------------------------------------------------------------------------------------------------------+ | {index_id: 593308294097928191, raster: [00 01 10 ... 00], parentPath: "dbfs:/path_to_file", driver: "GTiff" } | | {index_id: 593308294097928192, raster: [00 01 10 ... 00], parentPath: "dbfs:/path_to_file", driver: "GTiff" } | @@ -2300,12 +2006,9 @@ rst_subdivide .. 
code-tab:: sql - CREATE TABLE IF NOT EXISTS TABLE coral_tif - USING gdal - OPTIONS (extensions "tif", path "dbfs:/FileStore/geospatial/mosaic/sample_raster_data/binary/tif") - SELECT rst_subdivide(path, 10) + SELECT rst_subdivide(tile, 10) FROM table +------------------------------------------------------------------------------------------------------------------+ - | rst_subdivide(path, 10) | + | rst_subdivide(tile, 10) | +------------------------------------------------------------------------------------------------------------------+ | {index_id: 593308294097928191, raster: [00 01 10 ... 00], parentPath: "dbfs:/path_to_file", driver: "GTiff" } | | {index_id: 593308294097928192, raster: [00 01 10 ... 00], parentPath: "dbfs:/path_to_file", driver: "GTiff" } | @@ -2314,9 +2017,9 @@ rst_subdivide rst_summary ********************** -.. function:: rst_summary(raster) +.. function:: rst_summary(tile) - Computes the summary of the raster. + Computes the summary of the raster tile. The summary is a map of the statistics of the raster. The logic is produced by gdalinfo procedure. The result is stored as JSON. @@ -2330,11 +2033,9 @@ rst_summary .. tabs:: .. code-tab:: py - df = spark.read.format("binaryFile").option("extensions", "nc")\ - .load("dbfs:/FileStore/geospatial/mosaic/sample_raster_data/binary/netcdf-coral") - df.select(mos.rst_summary('path')).show() + df.select(mos.rst_summary('tile')).display() +------------------------------------------------------------------------------------------------------------------+ - | rst_summary(path) | + | rst_summary(tile) | +------------------------------------------------------------------------------------------------------------------+ | { "description":"/dbfs/FileStore/geospatial/mosaic/sample_raster_data/binary/netcdf-coral/ct5km_baa_max_7d_v3_1| |_20220106-1.nc", "driverShortName":"netCDF", "driverLongName":"Network Common Data Format", "files":[ | @@ -2345,12 +2046,9 @@ rst_summary .. 
code-tab:: scala - val df = spark.read - .format("binaryFile").option("extensions", "nc") - .load("dbfs:/FileStore/geospatial/mosaic/sample_raster_data/binary/netcdf-coral") - df.select(rst_summary(col("path"))).show() + df.select(rst_summary(col("tile"))).show +------------------------------------------------------------------------------------------------------------------+ - | rst_summary(path) | + | rst_summary(tile) | +------------------------------------------------------------------------------------------------------------------+ | { "description":"/dbfs/FileStore/geospatial/mosaic/sample_raster_data/binary/netcdf-coral/ct5km_baa_max_7d_v3_1| |_20220106-1.nc", "driverShortName":"netCDF", "driverLongName":"Network Common Data Format", "files":[ | @@ -2361,12 +2059,9 @@ rst_summary .. code-tab:: sql - CREATE TABLE IF NOT EXISTS TABLE coral_netcdf - USING gdal - OPTIONS (extensions "nc", path "dbfs:/FileStore/geospatial/mosaic/sample_raster_data/binary/netcdf-coral") - SELECT rst_summary(path) + SELECT rst_summary(tile) FROM table +------------------------------------------------------------------------------------------------------------------+ - | rst_summary(path) | + | rst_summary(tile) | +------------------------------------------------------------------------------------------------------------------+ | { "description":"/dbfs/FileStore/geospatial/mosaic/sample_raster_data/binary/netcdf-coral/ct5km_baa_max_7d_v3_1| |_20220106-1.nc", "driverShortName":"netCDF", "driverLongName":"Network Common Data Format", "files":[ | @@ -2378,9 +2073,9 @@ rst_summary rst_tessellate ********************** -.. function:: rst_tessellate(raster, resolution) +.. function:: rst_tessellate(tile, resolution) - Tessellates the raster to the given resolution of the supported grid (H3, BNG, Custom). The result is a collection of new raster files. + Tessellates the raster tile to the given resolution of the supported grid (H3, BNG, Custom). 
The result is a collection of new raster tiles. Each tile in the tile set corresponds to a cell that is a part of the tesselation of the bounding box of the raster. The result set is automatically exploded. If rst_merge is called on the tile set the original raster will be reconstructed. @@ -2388,18 +2083,16 @@ rst_tessellate :param tile: A column containing the raster tile. :type col: Column (RasterTileType) - :param sizeInMB: The size of the tiles in MB. + :param resolution: The resolution of the supported grid. :example: .. tabs:: .. code-tab:: py - df = spark.read.format("binaryFile").option("extensions", "tif")\ - .load("dbfs:/FileStore/geospatial/mosaic/sample_raster_data/binary/tif") - df.select(mos.rst_tessellate('path', F.lit(10))).show() + df.select(mos.rst_tessellate('tile', F.lit(10))).display() +------------------------------------------------------------------------------------------------------------------+ - | rst_tessellate(path, 10) | + | rst_tessellate(tile, 10) | +------------------------------------------------------------------------------------------------------------------+ | {index_id: 593308294097928191, raster: [00 01 10 ... 00], parentPath: "dbfs:/path_to_file", driver: "GTiff" } | | {index_id: 593308294097928192, raster: [00 01 10 ... 00], parentPath: "dbfs:/path_to_file", driver: "GTiff" } | @@ -2407,12 +2100,9 @@ rst_tessellate .. 
code-tab:: scala - val df = spark.read - .format("binaryFile").option("extensions", "tif") - .load("dbfs:/FileStore/geospatial/mosaic/sample_raster_data/binary/tif") - df.select(rst_tessellate(col("path"), lit(10))).show() + df.select(rst_tessellate(col("tile"), lit(10))).show +------------------------------------------------------------------------------------------------------------------+ - | rst_tessellate(path, 10) | + | rst_tessellate(tile, 10) | +------------------------------------------------------------------------------------------------------------------+ | {index_id: 593308294097928191, raster: [00 01 10 ... 00], parentPath: "dbfs:/path_to_file", driver: "GTiff" } | | {index_id: 593308294097928192, raster: [00 01 10 ... 00], parentPath: "dbfs:/path_to_file", driver: "GTiff" } | @@ -2420,12 +2110,9 @@ rst_tessellate .. code-tab:: sql - CREATE TABLE IF NOT EXISTS TABLE coral_tif - USING gdal - OPTIONS (extensions "tif", path "dbfs:/FileStore/geospatial/mosaic/sample_raster_data/binary/tif") - SELECT rst_tessellate(path, 10) + SELECT rst_tessellate(tile, 10) FROM table +------------------------------------------------------------------------------------------------------------------+ - | rst_tessellate(path, 10) | + | rst_tessellate(tile, 10) | +------------------------------------------------------------------------------------------------------------------+ | {index_id: 593308294097928191, raster: [00 01 10 ... 00], parentPath: "dbfs:/path_to_file", driver: "GTiff" } | | {index_id: 593308294097928192, raster: [00 01 10 ... 00], parentPath: "dbfs:/path_to_file", driver: "GTiff" } | @@ -2434,9 +2121,9 @@ rst_tessellate rst_tooverlappingtiles ********************** -.. function:: rst_tooverlappingtiles(raster, width, height, overlap) +.. function:: rst_tooverlappingtiles(tile, width, height, overlap) - Splits the raster into overlapping tiles of the given width and height. + Splits the raster tile into overlapping tiles of the given width and height. 
The overlap is the the percentage of the tile size that the tiles overlap. The result is a collection of new raster files. The result set is automatically exploded. @@ -2457,11 +2144,9 @@ rst_tooverlappingtiles .. tabs:: .. code-tab:: py - df = spark.read.format("binaryFile").option("extensions", "tif")\ - .load("dbfs:/FileStore/geospatial/mosaic/sample_raster_data/binary/tif") - df.select(mos.rst_tooverlappingtiles('path', F.lit(10), F.lit(10), F.lit(10))).show() + df.select(mos.rst_tooverlappingtiles('tile', F.lit(10), F.lit(10), F.lit(10))).display() +------------------------------------------------------------------------------------------------------------------+ - | rst_tooverlappingtiles(path, 10, 10, 10) | + | rst_tooverlappingtiles(tile, 10, 10, 10) | +------------------------------------------------------------------------------------------------------------------+ | {index_id: 593308294097928191, raster: [00 01 10 ... 00], parentPath: "dbfs:/path_to_file", driver: "GTiff" } | | {index_id: 593308294097928192, raster: [00 01 10 ... 00], parentPath: "dbfs:/path_to_file", driver: "GTiff" } | @@ -2469,12 +2154,9 @@ rst_tooverlappingtiles .. code-tab:: scala - val df = spark.read - .format("binaryFile").option("extensions", "tif") - .load("dbfs:/FileStore/geospatial/mosaic/sample_raster_data/binary/tif - df.select(rst_tooverlappingtiles(col("path"), lit(10), lit(10), lit(10))).show() + df.select(rst_tooverlappingtiles(col("tile"), lit(10), lit(10), lit(10))).show +------------------------------------------------------------------------------------------------------------------+ - | rst_tooverlappingtiles(path, 10, 10, 10) | + | rst_tooverlappingtiles(tile, 10, 10, 10) | +------------------------------------------------------------------------------------------------------------------+ | {index_id: 593308294097928191, raster: [00 01 10 ... 00], parentPath: "dbfs:/path_to_file", driver: "GTiff" } | | {index_id: 593308294097928192, raster: [00 01 10 ... 
00], parentPath: "dbfs:/path_to_file", driver: "GTiff" } | @@ -2482,12 +2164,9 @@ rst_tooverlappingtiles .. code-tab:: sql - CREATE TABLE IF NOT EXISTS TABLE coral_tif - USING gdal - OPTIONS (extensions "tif", path "dbfs:/FileStore/geospatial/mosaic/sample_raster_data/binary/tif") - SELECT rst_tooverlappingtiles(path, 10, 10, 10) + SELECT rst_tooverlappingtiles(tile, 10, 10, 10) FROM table +------------------------------------------------------------------------------------------------------------------+ - | rst_tooverlappingtiles(path, 10, 10, 10) | + | rst_tooverlappingtiles(tile, 10, 10, 10) | +------------------------------------------------------------------------------------------------------------------+ | {index_id: 593308294097928191, raster: [00 01 10 ... 00], parentPath: "dbfs:/path_to_file", driver: "GTiff" } | | {index_id: 593308294097928192, raster: [00 01 10 ... 00], parentPath: "dbfs:/path_to_file", driver: "GTiff" } | @@ -2496,9 +2175,9 @@ rst_tooverlappingtiles rst_tryopen ********************** -.. function:: rst_tryopen(raster) +.. function:: rst_tryopen(tile) - Tries to open the raster. If the raster cannot be opened the result is false and if the raster can be opened the result is true. + Tries to open the raster tile. If the raster cannot be opened the result is false and if the raster can be opened the result is true. :param tile: A column containing the raster tile. :type col: Column (RasterTileType) @@ -2509,35 +2188,27 @@ rst_tryopen .. tabs:: .. 
code-tab:: py - df = spark.read.format("binaryFile").option("extensions", "tif")\ - .load("dbfs:/FileStore/geospatial/mosaic/sample_raster_data/binary/tif") - df.select(mos.rst_tryopen('path')).show() + df.select(mos.rst_tryopen('tile')).display() +------------------------------------------------------------------------------------------------------------------+ - | rst_tryopen(path) | + | rst_tryopen(tile) | +------------------------------------------------------------------------------------------------------------------+ | true | +------------------------------------------------------------------------------------------------------------------+ .. code-tab:: scala - val df = spark.read - .format("binaryFile").option("extensions", "tif") - .load("dbfs:/FileStore/geospatial/mosaic/sample_raster_data/binary/tif - df.select(rst_tryopen(col("path"))).show() + df.select(rst_tryopen(col("tile"))).show +------------------------------------------------------------------------------------------------------------------+ - | rst_tryopen(path) | + | rst_tryopen(tile) | +------------------------------------------------------------------------------------------------------------------+ | true | +------------------------------------------------------------------------------------------------------------------+ .. 
code-tab:: sql - CREATE TABLE IF NOT EXISTS TABLE coral_tif - USING gdal - OPTIONS (extensions "tif", path "dbfs:/FileStore/geospatial/mosaic/sample_raster_data/binary/tif") - SELECT rst_tryopen(path) + SELECT rst_tryopen(tile) FROM table +------------------------------------------------------------------------------------------------------------------+ - | rst_tryopen(path) | + | rst_tryopen(tile) | +------------------------------------------------------------------------------------------------------------------+ | true | +------------------------------------------------------------------------------------------------------------------+ @@ -2545,9 +2216,9 @@ rst_tryopen rst_upperleftx ********************** -.. function:: rst_upperleftx(raster) +.. function:: rst_upperleftx(tile) - Computes the upper left X coordinate of the raster. + Computes the upper left X coordinate of the raster tile. The value is computed based on GeoTransform. :param tile: A column containing the raster tile. For < 0.3.11 string representing the path to a raster file or byte array.A column containing the path to a raster file. @@ -2559,35 +2230,27 @@ rst_upperleftx .. tabs:: .. code-tab:: py - df = spark.read.format("binaryFile").option("extensions", "nc")\ - .load("dbfs:/FileStore/geospatial/mosaic/sample_raster_data/binary/netcdf-coral") - df.select(mos.rst_upperleftx('path')).show() + df.select(mos.rst_upperleftx('tile')).display() +------------------------------------------------------------------------------------------------------------------+ - | rst_upperleftx(path) | + | rst_upperleftx(tile) | +------------------------------------------------------------------------------------------------------------------+ | -180.00000610436345 | +------------------------------------------------------------------------------------------------------------------+ .. 
code-tab:: scala - val df = spark.read - .format("binaryFile").option("extensions", "nc") - .load("dbfs:/FileStore/geospatial/mosaic/sample_raster_data/binary/netcdf-coral") - df.select(rst_upperleftx(col("path"))).show() + df.select(rst_upperleftx(col("tile"))).show +------------------------------------------------------------------------------------------------------------------+ - | rst_upperleftx(path) | + | rst_upperleftx(tile) | +------------------------------------------------------------------------------------------------------------------+ | -180.00000610436345 | +------------------------------------------------------------------------------------------------------------------+ .. code-tab:: sql - CREATE TABLE IF NOT EXISTS TABLE coral_netcdf - USING gdal - OPTIONS (extensions "nc", path "dbfs:/FileStore/geospatial/mosaic/sample_raster_data/binary/netcdf-coral") - SELECT rst_upperleftx(path) + SELECT rst_upperleftx(tile) FROM table +------------------------------------------------------------------------------------------------------------------+ - | rst_upperleftx(path) | + | rst_upperleftx(tile) | +------------------------------------------------------------------------------------------------------------------+ | -180.00000610436345 | +------------------------------------------------------------------------------------------------------------------+ @@ -2595,9 +2258,9 @@ rst_upperleftx rst_upperlefty ********************** -.. function:: rst_upperlefty(raster) +.. function:: rst_upperlefty(tile) - Computes the upper left Y coordinate of the raster. + Computes the upper left Y coordinate of the raster tile. The value is computed based on GeoTransform. :param tile: A column containing the raster tile. For < 0.3.11 string representing the path to a raster file or byte array.A column containing the path to a raster file. @@ -2609,35 +2272,27 @@ rst_upperlefty .. tabs:: .. 
code-tab:: py - df = spark.read.format("binaryFile").option("extensions", "nc")\ - .load("dbfs:/FileStore/geospatial/mosaic/sample_raster_data/binary/netcdf-coral") - df.select(mos.rst_upperlefty('path')).show() + df.select(mos.rst_upperlefty('tile')).display() +------------------------------------------------------------------------------------------------------------------+ - | rst_upperlefty(path) | + | rst_upperlefty(tile) | +------------------------------------------------------------------------------------------------------------------+ | 89.99999847369712 | +------------------------------------------------------------------------------------------------------------------+ .. code-tab:: scala - val df = spark.read - .format("binaryFile").option("extensions", "nc") - .load("dbfs:/FileStore/geospatial/mosaic/sample_raster_data/binary/netcdf-coral") - df.select(rst_upperlefty(col("path"))).show() + df.select(rst_upperlefty(col("tile"))).show +------------------------------------------------------------------------------------------------------------------+ - | rst_upperlefty(path) | + | rst_upperlefty(tile) | +------------------------------------------------------------------------------------------------------------------+ | 89.99999847369712 | +------------------------------------------------------------------------------------------------------------------+ .. 
code-tab:: sql - CREATE TABLE IF NOT EXISTS TABLE coral_netcdf - USING gdal - OPTIONS (extensions "nc", path "dbfs:/FileStore/geospatial/mosaic/sample_raster_data/binary/netcdf-coral") - SELECT rst_upperlefty(path) + SELECT rst_upperlefty(tile) FROM table +------------------------------------------------------------------------------------------------------------------+ - | rst_upperlefty(path) | + | rst_upperlefty(tile) | +------------------------------------------------------------------------------------------------------------------+ | 89.99999847369712 | +------------------------------------------------------------------------------------------------------------------+ @@ -2645,9 +2300,9 @@ rst_upperlefty rst_width ********************** -.. function:: rst_width(raster) +.. function:: rst_width(tile) - Computes the width of the raster in pixels. + Computes the width of the raster tile in pixels. :param tile: A column containing the raster tile. For < 0.3.11 string representing the path to a raster file or byte array.A column containing the path to a raster file. @@ -2659,35 +2314,27 @@ rst_width .. tabs:: .. code-tab:: py - df = spark.read.format("binaryFile").option("extensions", "nc")\ - .load("dbfs:/FileStore/geospatial/mosaic/sample_raster_data/binary/netcdf-coral") - df.select(mos.rst_width('path')).show() + df.select(mos.rst_width('tile')).display() +------------------------------------------------------------------------------------------------------------------+ - | rst_width(path) | + | rst_width(tile) | +------------------------------------------------------------------------------------------------------------------+ | 600 | +------------------------------------------------------------------------------------------------------------------+ .. 
code-tab:: scala - val df = spark.read - .format("binaryFile").option("extensions", "nc") - .load("dbfs:/FileStore/geospatial/mosaic/sample_raster_data/binary/netcdf-coral") - df.select(rst_width(col("path"))).show() + df.select(rst_width(col("tile"))).show +------------------------------------------------------------------------------------------------------------------+ - | rst_width(path) | + | rst_width(tile) | +------------------------------------------------------------------------------------------------------------------+ | 600 | +------------------------------------------------------------------------------------------------------------------+ .. code-tab:: sql - CREATE TABLE IF NOT EXISTS TABLE coral_netcdf - USING gdal - OPTIONS (extensions "nc", path "dbfs:/FileStore/geospatial/mosaic/sample_raster_data/binary/netcdf-coral") - SELECT rst_width(path) + SELECT rst_width(tile) FROM table +------------------------------------------------------------------------------------------------------------------+ - | rst_width(path) | + | rst_width(tile) | +------------------------------------------------------------------------------------------------------------------+ | 600 | +------------------------------------------------------------------------------------------------------------------+ @@ -2695,9 +2342,9 @@ rst_width rst_worldtorastercoord ********************** -.. function:: rst_worldtorastercoord(raster, xworld, yworld) +.. function:: rst_worldtorastercoord(tile, xworld, yworld) - Computes the raster coordinates of the world coordinates. + Computes the raster tile coordinates of the world coordinates. The raster coordinates are the pixel coordinates of the raster. The world coordinates are the coordinates in the CRS of the raster. The coordinates are resolved using GeoTransform. @@ -2715,35 +2362,27 @@ rst_worldtorastercoord .. tabs:: .. 
code-tab:: py - df = spark.read.format("binaryFile").option("extensions", "nc")\ - .load("dbfs:/FileStore/geospatial/mosaic/sample_raster_data/binary/netcdf-coral") - df.select(mos.rst_worldtorastercoord('path', F.lit(-160.1), F.lit(40.0))).show() + df.select(mos.rst_worldtorastercoord('tile', F.lit(-160.1), F.lit(40.0))).display() +------------------------------------------------------------------------------------------------------------------+ - | rst_worldtorastercoord(path) | + | rst_worldtorastercoord(tile, -160.1, 40.0) | +------------------------------------------------------------------------------------------------------------------+ | {"x": 398, "y": 997} | +------------------------------------------------------------------------------------------------------------------+ .. code-tab:: scala - val df = spark.read - .format("binaryFile").option("extensions", "nc") - .load("dbfs:/FileStore/geospatial/mosaic/sample_raster_data/binary/netcdf-coral") - df.select(rst_worldtorastercoord(col("path"), lit(-160.1), lit(40.0))).show() + df.select(rst_worldtorastercoord(col("tile"), lit(-160.1), lit(40.0))).show +------------------------------------------------------------------------------------------------------------------+ - | rst_worldtorastercoord(path) | + | rst_worldtorastercoord(tile, -160.1, 40.0) | +------------------------------------------------------------------------------------------------------------------+ | {"x": 398, "y": 997} | +------------------------------------------------------------------------------------------------------------------+ .. 
code-tab:: sql - CREATE TABLE IF NOT EXISTS TABLE coral_netcdf - USING gdal - OPTIONS (extensions "nc", path "dbfs:/FileStore/geospatial/mosaic/sample_raster_data/binary/netcdf-coral") - SELECT rst_worldtorastercoord(path, -160.1, 40.0) + SELECT rst_worldtorastercoord(tile, -160.1, 40.0) FROM table +------------------------------------------------------------------------------------------------------------------+ - | rst_worldtorastercoord(path) | + | rst_worldtorastercoord(tile, -160.1, 40.0) | +------------------------------------------------------------------------------------------------------------------+ | {"x": 398, "y": 997} | +------------------------------------------------------------------------------------------------------------------+ @@ -2751,9 +2390,9 @@ rst_worldtorastercoord rst_worldtorastercoordx *********************** -.. function:: rst_worldtorastercoordx(raster, xworld, yworld) +.. function:: rst_worldtorastercoordx(tile, xworld, yworld) - Computes the raster coordinates of the world coordinates. + Computes the raster tile coordinates of the world coordinates. The raster coordinates are the pixel coordinates of the raster. The world coordinates are the coordinates in the CRS of the raster. The coordinates are resolved using GeoTransform. @@ -2773,35 +2412,27 @@ rst_worldtorastercoordx .. tabs:: .. 
code-tab:: py - df = spark.read.format("binaryFile").option("extensions", "nc")\ - .load("dbfs:/FileStore/geospatial/mosaic/sample_raster_data/binary/netcdf-coral") - df.select(mos.rst_worldtorastercoord('path', F.lit(-160.1), F.lit(40.0))).show() + df.select(mos.rst_worldtorastercoordx('tile', F.lit(-160.1), F.lit(40.0))).display() +------------------------------------------------------------------------------------------------------------------+ - | rst_worldtorastercoordx(path, -160.1, 40.0) | + | rst_worldtorastercoordx(tile, -160.1, 40.0) | +------------------------------------------------------------------------------------------------------------------+ | 398 | +------------------------------------------------------------------------------------------------------------------+ .. code-tab:: scala - val df = spark.read - .format("binaryFile").option("extensions", "nc") - .load("dbfs:/FileStore/geospatial/mosaic/sample_raster_data/binary/netcdf-coral") - df.select(rst_worldtorastercoordx(col("path"), lit(-160.1), lit(40.0))).show() + df.select(rst_worldtorastercoordx(col("tile"), lit(-160.1), lit(40.0))).show +------------------------------------------------------------------------------------------------------------------+ - | rst_worldtorastercoordx(path, -160.1, 40.0) | + | rst_worldtorastercoordx(tile, -160.1, 40.0) | +------------------------------------------------------------------------------------------------------------------+ | 398 | +------------------------------------------------------------------------------------------------------------------+ .. 
code-tab:: sql - CREATE TABLE IF NOT EXISTS TABLE coral_netcdf - USING gdal - OPTIONS (extensions "nc", path "dbfs:/FileStore/geospatial/mosaic/sample_raster_data/binary/netcdf-coral") - SELECT rst_worldtorastercoordx(path, -160.1, 40.0) + SELECT rst_worldtorastercoordx(tile, -160.1, 40.0) FROM table +------------------------------------------------------------------------------------------------------------------+ - | rst_worldtorastercoordx(path, -160.1, 40.0) | + | rst_worldtorastercoordx(tile, -160.1, 40.0) | +------------------------------------------------------------------------------------------------------------------+ | 398 | +------------------------------------------------------------------------------------------------------------------+ @@ -2809,9 +2440,9 @@ rst_worldtorastercoordx rst_worldtorastercoordy *********************** -.. function:: rst_worldtorastercoordy(raster, xworld, yworld) +.. function:: rst_worldtorastercoordy(tile, xworld, yworld) - Computes the raster coordinates of the world coordinates. + Computes the raster tile coordinates of the world coordinates. The raster coordinates are the pixel coordinates of the raster. The world coordinates are the coordinates in the CRS of the raster. The coordinates are resolved using GeoTransform. @@ -2831,35 +2462,27 @@ rst_worldtorastercoordy .. tabs:: .. 
code-tab:: py - df = spark.read.format("binaryFile").option("extensions", "nc")\ - .load("dbfs:/FileStore/geospatial/mosaic/sample_raster_data/binary/netcdf-coral") - df.select(mos.rst_worldtorastercoordy('path', F.lit(-160.1), F.lit(40.0))).show() + df.select(mos.rst_worldtorastercoordy('tile', F.lit(-160.1), F.lit(40.0))).display() +------------------------------------------------------------------------------------------------------------------+ - | rst_worldtorastercoordy(path, -160.1, 40.0) | + | rst_worldtorastercoordy(tile, -160.1, 40.0) | +------------------------------------------------------------------------------------------------------------------+ | 997 | +------------------------------------------------------------------------------------------------------------------+ .. code-tab:: scala - val df = spark.read - .format("binaryFile").option("extensions", "nc") - .load("dbfs:/FileStore/geospatial/mosaic/sample_raster_data/binary/netcdf-coral") - df.select(rst_worldtorastercoordy(col("path"), lit(-160.1), lit(40.0))).show() + df.select(rst_worldtorastercoordy(col("tile"), lit(-160.1), lit(40.0))).show +------------------------------------------------------------------------------------------------------------------+ - | rst_worldtorastercoordy(path, -160.1, 40.0) | + | rst_worldtorastercoordy(tile, -160.1, 40.0) | +------------------------------------------------------------------------------------------------------------------+ | 997 | +------------------------------------------------------------------------------------------------------------------+ .. 
code-tab:: sql - CREATE TABLE IF NOT EXISTS TABLE coral_netcdf - USING gdal - OPTIONS (extensions "nc", path "dbfs:/FileStore/geospatial/mosaic/sample_raster_data/binary/netcdf-coral") - SELECT rst_worldtorastercoordy(path, -160.1, 40.0) + SELECT rst_worldtorastercoordy(tile, -160.1, 40.0) FROM table +------------------------------------------------------------------------------------------------------------------+ - | rst_worldtorastercoordy(path, -160.1, 40.0) | + | rst_worldtorastercoordy(tile, -160.1, 40.0) | +------------------------------------------------------------------------------------------------------------------+ | 997 | +------------------------------------------------------------------------------------------------------------------+ diff --git a/python/mosaic/api/raster.py b/python/mosaic/api/raster.py index b191ba8d5..a2d203363 100644 --- a/python/mosaic/api/raster.py +++ b/python/mosaic/api/raster.py @@ -1,8 +1,10 @@ +from mosaic.config import config +from mosaic.utils.types import ColumnOrName from pyspark.sql import Column from pyspark.sql.functions import _to_java_column as pyspark_to_java_column +from pyspark.sql.functions import lit +from typing import Any -from mosaic.config import config -from mosaic.utils.types import ColumnOrName ####################### # Raster functions # @@ -61,14 +63,14 @@ ] -def rst_bandmetadata(raster: ColumnOrName, band: ColumnOrName) -> Column: +def rst_bandmetadata(raster_tile: ColumnOrName, band: ColumnOrName) -> Column: """ Returns the metadata for the band as a map type, (key->value) pairs. Parameters ---------- - raster : Column (StringType) - Path to the raster file. + raster_tile : Column (StringType) + Mosaic raster tile struct column. band : Column (IntegerType) Band index, starts from 1. 
@@ -79,18 +81,18 @@ def rst_bandmetadata(raster: ColumnOrName, band: ColumnOrName) -> Column: """ return config.mosaic_context.invoke_function( - "rst_bandmetadata", pyspark_to_java_column(raster), pyspark_to_java_column(band) + "rst_bandmetadata", pyspark_to_java_column(raster_tile), pyspark_to_java_column(band) ) -def rst_boundingbox(raster: ColumnOrName) -> Column: +def rst_boundingbox(raster_tile: ColumnOrName) -> Column: """ Returns the bounding box of the raster as a WKT polygon. Parameters ---------- - raster : Column (StringType) - Path to the raster file. + raster_tile : Column (StringType) + Mosaic raster tile struct column. Returns ------- @@ -99,41 +101,41 @@ def rst_boundingbox(raster: ColumnOrName) -> Column: """ return config.mosaic_context.invoke_function( - "rst_boundingbox", pyspark_to_java_column(raster) + "rst_boundingbox", pyspark_to_java_column(raster_tile) ) -def rst_clip(raster: ColumnOrName, geometry: ColumnOrName) -> Column: +def rst_clip(raster_tile: ColumnOrName, geometry: ColumnOrName) -> Column: """ - Clips the raster to the given geometry. - The result is the path to the clipped raster. + Clips the raster to the given supported geometry (WKT, WKB, GeoJSON). + The result is Mosaic raster tile struct column to the clipped raster. The result is stored in the checkpoint directory. Parameters ---------- - raster : Column (StringType) - Path to the raster file. + raster_tile : Column (StringType) + Mosaic raster tile struct column. geometry : Column (StringType) The geometry to clip the raster to. Returns ------- Column (StringType) - The path to the clipped raster. + Mosaic raster tile struct column. 
""" return config.mosaic_context.invoke_function( - "rst_clip", pyspark_to_java_column(raster), pyspark_to_java_column(geometry) + "rst_clip", pyspark_to_java_column(raster_tile), pyspark_to_java_column(geometry) ) -def rst_combineavg(rasters: ColumnOrName) -> Column: +def rst_combineavg(raster_tiles: ColumnOrName) -> Column: """ Combines the rasters into a single raster. Parameters ---------- - rasters : Column (ArrayType(StringType)) + raster_tiles : Column (ArrayType(StringType)) Raster tiles to combine. Returns @@ -143,39 +145,39 @@ def rst_combineavg(rasters: ColumnOrName) -> Column: """ return config.mosaic_context.invoke_function( - "rst_combineavg", pyspark_to_java_column(rasters) + "rst_combineavg", pyspark_to_java_column(raster_tiles) ) -def rst_derivedband(raster: ColumnOrName, pythonFunc: ColumnOrName, funcName: ColumnOrName) -> Column: +def rst_derivedband(raster_tile: ColumnOrName, python_func: ColumnOrName, func_name: ColumnOrName) -> Column: """ Creates a new band by applying the given python function to the input rasters. The result is a raster tile. Parameters ---------- - raster : Column (StringType) - Path to the raster file. - pythonFunc : Column (StringType) + raster_tile : Column (StringType) + Mosaic raster tile struct column. + python_func : Column (StringType) The python function to apply to the bands. - funcName : Column (StringType) + func_name : Column (StringType) The name of the function. Returns ------- Column (StringType) - The path to the new raster. + Mosaic raster tile struct column. 
""" return config.mosaic_context.invoke_function( "rst_derivedband", - pyspark_to_java_column(raster), - pyspark_to_java_column(pythonFunc), - pyspark_to_java_column(funcName), + pyspark_to_java_column(raster_tile), + pyspark_to_java_column(python_func), + pyspark_to_java_column(func_name), ) -def rst_georeference(raster: ColumnOrName) -> Column: +def rst_georeference(raster_tile: ColumnOrName) -> Column: """ Returns GeoTransform of the raster as a GT array of doubles. GT(0) x-coordinate of the upper-left corner of the upper-left pixel. @@ -188,8 +190,8 @@ def rst_georeference(raster: ColumnOrName) -> Column: Parameters ---------- - raster : Column (StringType) - Path to the raster file. + raster_tile : Column (StringType) + Mosaic raster tile struct column. Returns ------- @@ -198,18 +200,18 @@ def rst_georeference(raster: ColumnOrName) -> Column: """ return config.mosaic_context.invoke_function( - "rst_georeference", pyspark_to_java_column(raster) + "rst_georeference", pyspark_to_java_column(raster_tile) ) -def rst_getnodata(raster: ColumnOrName) -> Column: +def rst_getnodata(raster_tile: ColumnOrName) -> Column: """ Returns the nodata value of the band. Parameters ---------- - raster : Column (StringType) - Path to the raster file. + raster_tile : Column (StringType) + Mosaic raster tile struct column. band : Column (IntegerType) Band index, starts from 1. @@ -220,41 +222,41 @@ def rst_getnodata(raster: ColumnOrName) -> Column: """ return config.mosaic_context.invoke_function( - "rst_getnodata", pyspark_to_java_column(raster) + "rst_getnodata", pyspark_to_java_column(raster_tile) ) -def rst_getsubdataset(raster: ColumnOrName, subdataset: ColumnOrName) -> Column: +def rst_getsubdataset(raster_tile: ColumnOrName, subdataset: ColumnOrName) -> Column: """ Returns the subdataset of the raster. - The subdataset is the path to the subdataset of the raster. + The subdataset is the Mosaic raster tile struct of the subdataset of the raster. 
Parameters ---------- - raster : Column (StringType) - Path to the raster file. + raster_tile : Column (StringType) + Mosaic raster tile struct column. subdataset : Column (IntegerType) The index of the subdataset to get. Returns ------- Column (StringType) - The path to the subdataset. + Mosaic raster tile struct of the subdataset. """ return config.mosaic_context.invoke_function( "rst_getsubdataset", - pyspark_to_java_column(raster), + pyspark_to_java_column(raster_tile), pyspark_to_java_column(subdataset), ) -def rst_height(raster: ColumnOrName) -> Column: +def rst_height(raster_tile: ColumnOrName) -> Column: """ Parameters ---------- - raster : Column (StringType) - Path to the raster file. + raster_tile : Column (StringType) + Mosaic raster tile struct column. Returns ------- @@ -263,36 +265,36 @@ def rst_height(raster: ColumnOrName) -> Column: """ return config.mosaic_context.invoke_function( - "rst_height", pyspark_to_java_column(raster) + "rst_height", pyspark_to_java_column(raster_tile) ) -def rst_initnodata(raster: ColumnOrName) -> Column: +def rst_initnodata(raster_tile: ColumnOrName) -> Column: """ Initializes the nodata value of the band. Parameters ---------- - raster : Column (StringType) - Path to the raster file. + raster_tile : Column (StringType) + Mosaic raster tile struct column. Returns ------- Column (StringType) - The path to the raster file. + Mosaic raster tile struct column. """ return config.mosaic_context.invoke_function( - "rst_initnodata", pyspark_to_java_column(raster) + "rst_initnodata", pyspark_to_java_column(raster_tile) ) -def rst_isempty(raster: ColumnOrName) -> Column: +def rst_isempty(raster_tile: ColumnOrName) -> Column: """ Parameters ---------- - raster : Column (StringType) - Path to the raster file. + raster_tile : Column (StringType) + Mosaic raster tile struct column. 
Returns ------- @@ -301,16 +303,16 @@ def rst_isempty(raster: ColumnOrName) -> Column: """ return config.mosaic_context.invoke_function( - "rst_isempty", pyspark_to_java_column(raster) + "rst_isempty", pyspark_to_java_column(raster_tile) ) -def rst_memsize(raster: ColumnOrName) -> Column: +def rst_memsize(raster_tile: ColumnOrName) -> Column: """ Parameters ---------- - raster : Column (StringType) - Path to the raster file. + raster_tile : Column (StringType) + Mosaic raster tile struct column. Returns ------- @@ -319,16 +321,16 @@ def rst_memsize(raster: ColumnOrName) -> Column: """ return config.mosaic_context.invoke_function( - "rst_memsize", pyspark_to_java_column(raster) + "rst_memsize", pyspark_to_java_column(raster_tile) ) -def rst_metadata(raster: ColumnOrName) -> Column: +def rst_metadata(raster_tile: ColumnOrName) -> Column: """ Parameters ---------- - raster : Column (StringType) - Path to the raster file. + raster_tile : Column (StringType) + Mosaic raster tile struct column. Returns ------- @@ -337,47 +339,47 @@ def rst_metadata(raster: ColumnOrName) -> Column: """ return config.mosaic_context.invoke_function( - "rst_metadata", pyspark_to_java_column(raster) + "rst_metadata", pyspark_to_java_column(raster_tile) ) -def rst_merge(rasters: ColumnOrName) -> Column: +def rst_merge(raster_tiles: ColumnOrName) -> Column: """ - Merges the rasters into a single raster. - The result is the path to the merged raster. + Merges (mosaics) the rasters into a single raster. + The result is Mosaic raster tile struct of the merged raster. The result is stored in the checkpoint directory. Parameters ---------- - rasters : Column (ArrayType(StringType)) - Paths to the rasters to merge. + raster_tiles : Column (ArrayType(StringType)) + Raster tiles to merge. Returns ------- Column (StringType) - The path to the merged raster. + Mosaic raster tile struct of the merged raster. 
""" return config.mosaic_context.invoke_function( - "rst_merge", pyspark_to_java_column(rasters) + "rst_merge", pyspark_to_java_column(raster_tiles) ) def rst_frombands(bands: ColumnOrName) -> Column: """ - Merges the bands into a single raster. - The result is the path to the merged raster. + Stack an array of bands into a raster tile. + The result is Mosaic raster tile struct. The result is stored in the checkpoint directory. Parameters ---------- bands : Column (ArrayType(StringType)) - Paths to the bands to merge. + Raster tiles of the bands to merge. Returns ------- Column (StringType) - The path to the merged raster. + Mosaic raster tile struct of the band stacking. """ return config.mosaic_context.invoke_function( @@ -385,12 +387,12 @@ def rst_frombands(bands: ColumnOrName) -> Column: ) -def rst_numbands(raster: ColumnOrName) -> Column: +def rst_numbands(raster_tile: ColumnOrName) -> Column: """ Parameters ---------- - raster : Column (StringType) - Path to the raster file. + raster_tile : Column (StringType) + Mosaic raster tile struct column. Returns ------- @@ -399,20 +401,20 @@ def rst_numbands(raster: ColumnOrName) -> Column: """ return config.mosaic_context.invoke_function( - "rst_numbands", pyspark_to_java_column(raster) + "rst_numbands", pyspark_to_java_column(raster_tile) ) -def rst_ndvi(raster: ColumnOrName, band1: ColumnOrName, band2: ColumnOrName) -> Column: +def rst_ndvi(raster_tile: ColumnOrName, band1: ColumnOrName, band2: ColumnOrName) -> Column: """ Computes the NDVI of the raster. - The result is the path to the NDVI raster. + The result is Mosaic raster tile struct of the NDVI raster. The result is stored in the checkpoint directory. Parameters ---------- - raster : Column (StringType) - Path to the raster file. + raster_tile : Column (StringType) + Mosaic raster tile struct column. band1 : Column (IntegerType) The first band index. 
band2 : Column (IntegerType) @@ -421,23 +423,23 @@ def rst_ndvi(raster: ColumnOrName, band1: ColumnOrName, band2: ColumnOrName) -> Returns ------- Column (StringType) - The path to the NDVI raster. + Mosaic raster tile structs of the NDVI raster. """ return config.mosaic_context.invoke_function( "rst_ndvi", - pyspark_to_java_column(raster), + pyspark_to_java_column(raster_tile), pyspark_to_java_column(band1), pyspark_to_java_column(band2), ) -def rst_pixelheight(raster: ColumnOrName) -> Column: +def rst_pixelheight(raster_tile: ColumnOrName) -> Column: """ Parameters ---------- - raster : Column (StringType) - Path to the raster file. + raster_tile : Column (StringType) + Mosaic raster tile struct column. Returns ------- @@ -446,16 +448,16 @@ def rst_pixelheight(raster: ColumnOrName) -> Column: """ return config.mosaic_context.invoke_function( - "rst_pixelheight", pyspark_to_java_column(raster) + "rst_pixelheight", pyspark_to_java_column(raster_tile) ) -def rst_pixelwidth(raster: ColumnOrName) -> Column: +def rst_pixelwidth(raster_tile: ColumnOrName) -> Column: """ Parameters ---------- - raster : Column (StringType) - Path to the raster file. + raster_tile : Column (StringType) + Mosaic raster tile struct column. Returns ------- @@ -464,11 +466,11 @@ def rst_pixelwidth(raster: ColumnOrName) -> Column: """ return config.mosaic_context.invoke_function( - "rst_pixelwidth", pyspark_to_java_column(raster) + "rst_pixelwidth", pyspark_to_java_column(raster_tile) ) -def rst_rastertogridavg(raster: ColumnOrName, resolution: ColumnOrName) -> Column: +def rst_rastertogridavg(raster_tile: ColumnOrName, resolution: ColumnOrName) -> Column: """ The result is a 2D array of cells, where each cell is a struct of (cellID, value). For getting the output of cellID->value pairs, please use explode() function twice. 
@@ -477,8 +479,8 @@ def rst_rastertogridavg(raster: ColumnOrName, resolution: ColumnOrName) -> Colum Parameters ---------- - raster : Column (StringType) - Path to the raster file. + raster_tile : Column (StringType) + Mosaic raster tile struct column. Returns ------- @@ -488,12 +490,12 @@ def rst_rastertogridavg(raster: ColumnOrName, resolution: ColumnOrName) -> Colum """ return config.mosaic_context.invoke_function( "rst_rastertogridavg", - pyspark_to_java_column(raster), + pyspark_to_java_column(raster_tile), pyspark_to_java_column(resolution), ) -def rst_rastertogridcount(raster: ColumnOrName, resolution: ColumnOrName) -> Column: +def rst_rastertogridcount(raster_tile: ColumnOrName, resolution: ColumnOrName) -> Column: """ The result is a 2D array of cells, where each cell is a struct of (cellID, value). For getting the output of cellID->value pairs, please use explode() function twice. @@ -502,8 +504,8 @@ def rst_rastertogridcount(raster: ColumnOrName, resolution: ColumnOrName) -> Col Parameters ---------- - raster : Column (StringType) - Path to the raster file. + raster_tile : Column (StringType) + Mosaic raster tile struct column. Returns ------- @@ -513,12 +515,12 @@ def rst_rastertogridcount(raster: ColumnOrName, resolution: ColumnOrName) -> Col """ return config.mosaic_context.invoke_function( "rst_rastertogridcount", - pyspark_to_java_column(raster), + pyspark_to_java_column(raster_tile), pyspark_to_java_column(resolution), ) -def rst_rastertogridmax(raster: ColumnOrName, resolution: ColumnOrName) -> Column: +def rst_rastertogridmax(raster_tile: ColumnOrName, resolution: ColumnOrName) -> Column: """ The result is a 2D array of cells, where each cell is a struct of (cellID, value). For getting the output of cellID->value pairs, please use explode() function twice. @@ -527,8 +529,8 @@ def rst_rastertogridmax(raster: ColumnOrName, resolution: ColumnOrName) -> Colum Parameters ---------- - raster : Column (StringType) - Path to the raster file. 
+ raster_tile : Column (StringType) + Mosaic raster tile struct column. Returns ------- @@ -538,12 +540,12 @@ def rst_rastertogridmax(raster: ColumnOrName, resolution: ColumnOrName) -> Colum """ return config.mosaic_context.invoke_function( "rst_rastertogridmax", - pyspark_to_java_column(raster), + pyspark_to_java_column(raster_tile), pyspark_to_java_column(resolution), ) -def rst_rastertogridmedian(raster: ColumnOrName, resolution: ColumnOrName) -> Column: +def rst_rastertogridmedian(raster_tile: ColumnOrName, resolution: ColumnOrName) -> Column: """ The result is a 2D array of cells, where each cell is a struct of (cellID, value). For getting the output of cellID->value pairs, please use explode() function twice. @@ -552,8 +554,8 @@ def rst_rastertogridmedian(raster: ColumnOrName, resolution: ColumnOrName) -> Co Parameters ---------- - raster : Column (StringType) - Path to the raster file. + raster_tile : Column (StringType) + Mosaic raster tile struct column. Returns ------- @@ -563,12 +565,12 @@ def rst_rastertogridmedian(raster: ColumnOrName, resolution: ColumnOrName) -> Co """ return config.mosaic_context.invoke_function( "rst_rastertogridmedian", - pyspark_to_java_column(raster), + pyspark_to_java_column(raster_tile), pyspark_to_java_column(resolution), ) -def rst_rastertogridmin(raster: ColumnOrName, resolution: ColumnOrName) -> Column: +def rst_rastertogridmin(raster_tile: ColumnOrName, resolution: ColumnOrName) -> Column: """ The result is a 2D array of cells, where each cell is a struct of (cellID, value). For getting the output of cellID->value pairs, please use explode() function twice. @@ -577,8 +579,8 @@ def rst_rastertogridmin(raster: ColumnOrName, resolution: ColumnOrName) -> Colum Parameters ---------- - raster : Column (StringType) - Path to the raster file. + raster_tile : Column (StringType) + Mosaic raster tile struct column. 
Returns ------- @@ -588,13 +590,13 @@ def rst_rastertogridmin(raster: ColumnOrName, resolution: ColumnOrName) -> Colum """ return config.mosaic_context.invoke_function( "rst_rastertogridmin", - pyspark_to_java_column(raster), + pyspark_to_java_column(raster_tile), pyspark_to_java_column(resolution), ) def rst_rastertoworldcoord( - raster: ColumnOrName, x: ColumnOrName, y: ColumnOrName + raster_tile: ColumnOrName, x: ColumnOrName, y: ColumnOrName ) -> Column: """ Computes the world coordinates of the raster pixel at the given x and y coordinates. @@ -603,8 +605,8 @@ def rst_rastertoworldcoord( Parameters ---------- - raster : Column (StringType) - Path to the raster file. + raster_tile : Column (StringType) + Mosaic raster tile struct column. Returns ------- @@ -614,14 +616,14 @@ def rst_rastertoworldcoord( """ return config.mosaic_context.invoke_function( "rst_rastertoworldcoord", - pyspark_to_java_column(raster), + pyspark_to_java_column(raster_tile), pyspark_to_java_column(x), pyspark_to_java_column(y), ) def rst_rastertoworldcoordx( - raster: ColumnOrName, x: ColumnOrName, y: ColumnOrName + raster_tile: ColumnOrName, x: ColumnOrName, y: ColumnOrName ) -> Column: """ Computes the world coordinates of the raster pixel at the given x and y coordinates. @@ -629,8 +631,8 @@ def rst_rastertoworldcoordx( Parameters ---------- - raster : Column (StringType) - Path to the raster file. + raster_tile : Column (StringType) + Mosaic raster tile struct column. Returns ------- @@ -640,14 +642,14 @@ def rst_rastertoworldcoordx( """ return config.mosaic_context.invoke_function( "rst_rastertoworldcoordx", - pyspark_to_java_column(raster), + pyspark_to_java_column(raster_tile), pyspark_to_java_column(x), pyspark_to_java_column(y), ) def rst_rastertoworldcoordy( - raster: ColumnOrName, x: ColumnOrName, y: ColumnOrName + raster_tile: ColumnOrName, x: ColumnOrName, y: ColumnOrName ) -> Column: """ Computes the world coordinates of the raster pixel at the given x and y coordinates. 
@@ -655,8 +657,8 @@ def rst_rastertoworldcoordy( Parameters ---------- - raster : Column (StringType) - Path to the raster file. + raster_tile : Column (StringType) + Mosaic raster tile struct column. Returns ------- @@ -666,41 +668,41 @@ def rst_rastertoworldcoordy( """ return config.mosaic_context.invoke_function( "rst_rastertoworldcoordy", - pyspark_to_java_column(raster), + pyspark_to_java_column(raster_tile), pyspark_to_java_column(x), pyspark_to_java_column(y), ) def rst_retile( - raster: ColumnOrName, tileWidth: ColumnOrName, tileHeight: ColumnOrName + raster_tile: ColumnOrName, tile_width: ColumnOrName, tile_height: ColumnOrName ) -> Column: """ Retiles the raster to the given tile size. The result is a collection of new raster files. The new rasters are stored in the checkpoint directory. - The results are the paths to the new rasters. + The results are Mosaic raster tile struct of the new rasters. The result set is automatically exploded. Parameters ---------- - raster : Column (StringType) - Path to the raster file. + raster_tile : Column (StringType) + Mosaic raster tile struct column. Returns ------- Column (StringType) - The path to the raster tiles exploded. + Mosaic raster tile structs from the exploded retile. """ return config.mosaic_context.invoke_function( "rst_retile", - pyspark_to_java_column(raster), - pyspark_to_java_column(tileWidth), - pyspark_to_java_column(tileHeight), + pyspark_to_java_column(raster_tile), + pyspark_to_java_column(tile_width), + pyspark_to_java_column(tile_height), ) -def rst_rotation(raster: ColumnOrName) -> Column: +def rst_rotation(raster_tile: ColumnOrName) -> Column: """ Computes the rotation of the raster in degrees. The rotation is the angle between the X axis and the North axis. @@ -708,8 +710,8 @@ def rst_rotation(raster: ColumnOrName) -> Column: Parameters ---------- - raster : Column (StringType) - Path to the raster file. + raster_tile : Column (StringType) + Mosaic raster tile struct column. 
Returns ------- @@ -718,18 +720,18 @@ def rst_rotation(raster: ColumnOrName) -> Column: """ return config.mosaic_context.invoke_function( - "rst_rotation", pyspark_to_java_column(raster) + "rst_rotation", pyspark_to_java_column(raster_tile) ) -def rst_scalex(raster: ColumnOrName) -> Column: +def rst_scalex(raster_tile: ColumnOrName) -> Column: """ Computes the scale of the raster in the X direction. Parameters ---------- - raster : Column (StringType) - Path to the raster file. + raster_tile : Column (StringType) + Mosaic raster tile struct column. Returns ------- @@ -738,18 +740,18 @@ def rst_scalex(raster: ColumnOrName) -> Column: """ return config.mosaic_context.invoke_function( - "rst_scalex", pyspark_to_java_column(raster) + "rst_scalex", pyspark_to_java_column(raster_tile) ) -def rst_scaley(raster: ColumnOrName) -> Column: +def rst_scaley(raster_tile: ColumnOrName) -> Column: """ Computes the scale of the raster in the Y direction. Parameters ---------- - raster : Column (StringType) - Path to the raster file. + raster_tile : Column (StringType) + Mosaic raster tile struct column. Returns ------- @@ -758,42 +760,42 @@ def rst_scaley(raster: ColumnOrName) -> Column: """ return config.mosaic_context.invoke_function( - "rst_scaley", pyspark_to_java_column(raster) + "rst_scaley", pyspark_to_java_column(raster_tile) ) -def rst_setnodata(raster: ColumnOrName, nodata: ColumnOrName) -> Column: +def rst_setnodata(raster_tile: ColumnOrName, nodata: ColumnOrName) -> Column: """ Sets the nodata value of the band. Parameters ---------- - raster : Column (StringType) - Path to the raster file. + raster_tile : Column (StringType) + Mosaic raster tile struct column. nodata : Column (DoubleType) The nodata value to set. Returns ------- Column (StringType) - The path to the raster file. + Mosaic raster tile struct column. 
""" return config.mosaic_context.invoke_function( "rst_setnodata", - pyspark_to_java_column(raster), + pyspark_to_java_column(raster_tile), pyspark_to_java_column(nodata), ) -def rst_skewx(raster: ColumnOrName) -> Column: +def rst_skewx(raster_tile: ColumnOrName) -> Column: """ Computes the skew of the raster in the X direction. Parameters ---------- - raster : Column (StringType) - Path to the raster file. + raster_tile : Column (StringType) + Mosaic raster tile struct column. Returns ------- @@ -802,18 +804,18 @@ def rst_skewx(raster: ColumnOrName) -> Column: """ return config.mosaic_context.invoke_function( - "rst_skewx", pyspark_to_java_column(raster) + "rst_skewx", pyspark_to_java_column(raster_tile) ) -def rst_skewy(raster: ColumnOrName) -> Column: +def rst_skewy(raster_tile: ColumnOrName) -> Column: """ Computes the skew of the raster in the Y direction. Parameters ---------- - raster : Column (StringType) - Path to the raster file. + raster_tile : Column (StringType) + Mosaic raster tile struct column. Returns ------- @@ -822,19 +824,19 @@ def rst_skewy(raster: ColumnOrName) -> Column: """ return config.mosaic_context.invoke_function( - "rst_skewy", pyspark_to_java_column(raster) + "rst_skewy", pyspark_to_java_column(raster_tile) ) -def rst_srid(raster: ColumnOrName) -> Column: +def rst_srid(raster_tile: ColumnOrName) -> Column: """ Computes the SRID of the raster. The SRID is the EPSG code of the raster. Parameters ---------- - raster : Column (StringType) - Path to the raster file. + raster_tile : Column (StringType) + Mosaic raster tile struct column. Returns ------- @@ -843,20 +845,20 @@ def rst_srid(raster: ColumnOrName) -> Column: """ return config.mosaic_context.invoke_function( - "rst_srid", pyspark_to_java_column(raster) + "rst_srid", pyspark_to_java_column(raster_tile) ) -def rst_subdatasets(raster: ColumnOrName) -> Column: +def rst_subdatasets(raster_tile: ColumnOrName) -> Column: """ Computes the subdatasets of the raster. 
- The subdatasets are the paths to the subdatasets of the raster. + The input is Mosaic raster tile struct. The result is a map of the subdataset path to the subdatasets and the description of the subdatasets. Parameters ---------- - raster : Column (StringType) - Path to the raster file. + raster_tile : Column (StringType) + Mosaic raster tile struct column. Returns ------- @@ -865,11 +867,11 @@ def rst_subdatasets(raster: ColumnOrName) -> Column: """ return config.mosaic_context.invoke_function( - "rst_subdatasets", pyspark_to_java_column(raster) + "rst_subdatasets", pyspark_to_java_column(raster_tile) ) -def rst_summary(raster: ColumnOrName) -> Column: +def rst_summary(raster_tile: ColumnOrName) -> Column: """ Computes the summary of the raster. The summary is a map of the statistics of the raster. @@ -878,8 +880,8 @@ def rst_summary(raster: ColumnOrName) -> Column: Parameters ---------- - raster : Column (StringType) - Path to the raster file. + raster_tile : Column (StringType) + Mosaic raster tile struct column. Returns ------- @@ -888,19 +890,19 @@ def rst_summary(raster: ColumnOrName) -> Column: """ return config.mosaic_context.invoke_function( - "rst_summary", pyspark_to_java_column(raster) + "rst_summary", pyspark_to_java_column(raster_tile) ) -def rst_tessellate(raster: ColumnOrName, resolution: ColumnOrName) -> Column: +def rst_tessellate(raster_tile: ColumnOrName, resolution: ColumnOrName) -> Column: """ Clip the raster into raster tiles where each tile is a grid tile for the given resolution. The tile set union forms the original raster. Parameters ---------- - raster : Column (StringType) - Path to the raster file. + raster_tile : Column (StringType) + Mosaic raster tile struct column. resolution : Column (IntegerType) The resolution of the tiles. 
@@ -912,71 +914,75 @@ def rst_tessellate(raster: ColumnOrName, resolution: ColumnOrName) -> Column: """ return config.mosaic_context.invoke_function( "rst_tessellate", - pyspark_to_java_column(raster), + pyspark_to_java_column(raster_tile), pyspark_to_java_column(resolution), ) -def rst_fromcontent(raster: ColumnOrName, driver: ColumnOrName, sizeInMB: ColumnOrName) -> Column: +def rst_fromcontent(raster_bin: ColumnOrName, driver: ColumnOrName, size_in_mb: Any = -1) -> Column: """ Tiles the raster binary into tiles of the given size. - :param raster: + :param raster_bin: :param driver: - :param sizeInMB: + :param size_in_mb: :return: """ + if type(size_in_mb) == int: + size_in_mb = lit(size_in_mb) return config.mosaic_context.invoke_function( "rst_fromcontent", - pyspark_to_java_column(raster), + pyspark_to_java_column(raster_bin), pyspark_to_java_column(driver), - pyspark_to_java_column(sizeInMB) + pyspark_to_java_column(size_in_mb) ) -def rst_fromfile(raster: ColumnOrName, sizeInMB: ColumnOrName) -> Column: +def rst_fromfile(raster_path: ColumnOrName, size_in_mb: Any = -1) -> Column: """ Tiles the raster into tiles of the given size. - :param raster: - :param sizeInMB: + :param raster_path: + :param size_in_mb: :return: """ - + if type(size_in_mb) == int: + size_in_mb = lit(size_in_mb) + return config.mosaic_context.invoke_function( - "rst_fromfile", pyspark_to_java_column(raster), pyspark_to_java_column(sizeInMB) + "rst_fromfile", pyspark_to_java_column(raster_path), pyspark_to_java_column(size_in_mb) ) def rst_to_overlapping_tiles( - raster: ColumnOrName, + raster_tile: ColumnOrName, width: ColumnOrName, height: ColumnOrName, overlap: ColumnOrName, ) -> Column: """ Tiles the raster into tiles of the given size. 
- :param raster: + :param raster_tile: :param sizeInMB: :return: """ return config.mosaic_context.invoke_function( "rst_to_overlapping_tiles", - pyspark_to_java_column(raster), + pyspark_to_java_column(raster_tile), pyspark_to_java_column(width), pyspark_to_java_column(height), pyspark_to_java_column(overlap), ) -def rst_tryopen(raster: ColumnOrName) -> Column: +def rst_tryopen(raster_tile: ColumnOrName) -> Column: """ Tries to open the raster and returns a flag indicating if the raster can be opened. Parameters ---------- - raster : Column (StringType) - Path to the raster file. + raster_tile : Column (StringType) + Mosaic raster tile struct column. Returns ------- @@ -985,19 +991,19 @@ def rst_tryopen(raster: ColumnOrName) -> Column: """ return config.mosaic_context.invoke_function( - "rst_tryopen", pyspark_to_java_column(raster) + "rst_tryopen", pyspark_to_java_column(raster_tile) ) -def rst_subdivide(raster: ColumnOrName, size_in_mb: ColumnOrName) -> Column: +def rst_subdivide(raster_tile: ColumnOrName, size_in_mb: ColumnOrName) -> Column: """ Subdivides the raster into tiles that have to be smaller than the given size in MB. All the tiles have the same aspect ratio as the original raster. Parameters ---------- - raster : Column (StringType) - Path to the raster file. + raster_tile : Column (StringType) + Mosaic raster tile struct column. size_in_mb : Column (IntegerType) The size of the tiles in MB. @@ -1009,20 +1015,20 @@ def rst_subdivide(raster: ColumnOrName, size_in_mb: ColumnOrName) -> Column: """ return config.mosaic_context.invoke_function( "rst_subdivide", - pyspark_to_java_column(raster), + pyspark_to_java_column(raster_tile), pyspark_to_java_column(size_in_mb), ) -def rst_upperleftx(raster: ColumnOrName) -> Column: +def rst_upperleftx(raster_tile: ColumnOrName) -> Column: """ Computes the upper left X coordinate of the raster. The value is computed based on GeoTransform. Parameters ---------- - raster : Column (StringType) - Path to the raster file. 
+ raster_tile : Column (StringType) + Mosaic raster tile struct column. Returns ------- @@ -1031,19 +1037,19 @@ def rst_upperleftx(raster: ColumnOrName) -> Column: """ return config.mosaic_context.invoke_function( - "rst_upperleftx", pyspark_to_java_column(raster) + "rst_upperleftx", pyspark_to_java_column(raster_tile) ) -def rst_upperlefty(raster: ColumnOrName) -> Column: +def rst_upperlefty(raster_tile: ColumnOrName) -> Column: """ Computes the upper left Y coordinate of the raster. The value is computed based on GeoTransform. Parameters ---------- - raster : Column (StringType) - Path to the raster file. + raster_tile : Column (StringType) + Mosaic raster tile struct column. Returns ------- @@ -1052,18 +1058,18 @@ def rst_upperlefty(raster: ColumnOrName) -> Column: """ return config.mosaic_context.invoke_function( - "rst_upperlefty", pyspark_to_java_column(raster) + "rst_upperlefty", pyspark_to_java_column(raster_tile) ) -def rst_width(raster: ColumnOrName) -> Column: +def rst_width(raster_tile: ColumnOrName) -> Column: """ Computes the width of the raster in pixels. Parameters ---------- - raster : Column (StringType) - Path to the raster file. + raster_tile : Column (StringType) + Mosaic raster tile struct column. Returns ------- @@ -1072,12 +1078,12 @@ def rst_width(raster: ColumnOrName) -> Column: """ return config.mosaic_context.invoke_function( - "rst_width", pyspark_to_java_column(raster) + "rst_width", pyspark_to_java_column(raster_tile) ) def rst_worldtorastercoord( - raster: ColumnOrName, x: ColumnOrName, y: ColumnOrName + raster_tile: ColumnOrName, x: ColumnOrName, y: ColumnOrName ) -> Column: """ Computes the raster coordinates of the world coordinates. @@ -1087,8 +1093,8 @@ def rst_worldtorastercoord( Parameters ---------- - raster : Column (StringType) - Path to the raster file. + raster_tile : Column (StringType) + Mosaic raster tile struct column. 
Returns ------- @@ -1098,14 +1104,14 @@ def rst_worldtorastercoord( """ return config.mosaic_context.invoke_function( "rst_worldtorastercoord", - pyspark_to_java_column(raster), + pyspark_to_java_column(raster_tile), pyspark_to_java_column(x), pyspark_to_java_column(y), ) def rst_worldtorastercoordx( - raster: ColumnOrName, x: ColumnOrName, y: ColumnOrName + raster_tile: ColumnOrName, x: ColumnOrName, y: ColumnOrName ) -> Column: """ Computes the raster coordinates of the world coordinates. @@ -1116,8 +1122,8 @@ def rst_worldtorastercoordx( Parameters ---------- - raster : Column (StringType) - Path to the raster file. + raster_tile : Column (StringType) + Mosaic raster tile struct column. Returns ------- @@ -1127,14 +1133,14 @@ def rst_worldtorastercoordx( """ return config.mosaic_context.invoke_function( "rst_worldtorastercoordx", - pyspark_to_java_column(raster), + pyspark_to_java_column(raster_tile), pyspark_to_java_column(x), pyspark_to_java_column(y), ) def rst_worldtorastercoordy( - raster: ColumnOrName, x: ColumnOrName, y: ColumnOrName + raster_tile: ColumnOrName, x: ColumnOrName, y: ColumnOrName ) -> Column: """ Computes the raster coordinates of the world coordinates. @@ -1145,8 +1151,8 @@ def rst_worldtorastercoordy( Parameters ---------- - raster : Column (StringType) - Path to the raster file. + raster_tile : Column (StringType) + Mosaic raster tile struct column. 
Returns ------- @@ -1156,7 +1162,7 @@ def rst_worldtorastercoordy( """ return config.mosaic_context.invoke_function( "rst_worldtorastercoordy", - pyspark_to_java_column(raster), + pyspark_to_java_column(raster_tile), pyspark_to_java_column(x), pyspark_to_java_column(y), ) diff --git a/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_BandMetaData.scala b/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_BandMetaData.scala index 241d913bc..50535ab95 100644 --- a/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_BandMetaData.scala +++ b/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_BandMetaData.scala @@ -50,7 +50,7 @@ object RST_BandMetaData extends WithExpressionInfo { override def name: String = "rst_bandmetadata" - override def usage: String = "_FUNC_(expr1, expr2) - Extracts metadata from a raster band." + override def usage: String = "_FUNC_(expr1, expr2) - Extracts metadata from a raster tile band." override def example: String = """ diff --git a/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_BoundingBox.scala b/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_BoundingBox.scala index 8fa2d7314..f40f6c590 100644 --- a/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_BoundingBox.scala +++ b/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_BoundingBox.scala @@ -57,7 +57,7 @@ object RST_BoundingBox extends WithExpressionInfo { override def usage: String = """ - |_FUNC_(expr1) - Returns the bounding box of the raster. + |_FUNC_(expr1) - Returns the bounding box of the raster tile. 
|""".stripMargin override def example: String = diff --git a/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_Clip.scala b/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_Clip.scala index 557565afe..2284c3fa9 100644 --- a/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_Clip.scala +++ b/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_Clip.scala @@ -55,13 +55,13 @@ object RST_Clip extends WithExpressionInfo { override def usage: String = """ - |_FUNC_(expr1) - Returns a raster clipped by provided vector. + |_FUNC_(expr1) - Returns a raster tile clipped by provided vector. |""".stripMargin override def example: String = """ | Examples: - | > SELECT _FUNC_(raster, vector); + | > SELECT _FUNC_(raster_tile, vector); | {index_id, clipped_raster, parentPath, driver} | {index_id, clipped_raster, parentPath, driver} | ... diff --git a/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_CombineAvg.scala b/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_CombineAvg.scala index 1d923fdc1..fcaddf928 100644 --- a/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_CombineAvg.scala +++ b/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_CombineAvg.scala @@ -43,7 +43,7 @@ object RST_CombineAvg extends WithExpressionInfo { override def usage: String = """ - |_FUNC_(expr1) - Returns a raster that is a result of combining an array of rasters using average of pixels. + |_FUNC_(expr1) - Combine an array of raster tiles using average of pixels. 
|""".stripMargin override def example: String = diff --git a/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_CombineAvgAgg.scala b/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_CombineAvgAgg.scala index f6b3ba1dc..0a6791487 100644 --- a/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_CombineAvgAgg.scala +++ b/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_CombineAvgAgg.scala @@ -114,7 +114,7 @@ object RST_CombineAvgAgg { db.orNull, "rst_combine_avg_agg", """ - | _FUNC_(tiles)) - Combines rasters into a single raster using average. + | _FUNC_(tiles) - Aggregate to combine raster tiles using an average of pixels. """.stripMargin, "", """ diff --git a/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_DerivedBand.scala b/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_DerivedBand.scala index 822228a1b..fcd1116cd 100644 --- a/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_DerivedBand.scala +++ b/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_DerivedBand.scala @@ -50,7 +50,7 @@ object RST_DerivedBand extends WithExpressionInfo { override def usage: String = """ - |_FUNC_(expr1) - Returns a raster that is a result of combining an array of rasters using provided python function. + |_FUNC_(expr1) - Combine an array of raster tiles using provided python function. 
|""".stripMargin override def example: String = diff --git a/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_DerivedBandAgg.scala b/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_DerivedBandAgg.scala index 47d4aa12a..3ceb03318 100644 --- a/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_DerivedBandAgg.scala +++ b/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_DerivedBandAgg.scala @@ -126,7 +126,7 @@ object RST_DerivedBandAgg { db.orNull, "rst_derived_band_agg", """ - | _FUNC_(tiles)) - Combines rasters into a single raster using provided python function. + | _FUNC_(tiles) - Aggregate which combines raster tiles using provided python function. """.stripMargin, "", """ diff --git a/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_FromBands.scala b/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_FromBands.scala index 2befb353c..268a1c550 100644 --- a/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_FromBands.scala +++ b/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_FromBands.scala @@ -43,7 +43,7 @@ object RST_FromBands extends WithExpressionInfo { override def usage: String = """ - |_FUNC_(expr1) - Returns a raster that is a result of stacking and resampling input bands. + |_FUNC_(expr1) - Returns raster tiles that are a result of stacking and resampling input bands. 
|""".stripMargin override def example: String = diff --git a/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_FromContent.scala b/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_FromContent.scala index bd2926bcb..8749c0d5b 100644 --- a/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_FromContent.scala +++ b/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_FromContent.scala @@ -113,8 +113,8 @@ object RST_FromContent extends WithExpressionInfo { override def example: String = """ | Examples: - | > SELECT _FUNC_(raster, driver, sizeInMB); - | {index_id, raster, parentPath, driver} + | > SELECT _FUNC_(raster_bin, driver, size_in_mb); + | {index_id, raster, parent_path, driver} | ... | """.stripMargin diff --git a/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_FromFile.scala b/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_FromFile.scala index fbce5bf58..4a4bf04c0 100644 --- a/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_FromFile.scala +++ b/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_FromFile.scala @@ -101,7 +101,7 @@ object RST_FromFile extends WithExpressionInfo { override def usage: String = """ - |_FUNC_(expr1) - Returns a set of new rasters with the specified tile size (tileWidth x tileHeight). + |_FUNC_(expr1) - Returns a set of new raster tiles within threshold in MBs. 
|""".stripMargin override def example: String = diff --git a/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_GeoReference.scala b/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_GeoReference.scala index f4213eee7..19f30ab23 100644 --- a/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_GeoReference.scala +++ b/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_GeoReference.scala @@ -36,7 +36,7 @@ object RST_GeoReference extends WithExpressionInfo { override def name: String = "rst_georeference" - override def usage: String = "_FUNC_(expr1) - Extracts geo reference from a raster." + override def usage: String = "_FUNC_(expr1) - Extracts geo reference from a raster tile." override def example: String = """ diff --git a/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_GetNoData.scala b/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_GetNoData.scala index 8f10b89cb..1bff1c5ae 100644 --- a/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_GetNoData.scala +++ b/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_GetNoData.scala @@ -44,15 +44,14 @@ object RST_GetNoData extends WithExpressionInfo { override def usage: String = """ - |_FUNC_(expr1) - Returns a raster clipped by provided vector. + |_FUNC_(expr1) - Returns no data values for raster tile bands. |""".stripMargin override def example: String = """ | Examples: | > SELECT _FUNC_(raster_tile); - | {index_id, clipped_raster, parentPath, driver} - | {index_id, clipped_raster, parentPath, driver} + | [0.0, -9999.0, ...] | ... 
| """.stripMargin diff --git a/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_GetSubdataset.scala b/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_GetSubdataset.scala index a87f6fa25..709f1aca8 100644 --- a/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_GetSubdataset.scala +++ b/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_GetSubdataset.scala @@ -35,7 +35,7 @@ object RST_GetSubdataset extends WithExpressionInfo { override def name: String = "rst_getsubdataset" - override def usage: String = "_FUNC_(expr1, expr2) - Extracts subdataset raster." + override def usage: String = "_FUNC_(expr1, expr2) - Extracts subdataset raster tile." override def example: String = """ diff --git a/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_Height.scala b/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_Height.scala index ceb638f29..d39d1cf70 100644 --- a/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_Height.scala +++ b/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_Height.scala @@ -25,7 +25,7 @@ object RST_Height extends WithExpressionInfo { override def name: String = "rst_height" - override def usage: String = "_FUNC_(expr1) - Returns height of the raster." + override def usage: String = "_FUNC_(expr1) - Returns height of the raster tile." override def example: String = """ diff --git a/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_InitNoData.scala b/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_InitNoData.scala index 8cf226664..c97c4365d 100644 --- a/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_InitNoData.scala +++ b/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_InitNoData.scala @@ -59,7 +59,7 @@ object RST_InitNoData extends WithExpressionInfo { override def usage: String = """ - |_FUNC_(expr1) - Returns a raster clipped by provided vector. 
+ |_FUNC_(expr1) - Initializes the nodata value of the raster bands. |""".stripMargin override def example: String = diff --git a/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_IsEmpty.scala b/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_IsEmpty.scala index 4a5f5034f..15fca50c5 100644 --- a/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_IsEmpty.scala +++ b/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_IsEmpty.scala @@ -30,7 +30,7 @@ object RST_IsEmpty extends WithExpressionInfo { override def name: String = "rst_isempty" - override def usage: String = "_FUNC_(expr1) - Returns true if the raster is empty." + override def usage: String = "_FUNC_(expr1) - Returns true if the raster tile is empty." override def example: String = """ diff --git a/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_MapAlgebra.scala b/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_MapAlgebra.scala index 53e84d96b..24d941b4b 100644 --- a/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_MapAlgebra.scala +++ b/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_MapAlgebra.scala @@ -103,7 +103,7 @@ object RST_MapAlgebra extends WithExpressionInfo { override def usage: String = """ - |_FUNC_(expr1, expr2) - Performs map algebra on the rasters. + |_FUNC_(expr1, expr2) - Performs map algebra on the raster tiles. 
|""".stripMargin override def example: String = diff --git a/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_MemSize.scala b/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_MemSize.scala index 804c4f195..772cb4b3b 100644 --- a/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_MemSize.scala +++ b/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_MemSize.scala @@ -25,7 +25,7 @@ object RST_MemSize extends WithExpressionInfo { override def name: String = "rst_memsize" - override def usage: String = "_FUNC_(expr1) - Returns number of bytes for in memory representation of the raster." + override def usage: String = "_FUNC_(expr1) - Returns number of bytes for in memory representation of the raster tile." override def example: String = """ diff --git a/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_Merge.scala b/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_Merge.scala index cb9907848..08df40d43 100644 --- a/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_Merge.scala +++ b/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_Merge.scala @@ -47,7 +47,7 @@ object RST_Merge extends WithExpressionInfo { override def usage: String = """ - |_FUNC_(expr1) - Returns a raster that is a result of merging an array of rasters. + |_FUNC_(expr1) - Merge (mosaic) an array of raster tile columns. |""".stripMargin override def example: String = diff --git a/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_MergeAgg.scala b/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_MergeAgg.scala index 5902eac3b..fc69523d9 100644 --- a/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_MergeAgg.scala +++ b/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_MergeAgg.scala @@ -116,7 +116,7 @@ object RST_MergeAgg { db.orNull, "rst_merge_agg", """ - | _FUNC_(tiles)) - Merges rasters into a single raster. 
+ | _FUNC_(tiles) - Aggregate merge of raster tiles. """.stripMargin, "", """ diff --git a/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_MetaData.scala b/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_MetaData.scala index 8a96ff0d1..72236e8ba 100644 --- a/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_MetaData.scala +++ b/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_MetaData.scala @@ -25,7 +25,7 @@ object RST_MetaData extends WithExpressionInfo { override def name: String = "rst_metadata" - override def usage: String = "_FUNC_(expr1) - Extracts metadata from a raster dataset." + override def usage: String = "_FUNC_(expr1) - Extracts metadata from a raster tile dataset." override def example: String = """ diff --git a/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_NDVI.scala b/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_NDVI.scala index fa595fd4b..5f4ca5743 100644 --- a/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_NDVI.scala +++ b/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_NDVI.scala @@ -53,7 +53,7 @@ object RST_NDVI extends WithExpressionInfo { override def usage: String = """ - |_FUNC_(expr1, expr2, expr3) - Returns a raster contains NDVI index computed by bands provided by red_index and nir_index. + |_FUNC_(expr1, expr2, expr3) - NDVI index computed by raster tile red_index and nir_index bands. 
|""".stripMargin override def example: String = diff --git a/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_NumBands.scala b/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_NumBands.scala index f5dd09551..5fad0186f 100644 --- a/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_NumBands.scala +++ b/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_NumBands.scala @@ -25,7 +25,7 @@ object RST_NumBands extends WithExpressionInfo { override def name: String = "rst_numbands" - override def usage: String = "_FUNC_(expr1) - Returns number of bands in the raster." + override def usage: String = "_FUNC_(expr1) - Returns number of bands in the raster tile." override def example: String = """ diff --git a/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_PixelHeight.scala b/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_PixelHeight.scala index d1c3713ef..63e060552 100644 --- a/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_PixelHeight.scala +++ b/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_PixelHeight.scala @@ -36,7 +36,7 @@ object RST_PixelHeight extends WithExpressionInfo { override def usage: String = """ - |_FUNC_(expr1) - Returns pixel height in the raster. + |_FUNC_(expr1) - Returns pixel height in the raster tile. |The width is a hypotenuse of a right triangle formed by scaleY and skewX. 
|""".stripMargin diff --git a/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_PixelWidth.scala b/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_PixelWidth.scala index 6a4956e9e..8fc18f759 100644 --- a/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_PixelWidth.scala +++ b/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_PixelWidth.scala @@ -36,7 +36,7 @@ object RST_PixelWidth extends WithExpressionInfo { override def usage: String = """ - |_FUNC_(expr1) - Returns pixel width in the raster. + |_FUNC_(expr1) - Returns pixel width in the raster tile. |The width is a hypotenuse of a right triangle formed by scaleX and skewY. |""".stripMargin diff --git a/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_RasterToGridAvg.scala b/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_RasterToGridAvg.scala index 5c0f2ba4a..8e5980d50 100644 --- a/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_RasterToGridAvg.scala +++ b/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_RasterToGridAvg.scala @@ -34,7 +34,7 @@ object RST_RasterToGridAvg extends WithExpressionInfo { override def usage: String = """ - |_FUNC_(expr1, expr2) - Returns a collection of grid index cells with the average pixel value for each band of the raster. + |_FUNC_(expr1, expr2) - Returns a collection of grid index cells with the average pixel value for each band of the raster tile. | The output type is array>>. | Raster mask is taken into account and only valid pixels are used for the calculation. 
|""".stripMargin diff --git a/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_RasterToGridCount.scala b/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_RasterToGridCount.scala index 2fd36a986..4e0f61037 100644 --- a/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_RasterToGridCount.scala +++ b/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_RasterToGridCount.scala @@ -34,7 +34,7 @@ object RST_RasterToGridCount extends WithExpressionInfo { override def usage: String = """ - |_FUNC_(expr1, expr2) - Returns a collection of grid index cells with the number of pixels per cell for each band of the raster. + |_FUNC_(expr1, expr2) - Returns a collection of grid index cells with the number of pixels per cell for each band of the raster tile. | The output type is array>>. | Raster mask is taken into account and only valid pixels are used for the calculation. |""".stripMargin diff --git a/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_RasterToGridMax.scala b/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_RasterToGridMax.scala index 55cc88b2b..d6b5d947e 100644 --- a/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_RasterToGridMax.scala +++ b/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_RasterToGridMax.scala @@ -34,7 +34,7 @@ object RST_RasterToGridMax extends WithExpressionInfo { override def usage: String = """ - |_FUNC_(expr1, expr2) - Returns a collection of grid index cells with the max pixel value per cell for each band of the raster. + |_FUNC_(expr1, expr2) - Returns a collection of grid index cells with the max pixel value per cell for each band of the raster tile. | The output type is array>>. | Raster mask is taken into account and only valid pixels are used for the calculation. 
|""".stripMargin diff --git a/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_RasterToGridMedian.scala b/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_RasterToGridMedian.scala index 4f799d273..1a1e602ec 100644 --- a/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_RasterToGridMedian.scala +++ b/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_RasterToGridMedian.scala @@ -36,7 +36,7 @@ object RST_RasterToGridMedian extends WithExpressionInfo { override def usage: String = """ - |_FUNC_(expr1, expr2) - Returns a collection of grid index cells with the median pixel value per cell for each band of the raster. + |_FUNC_(expr1, expr2) - Returns a collection of grid index cells with the median pixel value per cell for each band of the raster tile. | The output type is array>>. | Raster mask is taken into account and only valid pixels are used for the calculation. |""".stripMargin diff --git a/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_RasterToGridMin.scala b/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_RasterToGridMin.scala index 541cbbdab..8af76f1f4 100644 --- a/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_RasterToGridMin.scala +++ b/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_RasterToGridMin.scala @@ -34,7 +34,7 @@ object RST_RasterToGridMin extends WithExpressionInfo { override def usage: String = """ - |_FUNC_(expr1, expr2) - Returns a collection of grid index cells with the min pixel value per cell for each band of the raster. + |_FUNC_(expr1, expr2) - Returns a collection of grid index cells with the min pixel value per cell for each band of the raster tile. | The output type is array>>. | Raster mask is taken into account and only valid pixels are used for the calculation. 
|""".stripMargin diff --git a/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_RasterToWorldCoord.scala b/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_RasterToWorldCoord.scala index 42b9a928a..0fba20cca 100644 --- a/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_RasterToWorldCoord.scala +++ b/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_RasterToWorldCoord.scala @@ -47,7 +47,7 @@ object RST_RasterToWorldCoord extends WithExpressionInfo { override def usage: String = """ - |_FUNC_(expr1, expr2, expr3) - Returns the (x, y) pixel in world coordinates using geo transform of the raster. + |_FUNC_(expr1, expr2, expr3) - Returns the (x, y) pixel in world coordinates using geo transform of the raster tile. |""".stripMargin override def example: String = diff --git a/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_RasterToWorldCoordX.scala b/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_RasterToWorldCoordX.scala index 4bd06646a..719689a90 100644 --- a/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_RasterToWorldCoordX.scala +++ b/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_RasterToWorldCoordX.scala @@ -42,7 +42,7 @@ object RST_RasterToWorldCoordX extends WithExpressionInfo { override def usage: String = """ - |_FUNC_(expr1, expr2, expr3) - Returns the x coordinate of the pixel in world coordinates using geo transform of the raster. + |_FUNC_(expr1, expr2, expr3) - Returns the x coordinate of the pixel in world coordinates using geo transform of the raster tile. 
|""".stripMargin override def example: String = diff --git a/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_RasterToWorldCoordY.scala b/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_RasterToWorldCoordY.scala index 262d6bbad..661e07fcf 100644 --- a/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_RasterToWorldCoordY.scala +++ b/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_RasterToWorldCoordY.scala @@ -42,7 +42,7 @@ object RST_RasterToWorldCoordY extends WithExpressionInfo { override def usage: String = """ - |_FUNC_(expr1, expr2, expr3) - Returns the y coordinate of the pixel in world coordinates using geo transform of the raster. + |_FUNC_(expr1, expr2, expr3) - Returns the y coordinate of the pixel in world coordinates using geo transform of the raster tile. |""".stripMargin override def example: String = diff --git a/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_ReTile.scala b/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_ReTile.scala index 4465866dc..1222c7e01 100644 --- a/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_ReTile.scala +++ b/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_ReTile.scala @@ -43,7 +43,7 @@ object RST_ReTile extends WithExpressionInfo { override def usage: String = """ - |_FUNC_(expr1, expr2, expr3) - Returns a set of new rasters with the specified tile size (tileWidth x tileHeight). + |_FUNC_(expr1, expr2, expr3) - Returns a set of new raster tiles with the specified size (tileWidth x tileHeight). 
|""".stripMargin override def example: String = diff --git a/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_Rotation.scala b/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_Rotation.scala index c3cd097c7..17dd5a937 100644 --- a/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_Rotation.scala +++ b/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_Rotation.scala @@ -31,7 +31,7 @@ object RST_Rotation extends WithExpressionInfo { override def usage: String = """ - |_FUNC_(expr1) - Returns the rotation angle of the raster with respect to equator. + |_FUNC_(expr1) - Returns the rotation angle of the raster tile with respect to equator. |""".stripMargin override def example: String = diff --git a/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_SRID.scala b/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_SRID.scala index c8bce06b7..b1bf01464 100644 --- a/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_SRID.scala +++ b/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_SRID.scala @@ -35,7 +35,7 @@ object RST_SRID extends WithExpressionInfo { override def usage: String = """ - |_FUNC_(expr1) - Returns SRID of the raster. + |_FUNC_(expr1) - Returns SRID of the raster tile. |""".stripMargin override def example: String = diff --git a/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_ScaleX.scala b/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_ScaleX.scala index c16891871..0713b4ce8 100644 --- a/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_ScaleX.scala +++ b/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_ScaleX.scala @@ -29,7 +29,7 @@ object RST_ScaleX extends WithExpressionInfo { override def usage: String = """ - |_FUNC_(expr1) - Returns scale X in the raster. + |_FUNC_(expr1) - Returns scale X in the raster tile. 
|""".stripMargin override def example: String = diff --git a/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_ScaleY.scala b/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_ScaleY.scala index 3b0779763..3937fff4c 100644 --- a/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_ScaleY.scala +++ b/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_ScaleY.scala @@ -29,7 +29,7 @@ object RST_ScaleY extends WithExpressionInfo { override def usage: String = """ - |_FUNC_(expr1) - Returns scale Y in the raster. + |_FUNC_(expr1) - Returns scale Y in the raster tile. |""".stripMargin override def example: String = diff --git a/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_SetNoData.scala b/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_SetNoData.scala index 911271d33..bec5c3227 100644 --- a/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_SetNoData.scala +++ b/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_SetNoData.scala @@ -66,7 +66,7 @@ object RST_SetNoData extends WithExpressionInfo { override def usage: String = """ - |_FUNC_(expr1, expr2) - Returns a raster clipped by provided vector. + |_FUNC_(expr1, expr2) - Sets the nodata value of the raster tile for all bands. |""".stripMargin override def example: String = diff --git a/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_SkewX.scala b/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_SkewX.scala index ee3d0c4dd..a3d0107fd 100644 --- a/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_SkewX.scala +++ b/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_SkewX.scala @@ -29,7 +29,7 @@ object RST_SkewX extends WithExpressionInfo { override def usage: String = """ - |_FUNC_(expr1) - Returns skew X in the raster. + |_FUNC_(expr1) - Returns skew X in the raster tile. 
|""".stripMargin override def example: String = diff --git a/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_SkewY.scala b/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_SkewY.scala index ff9903687..2b8db20b2 100644 --- a/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_SkewY.scala +++ b/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_SkewY.scala @@ -29,7 +29,7 @@ object RST_SkewY extends WithExpressionInfo { override def usage: String = """ - |_FUNC_(expr1) - Returns skew Y in the raster. + |_FUNC_(expr1) - Returns skew Y in the raster tile. |""".stripMargin override def example: String = diff --git a/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_Subdatasets.scala b/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_Subdatasets.scala index 8c58e7f74..1e17c2e25 100644 --- a/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_Subdatasets.scala +++ b/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_Subdatasets.scala @@ -30,7 +30,7 @@ object RST_Subdatasets extends WithExpressionInfo { override def name: String = "rst_subdatasets" - override def usage: String = "_FUNC_(expr1) - Extracts subdataset paths and descriptions from a raster dataset." + override def usage: String = "_FUNC_(expr1) - Extracts subdataset paths and descriptions from a raster tile dataset." 
override def example: String = """ diff --git a/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_Subdivide.scala b/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_Subdivide.scala index 9692ccf2d..d689a262d 100644 --- a/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_Subdivide.scala +++ b/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_Subdivide.scala @@ -35,13 +35,14 @@ object RST_Subdivide extends WithExpressionInfo { override def usage: String = """ - |_FUNC_(expr1, expr2) - Returns a set of new rasters with same aspect ratio that are not larger than the threshold memory footprint. + |_FUNC_(expr1, expr2) - Returns a set of new raster tiles with same aspect ratio that are not larger than the + | threshold memory footprint in MBs. |""".stripMargin override def example: String = """ | Examples: - | > SELECT _FUNC_(raster_tile, 256); + | > SELECT _FUNC_(raster_tile, 32); | {index_id, raster_tile, tile_width, tile_height} | {index_id, raster_tile, tile_width, tile_height} | ... diff --git a/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_Summary.scala b/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_Summary.scala index 6351d47f2..b115b0973 100644 --- a/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_Summary.scala +++ b/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_Summary.scala @@ -38,7 +38,7 @@ object RST_Summary extends WithExpressionInfo { override def name: String = "rst_summary" - override def usage: String = "_FUNC_(expr1) - Generates GDAL summary for the raster." + override def usage: String = "_FUNC_(expr1) - Generates GDAL summary for the raster tile." 
override def example: String = """ diff --git a/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_Tessellate.scala b/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_Tessellate.scala index e2fa3cd22..317754f4e 100644 --- a/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_Tessellate.scala +++ b/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_Tessellate.scala @@ -45,7 +45,7 @@ object RST_Tessellate extends WithExpressionInfo { override def usage: String = """ - |_FUNC_(expr1, expr2) - Returns a set of new rasters with the specified resolution within configured grid. + |_FUNC_(expr1, expr2) - Returns a set of new raster tiles with the specified resolution within configured grid. |""".stripMargin override def example: String = diff --git a/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_ToOverlappingTiles.scala b/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_ToOverlappingTiles.scala index 5866e00aa..2c3768513 100644 --- a/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_ToOverlappingTiles.scala +++ b/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_ToOverlappingTiles.scala @@ -45,7 +45,7 @@ object RST_ToOverlappingTiles extends WithExpressionInfo { override def usage: String = """ - |_FUNC_(expr1, expr2, expr3, expr4) - Returns a set of new rasters with the specified tile size (tileWidth x tileHeight). + |_FUNC_(expr1, expr2, expr3, expr4) - Returns a set of new raster tiles with the specified tile size (tileWidth x tileHeight). | The tiles will overlap by the specified amount. 
|""".stripMargin diff --git a/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_TryOpen.scala b/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_TryOpen.scala index b364d39da..2af8cf7a5 100644 --- a/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_TryOpen.scala +++ b/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_TryOpen.scala @@ -27,7 +27,7 @@ object RST_TryOpen extends WithExpressionInfo { override def name: String = "rst_tryopen" - override def usage: String = "_FUNC_(expr1) - Returns true if the raster can be opened." + override def usage: String = "_FUNC_(expr1) - Returns true if the raster tile can be opened." override def example: String = """ diff --git a/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_UpperLeftX.scala b/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_UpperLeftX.scala index 4f050bc7e..3ff0fdd67 100644 --- a/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_UpperLeftX.scala +++ b/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_UpperLeftX.scala @@ -27,7 +27,7 @@ object RST_UpperLeftX extends WithExpressionInfo { override def name: String = "rst_upperleftx" - override def usage: String = "_FUNC_(expr1) - Returns upper left x coordinate." + override def usage: String = "_FUNC_(expr1) - Returns upper left x coordinate of the raster tile." 
override def example: String = """ diff --git a/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_UpperLeftY.scala b/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_UpperLeftY.scala index 0e052e3ae..5ef56ba96 100644 --- a/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_UpperLeftY.scala +++ b/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_UpperLeftY.scala @@ -27,7 +27,7 @@ object RST_UpperLeftY extends WithExpressionInfo { override def name: String = "rst_upperlefty" - override def usage: String = "_FUNC_(expr1) - Returns upper left y coordinate." + override def usage: String = "_FUNC_(expr1) - Returns upper left y coordinate of the raster tile." override def example: String = """ diff --git a/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_Width.scala b/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_Width.scala index 4bd56686a..0dfc596b6 100644 --- a/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_Width.scala +++ b/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_Width.scala @@ -25,7 +25,7 @@ object RST_Width extends WithExpressionInfo { override def name: String = "rst_width" - override def usage: String = "_FUNC_(expr1) - Returns width of the raster." + override def usage: String = "_FUNC_(expr1) - Returns width of the raster tile." 
override def example: String = """ diff --git a/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_WorldToRasterCoord.scala b/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_WorldToRasterCoord.scala index 2d0884a81..0de5cb009 100644 --- a/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_WorldToRasterCoord.scala +++ b/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_WorldToRasterCoord.scala @@ -40,7 +40,7 @@ object RST_WorldToRasterCoord extends WithExpressionInfo { override def name: String = "rst_worldtorastercoord" - override def usage: String = "_FUNC_(expr1, expr2, expr3) - Returns x and y coordinates (pixel, line) of the pixel." + override def usage: String = "_FUNC_(expr1, expr2, expr3) - Returns x and y coordinates (pixel, line) of the raster tile pixel coord." override def example: String = """ diff --git a/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_WorldToRasterCoordX.scala b/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_WorldToRasterCoordX.scala index 26c888fe1..7f23d7c9e 100644 --- a/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_WorldToRasterCoordX.scala +++ b/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_WorldToRasterCoordX.scala @@ -37,7 +37,7 @@ object RST_WorldToRasterCoordX extends WithExpressionInfo { override def name: String = "rst_worldtorastercoordx" - override def usage: String = "_FUNC_(expr1, expr2, expr3) - Returns x coordinate (pixel, line) of the pixel." + override def usage: String = "_FUNC_(expr1, expr2, expr3) - Returns x coordinate (pixel, line) of the raster tile pixel coord." 
override def example: String = """ diff --git a/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_WorldToRasterCoordY.scala b/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_WorldToRasterCoordY.scala index 8bb125faa..3e82f9614 100644 --- a/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_WorldToRasterCoordY.scala +++ b/src/main/scala/com/databricks/labs/mosaic/expressions/raster/RST_WorldToRasterCoordY.scala @@ -37,7 +37,7 @@ object RST_WorldToRasterCoordY extends WithExpressionInfo { override def name: String = "rst_worldtorastercoordy" - override def usage: String = "_FUNC_(expr1, expr2, expr3) - Returns y coordinate (pixel, line) of the pixel." + override def usage: String = "_FUNC_(expr1, expr2, expr3) - Returns y coordinate (pixel, line) of the raster tile pixel coord." override def example: String = """ From 7874c5c517eadbddc39e47dc180c6a5f23573f73 Mon Sep 17 00:00:00 2001 From: Michael Johns Date: Mon, 22 Jan 2024 14:55:03 -0500 Subject: [PATCH 2/9] Added missing functions to existing raster docs (and raster.py). 
--- docs/source/api/raster-functions.rst | 309 ++++++++++++++++++++++++-- docs/source/api/spatial-functions.rst | 63 ++++++ docs/source/api/spatial-indexing.rst | 16 +- python/mosaic/api/raster.py | 215 +++++++++++++----- 4 files changed, 519 insertions(+), 84 deletions(-) diff --git a/docs/source/api/raster-functions.rst b/docs/source/api/raster-functions.rst index 27442fec2..40ae8ae89 100644 --- a/docs/source/api/raster-functions.rst +++ b/docs/source/api/raster-functions.rst @@ -224,7 +224,7 @@ rst_combineavg SELECT rst_combineavg(array(tile1,tile2,tile3)) FROM table LIMIT 1 +----------------------------------------------------------------------------------------------------------------+ - | rst_combineavg(array(tile1,tile2,tile3)) | + | rst_combineavg(array(tile1,tile2,tile3)) | +----------------------------------------------------------------------------------------------------------------+ | {index_id: 593308294097928191, raster: [00 01 10 ... 00], parentPath: "dbfs:/path_to_file", driver: "NetCDF" } | +----------------------------------------------------------------------------------------------------------------+ @@ -281,6 +281,179 @@ rst_combineavgagg | {index_id: 593308294097928191, raster: [00 01 10 ... 00], parentPath: "dbfs:/path_to_file", driver: "NetCDF" } | +----------------------------------------------------------------------------------------------------------------+ + +rst_derivedband +************** + +.. function:: rst_derivedband(tiles, python_func, func_name) + + Combine an array of raster tiles using provided python function. + The rasters must have the same extent, number of bands, and pixel type. + The rasters must have the same pixel size and coordinate reference system. + The output raster will have the same extent as the input rasters. + The output raster will have the same number of bands as the input rasters. + The output raster will have the same pixel type as the input rasters. 
+ The output raster will have the same pixel size as the input rasters. + The output raster will have the same coordinate reference system as the input rasters. + + :param tiles: A column containing an array of raster tiles. + :type col: Column (ArrayType(RasterTileType)) + :param python_func: A function to evaluate in python. + :type col: Column (StringType) + :param func_name: name of the function to evaluate in python. + :type col: Column (StringType) + :rtype: Column: RasterTileType + + :example: + +.. tabs:: + .. code-tab:: py + + df\ + .select( + F.array("tile1","tile2","tile3").alias("tiles"), + F.lit( + """ + import numpy as np + def average(in_ar, out_ar, xoff, yoff, xsize, ysize, raster_xsize, raster_ysize, buf_radius, gt, **kwargs): + out_ar[:] = np.sum(in_ar, axis=0) / len(in_ar) + """).alias("py_func1"), + F.lit("average").alias("func1_name") + )\ + .select(mos.rst_derivedband("tiles","py_func1","func1_name")).limit(1).display() + +----------------------------------------------------------------------------------------------------------------+ + | rst_derivedband(tiles,py_func1,func1_name) | + +----------------------------------------------------------------------------------------------------------------+ + | {index_id: 593308294097928191, raster: [00 01 10 ... 00], parentPath: "dbfs:/path_to_file", driver: "NetCDF" } | + +----------------------------------------------------------------------------------------------------------------+ + + .. 
code-tab:: scala + + df + .select( + array("tile1","tile2","tile3").alias("tiles"), + lit( + """ + |import numpy as np + |def average(in_ar, out_ar, xoff, yoff, xsize, ysize, raster_xsize, raster_ysize, buf_radius, gt, **kwargs): + | out_ar[:] = np.sum(in_ar, axis=0) / len(in_ar) + |""".stripMargin).as("py_func1"), + lit("average").as("func1_name") + ) + .select(mos.rst_derivedband("tiles","py_func1","func1_name")).limit(1).show + +----------------------------------------------------------------------------------------------------------------+ + | rst_derivedband(tiles,py_func1,func1_name) | + +----------------------------------------------------------------------------------------------------------------+ + | {index_id: 593308294097928191, raster: [00 01 10 ... 00], parentPath: "dbfs:/path_to_file", driver: "NetCDF" } | + +----------------------------------------------------------------------------------------------------------------+ + + .. code-tab:: sql + SELECT + rst_derivedband(array(tile1,tile2,tile3), + """ + import numpy as np + def average(in_ar, out_ar, xoff, yoff, xsize, ysize, raster_xsize, raster_ysize, buf_radius, gt, **kwargs): + out_ar[:] = np.sum(in_ar, axis=0) / len(in_ar) + """, + "average") + FROM table LIMIT 1 + +----------------------------------------------------------------------------------------------------------------+ + | rst_derivedband(tiles,py_func1,func1_name) | + +----------------------------------------------------------------------------------------------------------------+ + | {index_id: 593308294097928191, raster: [00 01 10 ... 00], parentPath: "dbfs:/path_to_file", driver: "NetCDF" } | + +----------------------------------------------------------------------------------------------------------------+ + + +rst_derivedbandagg +****************** + +.. 
function:: rst_derivedbandagg(tile, python_func, func_name) + + Combines a group by statement over aggregated raster tiles by using the provided python function. + The rasters must have the same extent, number of bands, and pixel type. + The rasters must have the same pixel size and coordinate reference system. + The output raster will have the same extent as the input rasters. + The output raster will have the same number of bands as the input rasters. + The output raster will have the same pixel type as the input rasters. + The output raster will have the same pixel size as the input rasters. + The output raster will have the same coordinate reference system as the input rasters. + + :param tile: A grouped column containing raster tile(s). + :type col: Column (RasterTileType) + :param python_func: A function to evaluate in python. + :type col: Column (StringType) + :param func_name: name of the function to evaluate in python. + :type col: Column (StringType) + :rtype: Column: RasterTileType + + :example: + +.. tabs:: + .. code-tab:: py + from textwrap import dedent + df\ + .select( + "date", "tile", + F.lit(dedent( + """ + import numpy as np + def average(in_ar, out_ar, xoff, yoff, xsize, ysize, raster_xsize, raster_ysize, buf_radius, gt, **kwargs): + out_ar[:] = np.sum(in_ar, axis=0) / len(in_ar) + """)).alias("py_func1"), + F.lit("average").alias("func1_name") + )\ + .groupBy("date", "py_func1", "func1_name")\ + .agg(mos.rst_derivedbandagg("tile","py_func1","func1_name")).limit(1).display() + +----------------------------------------------------------------------------------------------------------------+ + | rst_derivedbandagg(tile,py_func1,func1_name) | + +----------------------------------------------------------------------------------------------------------------+ + | {index_id: 593308294097928191, raster: [00 01 10 ... 
00], parentPath: "dbfs:/path_to_file", driver: "NetCDF" } | + +----------------------------------------------------------------------------------------------------------------+ + + .. code-tab:: scala + + df + .select( + "date", "tile", + lit( + """ + |import numpy as np + |def average(in_ar, out_ar, xoff, yoff, xsize, ysize, raster_xsize, raster_ysize, buf_radius, gt, **kwargs): + | out_ar[:] = np.sum(in_ar, axis=0) / len(in_ar) + |""".stripMargin).as("py_func1"), + lit("average").as("func1_name") + ) + .groupBy("date", "py_func1", "func1_name") + .agg(mos.rst_derivedbandagg("tile","py_func1","func1_name")).limit(1).show + +----------------------------------------------------------------------------------------------------------------+ + | rst_derivedbandagg(tile,py_func1,func1_name) | + +----------------------------------------------------------------------------------------------------------------+ + | {index_id: 593308294097928191, raster: [00 01 10 ... 00], parentPath: "dbfs:/path_to_file", driver: "NetCDF" } | + +----------------------------------------------------------------------------------------------------------------+ + + .. code-tab:: sql + SELECT + date, py_func1, func1_name, + rst_derivedbandagg(tile, py_func1, func1_name) + FROM (SELECT + date, tile, + """ + import numpy as np + def average(in_ar, out_ar, xoff, yoff, xsize, ysize, raster_xsize, raster_ysize, buf_radius, gt, **kwargs): + out_ar[:] = np.sum(in_ar, axis=0) / len(in_ar) + """ as py_func1, + "average" as func1_name + FROM table + ) + GROUP BY date, py_func1, func1_name + LIMIT 1 + +----------------------------------------------------------------------------------------------------------------+ + | rst_derivedbandagg(tile,py_func1,func1_name) | + +----------------------------------------------------------------------------------------------------------------+ + | {index_id: 593308294097928191, raster: [00 01 10 ... 
00], parentPath: "dbfs:/path_to_file", driver: "NetCDF" } | + +----------------------------------------------------------------------------------------------------------------+ + + rst_frombands ************** @@ -327,7 +500,63 @@ rst_frombands SELECT rst_frombands(array(tile1,tile2,tile3)) FROM table LIMIT 1 +----------------------------------------------------------------------------------------------------------------+ - | rst_frombands(array(tile1,tile2,tile3)) | + | rst_frombands(array(tile1,tile2,tile3)) | + +----------------------------------------------------------------------------------------------------------------+ + | {index_id: 593308294097928191, raster: [00 01 10 ... 00], parentPath: "dbfs:/path_to_file", driver: "NetCDF" } | + +----------------------------------------------------------------------------------------------------------------+ + +rst_fromcontent +*************** + +.. function:: rst_fromcontent(raster_bin, driver, <size_in_MB>) + + Returns a tile from raster data. + The raster must be a binary. + The driver must be one that GDAL can read. + If the size_in_MB parameter is specified, the raster will be split into tiles of the specified size. + If the size_in_MB parameter is not specified or if the size_in_MB < 0, the raster will only be split if + it exceeds Integer.MAX_VALUE. The split will be at a threshold of 64MB in this case. + + :param raster_bin: A column containing the raster data. + :type col: Column (BinaryType) + :param size_in_MB: Optional parameter to specify the size of the raster tile in MB. Default is not to split the input. + :type col: Column (IntegerType) + :rtype: Column: RasterTileType + + :example: + +.. tabs:: + .. 
code-tab:: py + # binary is python bytearray data type + df = spark.read.format("binaryFile")\ + .load("dbfs:/FileStore/geospatial/mosaic/sample_raster_data/binary/netcdf-coral") + df.select(mos.rst_fromcontent("content")).limit(1).display() + +----------------------------------------------------------------------------------------------------------------+ + | rst_fromcontent(content) | + +----------------------------------------------------------------------------------------------------------------+ + | {index_id: 593308294097928191, raster: [00 01 10 ... 00], parentPath: "dbfs:/path_to_file", driver: "NetCDF" } | + +----------------------------------------------------------------------------------------------------------------+ + + .. code-tab:: scala + //binary is scala/java Array(Byte) data type + val df = spark.read + .format("binaryFile") + .load("dbfs:/FileStore/geospatial/mosaic/sample_raster_data/binary/netcdf-coral") + df.select(rst_fromcontent(col("content"))).limit(1).show(false) + +----------------------------------------------------------------------------------------------------------------+ + | rst_fromcontent(content) | + +----------------------------------------------------------------------------------------------------------------+ + | {index_id: 593308294097928191, raster: [00 01 10 ... 00], parentPath: "dbfs:/path_to_file", driver: "NetCDF" } | + +----------------------------------------------------------------------------------------------------------------+ + + .. 
code-tab:: sql + + CREATE TABLE IF NOT EXISTS coral_netcdf + USING binaryFile + OPTIONS (path "dbfs:/FileStore/geospatial/mosaic/sample_raster_data/binary/netcdf-coral"); + SELECT rst_fromcontent(content) FROM coral_netcdf LIMIT 1 + +----------------------------------------------------------------------------------------------------------------+ + | rst_fromcontent(content) | +----------------------------------------------------------------------------------------------------------------+ | {index_id: 593308294097928191, raster: [00 01 10 ... 00], parentPath: "dbfs:/path_to_file", driver: "NetCDF" } | +----------------------------------------------------------------------------------------------------------------+ @@ -524,9 +753,9 @@ rst_getsubdataset SELECT rst_getsubdataset(tile, "sst") FROM table LIMIT 1 +----------------------------------------------------------------------------------------------------------------+ | rst_getsubdataset(tile, sst) | - +----------------------------------------------------------------------------------------------------------------+ - | {index_id: 593308294097928191, raster: [00 01 10 ... 
00], parentPath: "dbfs:/path_to_file", driver: "NetCDF" } | + +----------------------------------------------------------------------------------------------------------------+ rst_height ********** @@ -599,7 +828,7 @@ rst_initnodata df.select(mos.rst_initnodata("tile")).limit(1).display() +----------------------------------------------------------------------------------------------------------------+ - | rst_initnodata(tile) | + | rst_initnodata(tile) | +----------------------------------------------------------------------------------------------------------------+ | {index_id: 593308294097928191, raster: [00 01 10 ... 00], parentPath: "dbfs:/path_to_file", driver: "NetCDF" } | +----------------------------------------------------------------------------------------------------------------+ @@ -608,7 +837,7 @@ rst_initnodata df.select(rst_initnodata(col("tile"))).limit(1).show +----------------------------------------------------------------------------------------------------------------+ - | rst_initnodata(tile) | + | rst_initnodata(tile) | +----------------------------------------------------------------------------------------------------------------+ | {index_id: 593308294097928191, raster: [00 01 10 ... 00], parentPath: "dbfs:/path_to_file", driver: "NetCDF" } | +----------------------------------------------------------------------------------------------------------------+ @@ -617,7 +846,7 @@ rst_initnodata SELECT rst_initnodata(tile) FROM table LIMIT 1 +----------------------------------------------------------------------------------------------------------------+ - | rst_initnodata(tile) | + | rst_initnodata(tile) | +----------------------------------------------------------------------------------------------------------------+ | {index_id: 593308294097928191, raster: [00 01 10 ... 
00], parentPath: "dbfs:/path_to_file", driver: "NetCDF" } | +----------------------------------------------------------------------------------------------------------------+ @@ -666,6 +895,56 @@ rst_isempty |false | +--------------------+ + +rst_mapalgebra +************** + +.. function:: rst_mapalgebra(tile, json_spec) + + Performs map algebra on the raster tile. + Rasters are provided as 'A' to 'Z' values. + Bands are provided as 0..n values. + Uses gdal_calc: command line raster calculator with numpy syntax. Use any basic arithmetic supported by numpy + arrays (such as +, -, *, and /) along with logical operators (such as >, <, =). For this distributed implementation, + all rasters must have the same dimensions and no projection checking is performed. + + :param tile: A column containing the raster tile. + :type col: Column (RasterTileType) + :param json_spec: A column containing the map algebra operation specification. + :type col: Column (StringType) + :rtype: Column: RasterTileType + + :example: + +.. tabs:: + .. code-tab:: py + + df.select(mos.rst_mapalgebra("tile", "{calc: 'A+B', A_index: 0, B_index: 1}").alias("tile")).limit(1).display() + +----------------------------------------------------------------------------------------------------------------+ + | tile | + +----------------------------------------------------------------------------------------------------------------+ + | {index_id: 593308294097928191, raster: [00 01 10 ... 00], parentPath: "dbfs:/path_to_file", driver: "NetCDF" } | + +----------------------------------------------------------------------------------------------------------------+ + + .. 
code-tab:: scala + + df.select(mos.rst_mapalgebra("tile", "{calc: 'A+B', A_index: 0, B_index: 1}").as("tile")).limit(1).show + +----------------------------------------------------------------------------------------------------------------+ + | tile | + +----------------------------------------------------------------------------------------------------------------+ + | {index_id: 593308294097928191, raster: [00 01 10 ... 00], parentPath: "dbfs:/path_to_file", driver: "NetCDF" } | + +----------------------------------------------------------------------------------------------------------------+ + + .. code-tab:: sql + + SELECT rst_mapalgebra(tile, "{calc: 'A+B', A_index: 0, B_index: 1}") as tile FROM table LIMIT 1 + +----------------------------------------------------------------------------------------------------------------+ + | tile | + +----------------------------------------------------------------------------------------------------------------+ + | {index_id: 593308294097928191, raster: [00 01 10 ... 00], parentPath: "dbfs:/path_to_file", driver: "NetCDF" } | + +----------------------------------------------------------------------------------------------------------------+ + + rst_memsize ************* @@ -739,7 +1018,7 @@ rst_merge df.select(F.array("tile1", "tile2", "tile3").alias("tiles"))\ .select(mos.rst_merge("tiles")).limit(1).display() +----------------------------------------------------------------------------------------------------------------+ - | rst_merge(tiles) | + | rst_merge(tiles) | +----------------------------------------------------------------------------------------------------------------+ | {index_id: 593308294097928191, raster: [00 01 10 ... 
00], parentPath: "dbfs:/path_to_file", driver: "NetCDF" } | +----------------------------------------------------------------------------------------------------------------+ @@ -749,7 +1028,7 @@ rst_merge df.select(array("tile1", "tile2", "tile3").as("tiles")) .select(rst_merge(col("tiles"))).limit(1).show +----------------------------------------------------------------------------------------------------------------+ - | rst_merge(tiles) | + | rst_merge(tiles) | +----------------------------------------------------------------------------------------------------------------+ | {index_id: 593308294097928191, raster: [00 01 10 ... 00], parentPath: "dbfs:/path_to_file", driver: "NetCDF" } | +----------------------------------------------------------------------------------------------------------------+ @@ -758,7 +1037,7 @@ rst_merge SELECT rst_merge(array(tile1,tile2,tile3)) FROM table LIMIT 1 +----------------------------------------------------------------------------------------------------------------+ - | rst_merge(array(tile1,tile2,tile3)) | + | rst_merge(array(tile1,tile2,tile3)) | +----------------------------------------------------------------------------------------------------------------+ | {index_id: 593308294097928191, raster: [00 01 10 ... 00], parentPath: "dbfs:/path_to_file", driver: "NetCDF" } | +----------------------------------------------------------------------------------------------------------------+ @@ -1109,7 +1388,7 @@ rst_rastertogridavg .. 
code-tab:: scala df.select(rst_rastertogridavg(col("tile"), lit(3))).show - +--------------------------------------------------------------------------------------------------------------------+ + +--------------------------------------------------------------------------------------------------------------------+ | rst_rastertogridavg(tile, 3) | +--------------------------------------------------------------------------------------------------------------------+ | [[{"cellID": "593176490141548543", "measure": 0}, {"cellID": "593386771740360703", "measure": 1.2037735849056603}, | @@ -2364,7 +2643,7 @@ rst_worldtorastercoord df.select(mos.rst_worldtorastercoord('tile', F.lit(-160.1), F.lit(40.0))).display() +------------------------------------------------------------------------------------------------------------------+ - | rst_worldtorastercoord(tile, -160.1, 40.0) | + | rst_worldtorastercoord(tile, -160.1, 40.0) | +------------------------------------------------------------------------------------------------------------------+ | {"x": 398, "y": 997} | +------------------------------------------------------------------------------------------------------------------+ @@ -2373,7 +2652,7 @@ rst_worldtorastercoord df.select(rst_worldtorastercoord(col("tile"), lit(-160.1), lit(40.0))).show +------------------------------------------------------------------------------------------------------------------+ - | rst_worldtorastercoord(tile, -160.1, 40.0) | + | rst_worldtorastercoord(tile, -160.1, 40.0) | +------------------------------------------------------------------------------------------------------------------+ | {"x": 398, "y": 997} | +------------------------------------------------------------------------------------------------------------------+ @@ -2382,7 +2661,7 @@ rst_worldtorastercoord SELECT rst_worldtorastercoord(tile, -160.1, 40.0) FROM table 
+------------------------------------------------------------------------------------------------------------------+ - | rst_worldtorastercoord(tile, -160.1, 40.0) | + | rst_worldtorastercoord(tile, -160.1, 40.0) | +------------------------------------------------------------------------------------------------------------------+ | {"x": 398, "y": 997} | +------------------------------------------------------------------------------------------------------------------+ diff --git a/docs/source/api/spatial-functions.rst b/docs/source/api/spatial-functions.rst index e2ccc9464..f20b8a398 100644 --- a/docs/source/api/spatial-functions.rst +++ b/docs/source/api/spatial-functions.rst @@ -344,6 +344,69 @@ st_centroid |POINT (25.454545454545453, 26.96969696969697)| +---------------------------------------------+ + +st_concavehull +************* + +.. function:: st_concavehull(geom, concavity, ) + + Compute the concave hull of a geometry or multi-geometry object. It uses concavity and has_holes to determine + the concave hull. Param concavity is the fraction of the difference between the longest and shortest edge lengths in + the Delaunay Triangulation. If set to 1, this is the same as the convex hull. If set to 0, it produces + maximum concaveness. Param has_holes is a boolean that determines whether the concave hull can have holes. If set to + true, the concave hull can have holes. If set to false, the concave hull will not have holes. + + :param geom: The input geometry + :type col: Column + :param concavity: The concavity of the hull + :type col: Column (DoubleType) + :param has_holes: Whether the hull has holes, default false + :type col: Column (BooleanType) + :rtype: Column + + :example: + +.. tabs:: + .. 
code-tab:: py + + df = spark.createDataFrame([{'wkt': 'MULTIPOINT ((10 40), (40 30), (20 20), (30 10))'}]) + df.select(st_concavehull('wkt', lit(0.1))).show(1, False) + +---------------------------------------------+ + |st_concavehull(wkt, 0.1) | + +---------------------------------------------+ + |POLYGON ((10 40, 20 20, 30 10, 40 30, 10 40))| + +---------------------------------------------+ + + .. code-tab:: scala + + val df = List(("MULTIPOINT ((10 40), (40 30), (20 20), (30 10))")).toDF("wkt") + df.select(st_concavehull(col("wkt"), lit(0.1))).show(false) + +---------------------------------------------+ + |st_concavehull(wkt, 0.1) | + +---------------------------------------------+ + |POLYGON ((10 40, 20 20, 30 10, 40 30, 10 40))| + +---------------------------------------------+ + + .. code-tab:: sql + + SELECT st_concavehull("MULTIPOINT ((10 40), (40 30), (20 20), (30 10))", 0.1) + +---------------------------------------------+ + |st_concavehull(wkt, 0.1) | + +---------------------------------------------+ + |POLYGON ((10 40, 20 20, 30 10, 40 30, 10 40))| + +---------------------------------------------+ + + .. code-tab:: r R + + df <- createDataFrame(data.frame(wkt = "MULTIPOINT ((10 40), (40 30), (20 20), (30 10))")) + showDF(select(df, st_concavehull(column("wkt"), lit(0.1)))) + +---------------------------------------------+ + |st_concavehull(wkt, 0.1) | + +---------------------------------------------+ + |POLYGON ((10 40, 20 20, 30 10, 40 30, 10 40))| + +---------------------------------------------+ + + st_convexhull ************* diff --git a/docs/source/api/spatial-indexing.rst b/docs/source/api/spatial-indexing.rst index 3074668c5..0ea059cb4 100644 --- a/docs/source/api/spatial-indexing.rst +++ b/docs/source/api/spatial-indexing.rst @@ -383,7 +383,7 @@ grid_boundary grid_tessellate *************** -.. function:: grid_tessellate(geometry, resolution, keep_core_geometries) +..
function:: grid_tessellate(geometry, resolution, <keep_core_geometries>) Cuts the original `geometry` into several pieces along the grid index borders at the specified `resolution`. @@ -405,9 +405,9 @@ grid_tessellate :param geometry: Geometry :type geometry: Column :param resolution: Index resolution - :type resolution: Column: Integer - :param keep_core_geometries: Whether to keep the core geometries or set them to null - :type keep_core_geometries: Column: Boolean + :type resolution: Column (IntegerType) + :param keep_core_geometries: Whether to keep the core geometries or set them to null, default true + :type keep_core_geometries: Column (BooleanType) :rtype: Column: ArrayType[MosaicType] :example: @@ -505,7 +505,7 @@ grid_tessellate grid_tessellateexplode ********************** -.. function:: grid_tessellateexplode(geometry, resolution, keep_core_geometries) +.. function:: grid_tessellateexplode(geometry, resolution, <keep_core_geometries>) Cuts the original `geometry` into several pieces along the grid index borders at the specified `resolution`.
@@ -527,9 +527,9 @@ grid_tessellateexplode :param geometry: Geometry :type geometry: Column :param resolution: Index resolution - :type resolution: Column: Integer - :param keep_core_geometries: Whether to keep the core geometries or set them to null - :type keep_core_geometries: Column: Boolean + :type resolution: Column (IntegerType) + :param keep_core_geometries: Whether to keep the core geometries or set them to null, default true + :type keep_core_geometries: Column (BooleanType) :rtype: Column: MosaicType :example: diff --git a/python/mosaic/api/raster.py b/python/mosaic/api/raster.py index a2d203363..4d3d689e6 100644 --- a/python/mosaic/api/raster.py +++ b/python/mosaic/api/raster.py @@ -15,7 +15,9 @@ "rst_boundingbox", "rst_clip", "rst_combineavg", + "rst_combineavgagg", "rst_derivedband", + "rst_derivedbandagg", "rst_frombands", "rst_fromcontent", "rst_fromfile", @@ -25,8 +27,10 @@ "rst_height", "rst_initnodata", "rst_isempty", + "rst_mapalgebra", "rst_memsize", "rst_merge", + "rst_mergeagg", "rst_metadata", "rst_ndvi", "rst_numbands", @@ -69,7 +73,7 @@ def rst_bandmetadata(raster_tile: ColumnOrName, band: ColumnOrName) -> Column: Parameters ---------- - raster_tile : Column (StringType) + raster_tile : Column (RasterTileType) Mosaic raster tile struct column. band : Column (IntegerType) Band index, starts from 1. @@ -91,7 +95,7 @@ def rst_boundingbox(raster_tile: ColumnOrName) -> Column: Parameters ---------- - raster_tile : Column (StringType) + raster_tile : Column (RasterTileType) Mosaic raster tile struct column. Returns @@ -113,14 +117,14 @@ def rst_clip(raster_tile: ColumnOrName, geometry: ColumnOrName) -> Column: Parameters ---------- - raster_tile : Column (StringType) + raster_tile : Column (RasterTileType) Mosaic raster tile struct column. geometry : Column (StringType) The geometry to clip the raster to. Returns ------- - Column (StringType) + Column (RasterTileType) Mosaic raster tile struct column. 
""" @@ -135,12 +139,12 @@ def rst_combineavg(raster_tiles: ColumnOrName) -> Column: Parameters ---------- - raster_tiles : Column (ArrayType(StringType)) + raster_tiles : Column (ArrayType(RasterTileType)) Raster tiles to combine. Returns ------- - Column (RasterTile) + Column (RasterTileType) The combined raster tile. """ @@ -149,6 +153,26 @@ def rst_combineavg(raster_tiles: ColumnOrName) -> Column: ) +def rst_combineavgagg(raster_tile: ColumnOrName) -> Column: + """ + Combines the aggregate raster tiles into a single tile. + + Parameters + ---------- + raster_tile : Column (RasterTileType) + Aggregate raster tile col to combine. + + Returns + ------- + Column (RasterTileType) + The combined raster tile. + + """ + return config.mosaic_context.invoke_function( + "rst_combineavgagg", pyspark_to_java_column(raster_tile) + ) + + def rst_derivedband(raster_tile: ColumnOrName, python_func: ColumnOrName, func_name: ColumnOrName) -> Column: """ Creates a new band by applying the given python function to the input rasters. @@ -156,7 +180,7 @@ def rst_derivedband(raster_tile: ColumnOrName, python_func: ColumnOrName, func_n Parameters ---------- - raster_tile : Column (StringType) + raster_tile : Column (RasterTileType) Mosaic raster tile struct column. python_func : Column (StringType) The python function to apply to the bands. @@ -165,7 +189,7 @@ def rst_derivedband(raster_tile: ColumnOrName, python_func: ColumnOrName, func_n Returns ------- - Column (StringType) + Column (RasterTileType) Mosaic raster tile struct column. """ @@ -177,6 +201,34 @@ def rst_derivedband(raster_tile: ColumnOrName, python_func: ColumnOrName, func_n ) +def rst_derivedbandagg(raster_tile: ColumnOrName, python_func: ColumnOrName, func_name: ColumnOrName) -> Column: + """ + Creates a new band by applying the given python function to the input rasters. + The result is a raster tile. + + Parameters + ---------- + raster_tile : Column (RasterTileType) + Aggregate raster tile col to derive from. 
+ python_func : Column (StringType) + The python function to apply to the bands. + func_name : Column (StringType) + The name of the function. + + Returns + ------- + Column (RasterTileType) + Mosaic raster tile struct column. + + """ + return config.mosaic_context.invoke_function( + "rst_derivedbandagg", + pyspark_to_java_column(raster_tile), + pyspark_to_java_column(python_func), + pyspark_to_java_column(func_name), + ) + + def rst_georeference(raster_tile: ColumnOrName) -> Column: """ Returns GeoTransform of the raster as a GT array of doubles. @@ -190,7 +242,7 @@ def rst_georeference(raster_tile: ColumnOrName) -> Column: Parameters ---------- - raster_tile : Column (StringType) + raster_tile : Column (RasterTileType) Mosaic raster tile struct column. Returns @@ -210,7 +262,7 @@ def rst_getnodata(raster_tile: ColumnOrName) -> Column: Parameters ---------- - raster_tile : Column (StringType) + raster_tile : Column (RasterTileType) Mosaic raster tile struct column. band : Column (IntegerType) Band index, starts from 1. @@ -233,14 +285,14 @@ def rst_getsubdataset(raster_tile: ColumnOrName, subdataset: ColumnOrName) -> Co Parameters ---------- - raster_tile : Column (StringType) + raster_tile : Column (RasterTileType) Mosaic raster tile struct column. subdataset : Column (IntegerType) The index of the subdataset to get. Returns ------- - Column (StringType) + Column (RasterTileType) Mosaic raster tile struct of the subdataset. """ @@ -255,7 +307,7 @@ def rst_height(raster_tile: ColumnOrName) -> Column: """ Parameters ---------- - raster_tile : Column (StringType) + raster_tile : Column (RasterTileType) Mosaic raster tile struct column. Returns @@ -275,12 +327,12 @@ def rst_initnodata(raster_tile: ColumnOrName) -> Column: Parameters ---------- - raster_tile : Column (StringType) + raster_tile : Column (RasterTileType) Mosaic raster tile struct column. Returns ------- - Column (StringType) + Column (RasterTileType) Mosaic raster tile struct column. 
""" @@ -293,7 +345,7 @@ def rst_isempty(raster_tile: ColumnOrName) -> Column: """ Parameters ---------- - raster_tile : Column (StringType) + raster_tile : Column (RasterTileType) Mosaic raster tile struct column. Returns @@ -307,11 +359,30 @@ def rst_isempty(raster_tile: ColumnOrName) -> Column: ) +def rst_mapalgebra(raster_tile: ColumnOrName, json_spec: ColumnOrName) -> Column: + """ + Parameters + ---------- + raster_tile : Column (RasterTileType) + Mosaic raster tile struct column. + json_spec : Column (StringType) + + Returns + ------- + Column (RasterTileType) + Mosaic raster tile struct column. + + """ + return config.mosaic_context.invoke_function( + "rst_mapalgebra", pyspark_to_java_column(raster_tile), pyspark_to_java_column(json_spec) + ) + + def rst_memsize(raster_tile: ColumnOrName) -> Column: """ Parameters ---------- - raster_tile : Column (StringType) + raster_tile : Column (RasterTileType) Mosaic raster tile struct column. Returns @@ -329,7 +400,7 @@ def rst_metadata(raster_tile: ColumnOrName) -> Column: """ Parameters ---------- - raster_tile : Column (StringType) + raster_tile : Column (RasterTileType) Mosaic raster tile struct column. Returns @@ -351,12 +422,12 @@ def rst_merge(raster_tiles: ColumnOrName) -> Column: Parameters ---------- - raster_tiles : Column (ArrayType(StringType)) + raster_tiles : Column (ArrayType(RasterTileType)) Raster tiles to merge. Returns ------- - Column (StringType) + Column (RasterTileType) Mosaic raster tile struct of the merged raster. """ @@ -365,6 +436,28 @@ def rst_merge(raster_tiles: ColumnOrName) -> Column: ) +def rst_mergeagg(raster_tile: ColumnOrName) -> Column: + """ + Merges (mosaics) the aggregated raster tiles into a single tile. + The result is Mosaic raster tile struct of the merged raster. + The result is stored in the checkpoint directory. + + Parameters + ---------- + raster_tile : Column (RasterTileType) + Aggregate Raster tile column to merge.
+ + Returns + ------- + Column (RasterTileType) + Mosaic raster tile struct of the merged raster. + + """ + return config.mosaic_context.invoke_function( + "rst_mergeagg", pyspark_to_java_column(raster_tile) + ) + + def rst_frombands(bands: ColumnOrName) -> Column: """ Stack an array of bands into a raster tile. @@ -373,12 +466,12 @@ def rst_frombands(bands: ColumnOrName) -> Column: Parameters ---------- - bands : Column (ArrayType(StringType)) + bands : Column (ArrayType(RasterTileType)) Raster tiles of the bands to merge. Returns ------- - Column (StringType) + Column (RasterTileType) Mosaic raster tile struct of the band stacking. """ @@ -391,7 +484,7 @@ def rst_numbands(raster_tile: ColumnOrName) -> Column: """ Parameters ---------- - raster_tile : Column (StringType) + raster_tile : Column (RasterTileType) Mosaic raster tile struct column. Returns @@ -413,7 +506,7 @@ def rst_ndvi(raster_tile: ColumnOrName, band1: ColumnOrName, band2: ColumnOrName Parameters ---------- - raster_tile : Column (StringType) + raster_tile : Column (RasterTileType) Mosaic raster tile struct column. band1 : Column (IntegerType) The first band index. @@ -422,7 +515,7 @@ def rst_ndvi(raster_tile: ColumnOrName, band1: ColumnOrName, band2: ColumnOrName Returns ------- - Column (StringType) + Column (RasterTileType) Mosaic raster tile structs of the NDVI raster. """ @@ -438,7 +531,7 @@ def rst_pixelheight(raster_tile: ColumnOrName) -> Column: """ Parameters ---------- - raster_tile : Column (StringType) + raster_tile : Column (RasterTileType) Mosaic raster tile struct column. Returns @@ -456,7 +549,7 @@ def rst_pixelwidth(raster_tile: ColumnOrName) -> Column: """ Parameters ---------- - raster_tile : Column (StringType) + raster_tile : Column (RasterTileType) Mosaic raster tile struct column. 
Returns @@ -479,7 +572,7 @@ def rst_rastertogridavg(raster_tile: ColumnOrName, resolution: ColumnOrName) -> Parameters ---------- - raster_tile : Column (StringType) + raster_tile : Column (RasterTileType) Mosaic raster tile struct column. Returns @@ -504,7 +597,7 @@ def rst_rastertogridcount(raster_tile: ColumnOrName, resolution: ColumnOrName) - Parameters ---------- - raster_tile : Column (StringType) + raster_tile : Column (RasterTileType) Mosaic raster tile struct column. Returns @@ -529,7 +622,7 @@ def rst_rastertogridmax(raster_tile: ColumnOrName, resolution: ColumnOrName) -> Parameters ---------- - raster_tile : Column (StringType) + raster_tile : Column (RasterTileType) Mosaic raster tile struct column. Returns @@ -554,7 +647,7 @@ def rst_rastertogridmedian(raster_tile: ColumnOrName, resolution: ColumnOrName) Parameters ---------- - raster_tile : Column (StringType) + raster_tile : Column (RasterTileType) Mosaic raster tile struct column. Returns @@ -579,7 +672,7 @@ def rst_rastertogridmin(raster_tile: ColumnOrName, resolution: ColumnOrName) -> Parameters ---------- - raster_tile : Column (StringType) + raster_tile : Column (RasterTileType) Mosaic raster tile struct column. Returns @@ -605,7 +698,7 @@ def rst_rastertoworldcoord( Parameters ---------- - raster_tile : Column (StringType) + raster_tile : Column (RasterTileType) Mosaic raster tile struct column. Returns @@ -631,12 +724,12 @@ def rst_rastertoworldcoordx( Parameters ---------- - raster_tile : Column (StringType) + raster_tile : Column (RasterTileType) Mosaic raster tile struct column. Returns ------- - Column (StringType) + Column (DoubleType) The X coordinate of the point after applying the GeoTransform of the raster. """ @@ -657,13 +750,13 @@ def rst_rastertoworldcoordy( Parameters ---------- - raster_tile : Column (StringType) + raster_tile : Column (RasterTileType) Mosaic raster tile struct column. 
Returns ------- - Column (StringType) - The X coordinate of the point after applying the GeoTransform of the raster. + Column (DoubleType) + The Y coordinate of the point after applying the GeoTransform of the raster. """ return config.mosaic_context.invoke_function( @@ -685,12 +778,12 @@ def rst_retile( Parameters ---------- - raster_tile : Column (StringType) + raster_tile : Column (RasterTileType) Mosaic raster tile struct column. Returns ------- - Column (StringType) + Column (RasterTileType) Mosaic raster tile structs from the exploded retile. """ @@ -710,7 +803,7 @@ def rst_rotation(raster_tile: ColumnOrName) -> Column: Parameters ---------- - raster_tile : Column (StringType) + raster_tile : Column (RasterTileType) Mosaic raster tile struct column. Returns @@ -730,7 +823,7 @@ def rst_scalex(raster_tile: ColumnOrName) -> Column: Parameters ---------- - raster_tile : Column (StringType) + raster_tile : Column (RasterTileType) Mosaic raster tile struct column. Returns @@ -750,7 +843,7 @@ def rst_scaley(raster_tile: ColumnOrName) -> Column: Parameters ---------- - raster_tile : Column (StringType) + raster_tile : Column (RasterTileType) Mosaic raster tile struct column. Returns @@ -770,14 +863,14 @@ def rst_setnodata(raster_tile: ColumnOrName, nodata: ColumnOrName) -> Column: Parameters ---------- - raster_tile : Column (StringType) + raster_tile : Column (RasterTileType) Mosaic raster tile struct column. nodata : Column (DoubleType) The nodata value to set. Returns ------- - Column (StringType) + Column (RasterTileType) Mosaic raster tile struct column. """ @@ -794,7 +887,7 @@ def rst_skewx(raster_tile: ColumnOrName) -> Column: Parameters ---------- - raster_tile : Column (StringType) + raster_tile : Column (RasterTileType) Mosaic raster tile struct column. 
Returns @@ -814,7 +907,7 @@ def rst_skewy(raster_tile: ColumnOrName) -> Column: Parameters ---------- - raster_tile : Column (StringType) + raster_tile : Column (RasterTileType) Mosaic raster tile struct column. Returns @@ -835,7 +928,7 @@ def rst_srid(raster_tile: ColumnOrName) -> Column: Parameters ---------- - raster_tile : Column (StringType) + raster_tile : Column (RasterTileType) Mosaic raster tile struct column. Returns @@ -857,7 +950,7 @@ def rst_subdatasets(raster_tile: ColumnOrName) -> Column: Parameters ---------- - raster_tile : Column (StringType) + raster_tile : Column (RasterTileType) Mosaic raster tile struct column. Returns @@ -880,7 +973,7 @@ def rst_summary(raster_tile: ColumnOrName) -> Column: Parameters ---------- - raster_tile : Column (StringType) + raster_tile : Column (RasterTileType) Mosaic raster tile struct column. Returns @@ -901,14 +994,14 @@ def rst_tessellate(raster_tile: ColumnOrName, resolution: ColumnOrName) -> Colum Parameters ---------- - raster_tile : Column (StringType) + raster_tile : Column (RasterTileType) Mosaic raster tile struct column. resolution : Column (IntegerType) The resolution of the tiles. Returns ------- - Column (RasterTiles) + Column (RasterTileType) A struct containing the tiles of the raster. """ @@ -981,7 +1074,7 @@ def rst_tryopen(raster_tile: ColumnOrName) -> Column: Parameters ---------- - raster_tile : Column (StringType) + raster_tile : Column (RasterTileType) Mosaic raster tile struct column. Returns @@ -1002,14 +1095,14 @@ def rst_subdivide(raster_tile: ColumnOrName, size_in_mb: ColumnOrName) -> Column Parameters ---------- - raster_tile : Column (StringType) + raster_tile : Column (RasterTileType) Mosaic raster tile struct column. size_in_mb : Column (IntegerType) The size of the tiles in MB. Returns ------- - Column (RasterTiles) + Column (RasterTileType) A collection of tiles of the raster. 
""" @@ -1027,7 +1120,7 @@ def rst_upperleftx(raster_tile: ColumnOrName) -> Column: Parameters ---------- - raster_tile : Column (StringType) + raster_tile : Column (RasterTileType) Mosaic raster tile struct column. Returns @@ -1048,7 +1141,7 @@ def rst_upperlefty(raster_tile: ColumnOrName) -> Column: Parameters ---------- - raster_tile : Column (StringType) + raster_tile : Column (RasterTileType) Mosaic raster tile struct column. Returns @@ -1068,7 +1161,7 @@ def rst_width(raster_tile: ColumnOrName) -> Column: Parameters ---------- - raster_tile : Column (StringType) + raster_tile : Column (RasterTileType) Mosaic raster tile struct column. Returns @@ -1093,7 +1186,7 @@ def rst_worldtorastercoord( Parameters ---------- - raster_tile : Column (StringType) + raster_tile : Column (RasterTileType) Mosaic raster tile struct column. Returns @@ -1122,7 +1215,7 @@ def rst_worldtorastercoordx( Parameters ---------- - raster_tile : Column (StringType) + raster_tile : Column (RasterTileType) Mosaic raster tile struct column. Returns @@ -1151,7 +1244,7 @@ def rst_worldtorastercoordy( Parameters ---------- - raster_tile : Column (StringType) + raster_tile : Column (RasterTileType) Mosaic raster tile struct column. Returns From 37146eb58d4d5f141359ec0d8e0ea8c56c6673f5 Mon Sep 17 00:00:00 2001 From: Michael Johns Date: Mon, 22 Jan 2024 17:22:24 -0500 Subject: [PATCH 3/9] updated install-gdal and rasterio-udfs docs; more api cleanup. 
--- docs/source/api/raster-functions.rst | 196 +++++++++++++------------- docs/source/api/rasterio-udfs.rst | 124 +++++++++++++--- docs/source/api/spatial-functions.rst | 60 ++++---- docs/source/usage/install-gdal.rst | 66 +++++++-- 4 files changed, 282 insertions(+), 164 deletions(-) diff --git a/docs/source/api/raster-functions.rst b/docs/source/api/raster-functions.rst index 40ae8ae89..59d2368b7 100644 --- a/docs/source/api/raster-functions.rst +++ b/docs/source/api/raster-functions.rst @@ -30,7 +30,7 @@ rst_bandmetadata Metadata is return as a map of key value pairs. :param tile: A column containing the raster tile. For < 0.3.11 string representing the path to a raster file or byte array. - :type col: Column (RasterTileType) + :type tile: Column (RasterTileType) :param band: The band number to extract metadata for. :type band: Column (IntegerType) :rtype: Column: MapType(StringType, StringType) @@ -94,7 +94,7 @@ rst_boundingbox Returns the bounding box of the raster as a polygon geometry. :param tile: A column containing the raster tile. For < 0.3.11 string representing the path to a raster file or byte array. - :type col: Column (RasterTileType) + :type tile: Column (RasterTileType) :rtype: Column: StructType(DoubleType, DoubleType, DoubleType, DoubleType) :example: @@ -142,9 +142,9 @@ rst_clip The output raster will have the same coordinate reference system as the input raster. :param tile: A column containing the raster tile. - :type col: Column (RasterTileType) + :type tile: Column (RasterTileType) :param geometry: A column containing the geometry to clip the raster to. - :type col: Column (GeometryType) + :type geometry: Column (GeometryType) :rtype: Column: RasterTileType :example: @@ -192,7 +192,7 @@ rst_combineavg The output raster will have the same coordinate reference system as the input rasters. :param tiles: A column containing an array of raster tiles. 
- :type col: Column (ArrayType(RasterTileType)) + :type tiles: Column (ArrayType(RasterTileType)) :rtype: Column: RasterTileType :example: @@ -244,7 +244,7 @@ rst_combineavgagg The output raster will have the same coordinate reference system as the input rasters. :param tile: A grouped column containing raster tiles. - :type col: Column (ArrayType(RasterTileType)) + :type tile: Column (RasterTileType) :rtype: Column: RasterTileType :example: @@ -297,11 +297,11 @@ rst_derivedband The output raster will have the same coordinate reference system as the input rasters. :param tiles: A column containing an array of raster tiles. - :type col: Column (ArrayType(RasterTileType)) + :type tiles: Column (ArrayType(RasterTileType)) :param python_func: A function to evaluate in python. - :type col: Column (StringType) + :type python_func: Column (StringType) :param func_name: name of the function to evaluate in python. - :type col: Column (StringType) + :type func_name: Column (StringType) :rtype: Column: RasterTileType :example: @@ -379,11 +379,11 @@ rst_derivedbandagg The output raster will have the same coordinate reference system as the input rasters. :param tile: A grouped column containing raster tile(s). - :type col: Column (RasterTileType) + :type tile: Column (RasterTileType) :param python_func: A function to evaluate in python. - :type col: Column (StringType) + :type python_func: Column (StringType) :param func_name: name of the function to evaluate in python. - :type col: Column (StringType) + :type func_name: Column (StringType) :rtype: Column: RasterTileType :example: @@ -469,7 +469,7 @@ rst_frombands The output raster will have the same coordinate reference system as the input rasters. :param tiles: A column containing an array of raster tiles. - :type col: Column (ArrayType(RasterTileType)) + :type tiles: Column (ArrayType(RasterTileType)) :rtype: Column: RasterTileType :example: @@ -518,9 +518,9 @@ rst_fromcontent it exceeds Integer.MAX_VALUE. 
The split will be at a threshold of 64MB in this case. :param raster_bin: A column containing the raster data. - :type col: Column (BinaryType) + :type raster_bin: Column (BinaryType) :param size_in_MB: Optional parameter to specify the size of the raster tile in MB. Default is not to split the input. - :type col: Column (IntegerType) + :type size_in_MB: Column (IntegerType) :rtype: Column: RasterTileType :example: @@ -575,9 +575,9 @@ rst_fromfile it exceeds Integer.MAX_VALUE. The split will be at a threshold of 64MB in this case. :param path: A column containing the path to a raster file. - :type col: Column (StringType) + :type path: Column (StringType) :param size_in_MB: Optional parameter to specify the size of the raster tile in MB. Default is not to split the input. - :type col: Column (IntegerType) + :type size_in_MB: Column (IntegerType) :rtype: Column: RasterTileType :example: @@ -634,7 +634,7 @@ rst_georeference GT(5) n-s pixel resolution / pixel height (negative value for a north-up image). :param tile: A column containing the raster tile. For < 0.3.11 string representing the path to a raster file or byte array. - :type col: Column (RasterTileType) + :type tile: Column (RasterTileType) :rtype: Column: MapType(StringType, DoubleType) :example: @@ -678,7 +678,7 @@ rest_getnodata Returns the nodata value of the raster tile bands. :param tile: A column containing the raster tile. For < 0.3.11 string representing the path to a raster file or byte array. - :type col: Column (RasterTileType) + :type tile: Column (RasterTileType) :rtype: Column: ArrayType(DoubleType) :example: @@ -722,9 +722,9 @@ rst_getsubdataset The subdataset name must be a valid subdataset name for the raster. :param tile: A column containing the raster tile. For < 0.3.11 string representing the path to a raster file or byte array. - :type col: Column (RasterTileType) + :type tile: Column (RasterTileType) :param name: A column containing the name of the subdataset to return. 
- :type col: Column (StringType) + :type name: Column (StringType) :rtype: Column: RasterTileType :example: @@ -765,7 +765,7 @@ rst_height Returns the height of the raster tile in pixels. :param tile: A column containing the raster tile. For < 0.3.11 string representing the path to a raster file or byte array. - :type col: Column (RasterTileType) + :type tile: Column (RasterTileType) :rtype: Column: IntegerType :example: @@ -818,7 +818,7 @@ rst_initnodata The default nodata value for DoubleType is Double.MinValue (-1.7976931348623157E308). :param tile: A column containing the raster tile. - :type col: Column (RasterTileType) + :type tile: Column (RasterTileType) :rtype: Column: RasterTileType :example: @@ -859,7 +859,7 @@ rst_isempty Returns true if the raster tile is empty. :param tile: A column containing the raster tile. For < 0.3.11 string representing the path to a raster file or byte array. - :type col: Column (RasterTileType) + :type tile: Column (RasterTileType) :rtype: Column: BooleanType :example: @@ -909,9 +909,9 @@ rst_mapalgebra all rasters must have the same dimensions and no projection checking is performed. :param tile: A column containing the raster tile. - :type col: Column (RasterTileType) + :type tile: Column (RasterTileType) :param json_spec: A column containing the map algebra operation specification. - :type col: Column (StringType) + :type json_spec: Column (StringType) :rtype: Column: RasterTileType :example: @@ -953,7 +953,7 @@ rst_memsize Returns size of the raster tile in bytes. :param tile: A column containing the raster tile. For < 0.3.11 string representing the path to a raster file or byte array. - :type col: Column (RasterTileType) + :type tile: Column (RasterTileType) :rtype: Column: LongType :example: @@ -1006,8 +1006,8 @@ rst_merge The output raster will have the same pixel size as the highest resolution input rasters. The output raster will have the same coordinate reference system as the input rasters. 
- :param tile: A column containing an array of raster tiles. - :type col: Column (ArrayType(RasterTileType)) + :param tiles: A column containing an array of raster tiles. + :type tiles: Column (ArrayType(RasterTileType)) :rtype: Column: RasterTileType :example: @@ -1045,7 +1045,7 @@ rst_merge rst_mergeagg ************ -.. function:: rst_mergeagg(tiles) +.. function:: rst_mergeagg(tile) Combines a grouped aggregate of raster tiles into a single raster. The rasters do not need to have the same extent. @@ -1063,7 +1063,7 @@ rst_mergeagg The output raster will have the same coordinate reference system as the input rasters. :param tile: A column containing raster tiles. - :type col: Column (RasterTileType) + :type tile: Column (RasterTileType) :rtype: Column: RasterTileType :example: @@ -1109,7 +1109,7 @@ rst_metadata Metadata is return as a map of key value pairs. :param tile: A column containing the raster tile. For < 0.3.11 string representing the path to a raster file or byte array. - :type col: Column (RasterTileType) + :type tile: Column (RasterTileType) :rtype: Column: MapType(StringType, StringType) :example: @@ -1182,11 +1182,11 @@ rst_ndvi The output raster will have the same coordinate reference system as the input raster. :param tile: A column containing the raster tile. - :type col: Column (RasterTileType) + :type tile: Column (RasterTileType) :param red_band_num: A column containing the band number of the red band. - :type col: Column (IntegerType) + :type red_band_num: Column (IntegerType) :param nir_band_num: A column containing the band number of the near infrared band. - :type col: Column (IntegerType) + :type nir_band_num: Column (IntegerType) :rtype: Column: RasterTileType :example: @@ -1227,7 +1227,7 @@ rst_numbands Returns number of bands in the raster tile. :param tile: A column containing the raster tile. For < 0.3.11 string representing the path to a raster file or byte array. 
- :type col: Column (RasterTileType) + :type tile: Column (RasterTileType) :rtype: Column: IntegerType :example: @@ -1271,7 +1271,7 @@ rst_pixelheight Returns the height of the pixel in the raster tile derived via GeoTransform. :param tile: A column containing the raster tile. For < 0.3.11 string representing the path to a raster file or byte array. - :type col: Column (RasterTileType) + :type tile: Column (RasterTileType) :rtype: Column: DoubleType :example: @@ -1315,7 +1315,7 @@ rst_pixelwidth Returns the width of the pixel in the raster tile derived via GeoTransform. :param tile: A column containing the raster tile. For < 0.3.11 string representing the path to a raster file or byte array. - :type col: Column (RasterTileType) + :type tile: Column (RasterTileType) :rtype: Column: DoubleType :example: @@ -1362,9 +1362,9 @@ rst_rastertogridavg The value/measure for each cell is the average of the pixel values in the cell. :param tile: A column containing the raster tile. For < 0.3.11 string representing the path to a raster file or byte array. - :type col: Column (RasterTileType) + :type tile: Column (RasterTileType) :param resolution: A resolution of the grid index system. - :type col: Column (IntegerType) + :type resolution: Column (IntegerType) :rtype: Column: ArrayType(ArrayType(StructType(LongType|StringType, DoubleType))) :example: @@ -1431,9 +1431,9 @@ rst_rastertogridcount The value/measure for each cell is the average of the pixel values in the cell. :param tile: A column containing the raster tile. For < 0.3.11 string representing the path to a raster file or byte array. - :type col: Column (RasterTileType) + :type tile: Column (RasterTileType) :param resolution: A resolution of the grid index system. 
- :type col: Column (IntegerType) + :type resolution: Column (IntegerType) :rtype: Column: ArrayType(ArrayType(StructType(LongType|StringType, DoubleType))) :example: @@ -1500,9 +1500,9 @@ rst_rastertogridmax The value/measure for each cell is the maximum pixel value. :param tile: A column containing the raster tile. For < 0.3.11 string representing the path to a raster file or byte array. - :type col: Column (RasterTileType) + :type tile: Column (RasterTileType) :param resolution: A resolution of the grid index system. - :type col: Column (IntegerType) + :type resolution: Column (IntegerType) :rtype: Column: ArrayType(ArrayType(StructType(LongType|StringType, DoubleType))) :example: @@ -1569,9 +1569,9 @@ rst_rastertogridmedian The value/measure for each cell is the median pixel value. :param tile: A column containing the raster tile. For < 0.3.11 string representing the path to a raster file or byte array. - :type col: Column (RasterTileType) + :type tile: Column (RasterTileType) :param resolution: A resolution of the grid index system. - :type col: Column (IntegerType) + :type resolution: Column (IntegerType) :rtype: Column: ArrayType(ArrayType(StructType(LongType|StringType, DoubleType))) :example: @@ -1638,9 +1638,9 @@ rst_rastertogridmin The value/measure for each cell is the median pixel value. :param tile: A column containing the raster tile. For < 0.3.11 string representing the path to a raster file or byte array. - :type col: Column (RasterTileType) + :type tile: Column (RasterTileType) :param resolution: A resolution of the grid index system. - :type col: Column (IntegerType) + :type resolution: Column (IntegerType) :rtype: Column: ArrayType(ArrayType(StructType(LongType|StringType, DoubleType))) :example: @@ -1706,11 +1706,11 @@ rst_rastertoworldcoord The coordinates are computed using the GeoTransform of the raster to respect the projection. :param tile: A column containing the raster tile. 
For < 0.3.11 string representing the path to a raster file or byte array. - :type col: Column (RasterTileType) + :type tile: Column (RasterTileType) :param x: x coordinate of the pixel. - :type col: Column (IntegerType) + :type x: Column (IntegerType) :param y: y coordinate of the pixel. - :type col: Column (IntegerType) + :type y: Column (IntegerType) :rtype: Column: StringType :example: @@ -1752,11 +1752,11 @@ rst_rastertoworldcoordx The result is the X coordinate of the point after applying the GeoTransform of the raster. :param tile: A column containing the raster tile. For < 0.3.11 string representing the path to a raster file or byte array. - :type col: Column (RasterTileType) + :type tile: Column (RasterTileType) :param x: x coordinate of the pixel. - :type col: Column (IntegerType) + :type x: Column (IntegerType) :param y: y coordinate of the pixel. - :type col: Column (IntegerType) + :type y: Column (IntegerType) :rtype: Column: DoubleType :example: @@ -1798,11 +1798,11 @@ rst_rastertoworldcoordy The result is the X coordinate of the point after applying the GeoTransform of the raster. :param tile: A column containing the raster tile. For < 0.3.11 string representing the path to a raster file or byte array. - :type col: Column (RasterTileType) + :type tile: Column (RasterTileType) :param x: x coordinate of the pixel. - :type col: Column (IntegerType) + :type x: Column (IntegerType) :param y: y coordinate of the pixel. - :type col: Column (IntegerType) + :type y: Column (IntegerType) :rtype: Column: DoubleType :example: @@ -1846,11 +1846,11 @@ rst_retile The result set is automatically exploded. :param tile: A column containing the raster tile. For < 0.3.11 string representing the path to a raster file or byte array. - :type col: Column (RasterTileType) + :type tile: Column (RasterTileType) :param width: The width of the tiles. - :type col: Column (IntegerType) + :type width: Column (IntegerType) :param height: The height of the tiles. 
- :type col: Column (IntegerType) + :type height: Column (IntegerType) :rtype: Column: (RasterTileType) :example: @@ -1896,7 +1896,7 @@ rst_rotation The rotation is computed using the GeoTransform of the raster. :param tile: A column containing the raster tile. For < 0.3.11 string representing the path to a raster file or byte array. - :type col: Column (RasterTileType) + :type tile: Column (RasterTileType) :rtype: Column: DoubleType :example: @@ -1940,7 +1940,7 @@ rst_scalex Computes the scale of the raster tile in the X direction. :param tile: A column containing the raster tile. For < 0.3.11 string representing the path to a raster file or byte array. - :type col: Column (RasterTileType) + :type tile: Column (RasterTileType) :rtype: Column: DoubleType :example: @@ -1981,7 +1981,7 @@ rst_scaley Computes the scale of the raster tile in the Y direction. :param tile: A column containing the raster tile. For < 0.3.11 string representing the path to a raster file or byte array. - :type col: Column (RasterTileType) + :type tile: Column (RasterTileType) :rtype: Column: DoubleType :example: @@ -2025,9 +2025,9 @@ rst_setnodata If an array of values is passed, the nodata value is set for each band of the raster. :param tile: A column containing the raster tile. For < 0.3.11 string representing the path to a raster file or byte array.A column containing the path to a raster file. - :type col: Column (RasterTileType) + :type tile: Column (RasterTileType) :param nodata: The nodata value to set. - :type col: Column (DoubleType) / ArrayType(DoubleType) + :type nodata: Column (DoubleType) / ArrayType(DoubleType) :rtype: Column: (RasterTileType) :example: @@ -2072,7 +2072,7 @@ rst_skewx Computes the skew of the raster tile in the X direction. :param tile: A column containing the raster tile. For < 0.3.11 string representing the path to a raster file or byte array.A column containing the path to a raster file. 
- :type col: Column (RasterTileType) + :type tile: Column (RasterTileType) :rtype: Column: DoubleType :example: @@ -2113,7 +2113,7 @@ rst_skewy Computes the skew of the raster tile in the Y direction. :param tile: A column containing the raster tile. For < 0.3.11 string representing the path to a raster file or byte array.A column containing the path to a raster file. - :type col: Column (RasterTileType) + :type tile: Column (RasterTileType) :rtype: Column: DoubleType :example: @@ -2157,7 +2157,7 @@ rst_srid .. note:: For complex CRS definition the EPSG code may default to 0. :param tile: A column containing the raster tile. For < 0.3.11 string representing the path to a raster file or byte array.A column containing the path to a raster file. - :type col: Column (RasterTileType) + :type tile: Column (RasterTileType) :rtype: Column: DoubleType :example: @@ -2200,7 +2200,7 @@ rst_subdatasets The result is a map of the subdataset path to the subdatasets and the description of the subdatasets. :param tile: A column containing the raster tile. For < 0.3.11 string representing the path to a raster file or byte array.A column containing the path to a raster file. - :type col: Column (RasterTileType) + :type tile: Column (RasterTileType) :rtype: Column: MapType(StringType, StringType) :example: @@ -2248,7 +2248,7 @@ rst_subdivide .. function:: rst_subdivide(tile, sizeInMB) Subdivides the raster tile to the given tile size in MB. The result is a collection of new raster tiles. - The tiles are split until the expected size of a tile is < sizeInMB. + The tiles are split until the expected size of a tile is < size_in_MB. The tile is always split in 4 tiles. This ensures that the tiles are always split in the same way. The aspect ratio of the tiles is preserved. The result set is automatically exploded. @@ -2256,8 +2256,9 @@ rst_subdivide .. note:: The size of the tiles is approximate. Due to compressions and other effects we cannot guarantee the size of the tiles in MB. 
:param tile: A column containing the raster tile. For < 0.3.11 string representing the path to a raster file or byte array.A column containing the path to a raster file. - :type col: Column (RasterTileType) - :param sizeInMB: The size of the tiles in MB. + :type tile: Column (RasterTileType) + :param size_in_MB: The size of the tiles in MB. + :type size_in_MB: Column (IntegerType) :example: @@ -2304,7 +2305,7 @@ rst_summary The result is stored as JSON. :param tile: A column containing the raster tile. For < 0.3.11 string representing the path to a raster file or byte array.A column containing the path to a raster file. - :type col: Column (RasterTileType) + :type tile: Column (RasterTileType) :rtype: Column: MapType(StringType, StringType) :example: @@ -2361,8 +2362,9 @@ rst_tessellate The output tiles have same number of bands as the input rasters. :param tile: A column containing the raster tile. - :type col: Column (RasterTileType) + :type tile: Column (RasterTileType) :param resolution: The resolution of the supported grid. + :type resolution: Column (IntegerType) :example: @@ -2410,13 +2412,13 @@ rst_tooverlappingtiles The output tiles have same number of bands as the input rasters. :param tile: A column containing the raster tile. - :type col: Column (RasterTileType) + :type tile: Column (RasterTileType) :param width: The width of the tiles in pixels. - :type col: Column (IntegerType) + :type width: Column (IntegerType) :param height: The height of the tiles in pixels. - :type col: Column (IntegerType) + :type height: Column (IntegerType) :param overlap: The overlap of the tiles in percentage. - :type col: Column (IntegerType) + :type overlap: Column (IntegerType) :example: @@ -2459,7 +2461,7 @@ rst_tryopen Tries to open the raster tile. If the raster cannot be opened the result is false and if the raster can be opened the result is true. :param tile: A column containing the raster tile. 
- :type col: Column (RasterTileType) + :type tile: Column (RasterTileType) :rtype: Column: BooleanType :example: @@ -2501,7 +2503,7 @@ rst_upperleftx The value is computed based on GeoTransform. :param tile: A column containing the raster tile. For < 0.3.11 string representing the path to a raster file or byte array.A column containing the path to a raster file. - :type col: Column (RasterTileType) + :type tile: Column (RasterTileType) :rtype: Column: DoubleType :example: @@ -2543,7 +2545,7 @@ rst_upperlefty The value is computed based on GeoTransform. :param tile: A column containing the raster tile. For < 0.3.11 string representing the path to a raster file or byte array.A column containing the path to a raster file. - :type col: Column (RasterTileType) + :type tile: Column (RasterTileType) :rtype: Column: DoubleType :example: @@ -2585,7 +2587,7 @@ rst_width :param tile: A column containing the raster tile. For < 0.3.11 string representing the path to a raster file or byte array.A column containing the path to a raster file. - :type col: Column (RasterTileType) + :type tile: Column (RasterTileType) :rtype: Column: IntegerType :example: @@ -2629,11 +2631,11 @@ rst_worldtorastercoord The coordinates are resolved using GeoTransform. :param tile: A column containing the raster tile. For < 0.3.11 string representing the path to a raster file or byte array.A column containing the path to a raster file. - :type col: Column (RasterTileType) - :param x: X world coordinate. - :type col: Column (StringType) - :param y: Y world coordinate. - :type col: Column (StringType) + :type tile: Column (RasterTileType) + :param xworld: X world coordinate. + :type xworld: Column (DoubleType) + :param yworld: Y world coordinate. + :type yworld: Column (DoubleType) :rtype: Column: StructType(IntegerType, IntegerType) :example: @@ -2679,11 +2681,11 @@ rst_worldtorastercoordx :param tile: A column containing the raster tile. 
For < 0.3.11 string representing the path to a raster file or byte array.A column containing the path to a raster file. - :type col: Column (RasterTileType) - :param x: X world coordinate. - :type col: Column (StringType) - :param y: Y world coordinate. - :type col: Column (StringType) + :type tile: Column (RasterTileType) + :param xworld: X world coordinate. + :type xworld: Column (DoubleType) + :param yworld: Y world coordinate. + :type yworld: Column (DoubleType) :rtype: Column: IntegerType :example: @@ -2729,11 +2731,11 @@ rst_worldtorastercoordy :param tile: A column containing the raster tile. For < 0.3.11 string representing the path to a raster file or byte array.A column containing the path to a raster file. - :type col: Column (RasterTileType) - :param x: X world coordinate. - :type col: Column (StringType) - :param y: Y world coordinate. - :type col: Column (StringType) + :type tile: Column (RasterTileType) + :param xworld: X world coordinate. + :type xworld: Column (DoubleType) + :param yworld: Y world coordinate. + :type yworld: Column (DoubleType) :rtype: Column: IntegerType :example: diff --git a/docs/source/api/rasterio-udfs.rst b/docs/source/api/rasterio-udfs.rst index 2d5c0bb5e..c1ea51672 100644 --- a/docs/source/api/rasterio-udfs.rst +++ b/docs/source/api/rasterio-udfs.rst @@ -234,40 +234,118 @@ Firstly we will create a spark DataFrame from a directory of raster files. +-----------------------------------------------------------+------------------------------+-----------+---------------------+-------+-------+-----------+----------------------+-------------+-------+---------------------------------------------------------------------------------------------------------------+ -Next we will define a function that will write a given raster file to disk. +Next we will define a function that will write a given raster file to disk. 
A "gotcha" to keep in mind is that you do +not want to have a file context manager open when you go to write out its contents, as the context manager will not yet +have been flushed. .. code-block:: python - import numpy as np - import rasterio - from rasterio.io import MemoryFile - from io import BytesIO - from pyspark.sql.functions import udf - from pathlib import Path + @udf("string") + def write_raster(raster, driver, file_id, fuse_dir): + from io import BytesIO + from pathlib import Path + from pyspark.sql.functions import udf + from rasterio.io import MemoryFile + import numpy as np + import rasterio + import shutil + import tempfile + + # - [1] populate the initial profile + # # profile is needed in order to georeference the image + profile = None + with MemoryFile(BytesIO(raster)) as memfile: + with memfile.open() as dataset: + profile = dataset.profile + + # - [2] get the correct extension + extensions_map = rasterio.drivers.raster_driver_extensions() + driver_map = {v: k for k, v in extensions_map.items()} + extension = driver_map[driver] #e.g. GTiff + file_name = f"{file_id}.{extension}" + + # - [3] write local raster + # - this is showing a single band [1] + # being written + with tempfile.TemporaryDirectory() as tmp_dir: + tmp_path = f"{tmp_dir}/{file_name}" + # here you can update profile using .update method + # example https://rasterio.readthedocs.io/en/latest/topics/writing.html + with rasterio.open( + tmp_path, + "w", + **profile + ) as dst: + dst.write(raster,1) # <- adjust as needed + + # - [4] copy to fuse path + Path(fuse_dir).mkdir(parents=True, exist_ok=True) + fuse_path = f"{fuse_dir}/{file_name}" + if not os.path.exists(fuse_path): + shutil.copyfile(tmp_path, fuse_path) + return fuse_path + +Finally we will apply the function to the DataFrame. + +.. 
code-block:: python + + df.select( + write_raster( + "tile.raster", + lit("GTiff").alias("driver"), + "uuid", + lit("dbfs:/path/to/output/dir").alias("fuse_dir") + ) + ).display() + +-------------------------------------+ + | write_raster(raster, driver, uuid, fuse_dir)| + +-------------------------------------+ + | dbfs:/path/to/output/dir/1234.tif | + | dbfs:/path/to/output/dir/4545.tif | + | dbfs:/path/to/output/dir/3215.tif | + | ... | + +-------------------------------------+ + +Sometimes you don't need to be quite as fancy. Consider when you simply want to write out the raster contents, +in this case as a TIF (or you could specify the extension or include it in the file_id). + +.. code-block:: python @udf("string") - def write_raster(raster, file_id, parent_dir): - with MemoryFile(BytesIO(raster)) as memfile: - with memfile.open() as dataset: - Path(outputpath).mkdir(parents=True, exist_ok=True) - extensions_map = rasterio.drivers.raster_driver_extensions() - driver_map = {v: k for k, v in extensions_map.items()} - extension = driver_map[dataset.driver] - path = f"{parent_dir}/{file_id}.{extension}" - # If you want to write the raster to a different format - # you can update the profile here. 
Note that the extension - # should match the driver format - with rasterio.open(path, "w", **dataset.profile) as dst: - dst.write(dataset.read()) - return path + def write_tif(raster, file_id, fuse_dir): + from pathlib import Path + import os + import shutil + import tempfile + + Path(fuse_dir).mkdir(parents=True, exist_ok=True) + file_name = f"{file_id}.tif" + fuse_path = f"{fuse_dir}/{file_name}" + if not os.path.exists(fuse_path): + with tempfile.TemporaryDirectory() as tmp_dir: + tmp_path = f"{tmp_dir}/{file_name}" + # - write within the tmp_dir context + # - flush the writer before copy + tmp_file = open(tmp_path, "wb") + tmp_file.write(raster) # <- write entire binary content + tmp_file.close() + # - copy local to fuse + shutil.copyfile(tmp_path, fuse_path) + return fuse_path Finally we will apply the function to the DataFrame. .. code-block:: python - df.select(write_raster("tile.raster", "uuid", lit("dbfs:/path/to/output/dir"))).show() + df.select( + write_tif( + "tile.raster", + "uuid", + lit("dbfs:/path/to/output/dir").alias("fuse_dir") + ) + ).display() +-------------------------------------+ - | write_raster(raster, output, output)| + | write_tif(raster, uuid, fuse_dir)| +-------------------------------------+ | dbfs:/path/to/output/dir/1234.tif | | dbfs:/path/to/output/dir/4545.tif | diff --git a/docs/source/api/spatial-functions.rst b/docs/source/api/spatial-functions.rst index f20b8a398..b5a8473c8 100644 --- a/docs/source/api/spatial-functions.rst +++ b/docs/source/api/spatial-functions.rst @@ -236,7 +236,7 @@ st_bufferloop .. figure:: ../images/st_bufferloop/geom.png :figclass: doc-figure - Fig 1. ST_BufferLoop(geom, 0.02, 0.04) + Fig 1. ST_BufferLoop(wkt, 0.02, 0.04) st_centroid2D [Deprecated] ************************** @@ -348,7 +348,7 @@ st_centroid st_concavehull ************* -.. function:: st_concavehull(geom, concavity, ) +.. function:: st_concavehull(col, concavity, ) Compute the concave hull of a geometry or multi-geometry object. 
It uses concavity and has_holes to determine the concave hull. Param concavity is the fraction of the difference between the longest and shortest edge lengths in @@ -356,12 +356,12 @@ st_concavehull maximum concaveness. Param has_holes is a boolean that determines whether the concave hull can have holes. If set to true, the concave hull can have holes. If set to false, the concave hull will not have holes. - :param geom: The input geometry + :param col: The input geometry :type col: Column :param concavity: The concavity of the hull - :type col: Column (DoubleType) + :type concavity: Column (DoubleType) :param has_holes: Whether the hull has holes, default false - :type col: Column (BooleanType) + :type has_holes: Column (BooleanType) :rtype: Column :example: @@ -867,13 +867,13 @@ st_haversine st_hasvalidcoordinates ********************** -.. function:: st_hasvalidcoordinates(geom, crs, which) +.. function:: st_hasvalidcoordinates(col, crs, which) Checks if all points in `geom` are valid with respect to crs bounds. CRS bounds can be provided either as bounds or as reprojected_bounds. - :param geom: Geometry - :type geom: Column + :param col: Geometry + :type col: Column :param crs: CRS name (EPSG ID), e.g. "EPSG:2192" :type crs: Column :param which: Check against geographic `"bounds"` or geometric `"reprojected_bounds"` bounds. @@ -1243,12 +1243,12 @@ st_perimeter st_rotate ********* -.. function:: st_rotate(geom, td) +.. function:: st_rotate(col, td) Rotates `geom` using the rotational factor `td`. - :param geom: Geometry - :type geom: Column + :param col: Geometry + :type col: Column :param td: Rotation (in radians) :type td: Column (DoubleType) :rtype: Column @@ -1302,12 +1302,12 @@ st_rotate st_scale ******** -.. function:: st_scale(geom, xd, yd) +.. function:: st_scale(col, xd, yd) Scales `geom` using the scaling factors `xd` and `yd`. 
- :param geom: Geometry - :type geom: Column + :param col: Geometry + :type col: Column :param xd: Scale factor in the x-direction :type xd: Column (DoubleType) :param yd: Scale factor in the y-direction @@ -1360,12 +1360,12 @@ st_scale st_setsrid ********** -.. function:: st_setsrid(geom, srid) +.. function:: st_setsrid(col, srid) Sets the Coordinate Reference System well-known identifier (SRID) for `geom`. - :param geom: Geometry - :type geom: Column + :param col: Geometry + :type col: Column :param srid: The spatial reference identifier of `geom`, expressed as an integer, e.g. `4326` for EPSG:4326 / WGS84 :type srid: Column (IntegerType) :rtype: Column @@ -1420,12 +1420,12 @@ st_setsrid st_simplify *********** -.. function:: st_simplify(geom, tol) +.. function:: st_simplify(col, tol) Returns the simplified geometry. - :param geom: Geometry - :type geom: Column + :param col: Geometry + :type col: Column :param tol: Tolerance :type tol: Column :rtype: Column: Geometry @@ -1478,12 +1478,12 @@ st_simplify st_srid ******* -.. function:: st_srid(geom) +.. function:: st_srid(col) Looks up the Coordinate Reference System well-known identifier (SRID) for `geom`. - :param geom: Geometry - :type geom: Column + :param col: Geometry + :type col: Column :rtype: Column :example: @@ -1539,12 +1539,12 @@ st_srid st_transform ************ -.. function:: st_transform(geom, srid) +.. function:: st_transform(col, srid) Transforms the horizontal (XY) coordinates of `geom` from the current reference system to that described by `srid`. - :param geom: Geometry - :type geom: Column + :param col: Geometry + :type col: Column :param srid: Target spatial reference system for `geom`, expressed as an integer, e.g. `3857` for EPSG:3857 / Pseudo-Mercator :type srid: Column (IntegerType) :rtype: Column @@ -1578,7 +1578,7 @@ st_transform .. 
code-tab:: sql - select st_astext(st_transform(st_setsrid(st_geomfromwkt("MULTIPOINT ((10 40), (40 30), (20 20), (30 10))"), 4326), 3857)) + select st_astext(st_transform(st_setsrid(st_geomfromwkt("MULTIPOINT ((10 40), (40 30), (20 20), (30 10))"), 4326) as geom, 3857)) +--------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ |convert_to(st_transform(geom, 3857)) | +--------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ @@ -1605,12 +1605,12 @@ st_transform st_translate ************ -.. function:: st_translate(geom, xd, yd) +.. function:: st_translate(col, xd, yd) Translates `geom` to a new location using the distance parameters `xd` and `yd`. - :param geom: Geometry - :type geom: Column + :param col: Geometry + :type col: Column :param xd: Offset in the x-direction :type xd: Column (DoubleType) :param yd: Offset in the y-direction diff --git a/docs/source/usage/install-gdal.rst b/docs/source/usage/install-gdal.rst index b3a583051..21f8b2249 100644 --- a/docs/source/usage/install-gdal.rst +++ b/docs/source/usage/install-gdal.rst @@ -4,8 +4,8 @@ GDAL Installation guide Supported platforms ################### -In order to use Mosaic, you must have access to a Databricks cluster running -Databricks Runtime 11.3 or higher. +In order to use Mosaic 0.4 series, you must have access to a Databricks cluster running +Databricks Runtime 13.3 LTS. If you have cluster creation permissions in your Databricks workspace, you can create a cluster using the instructions `here `__. @@ -16,34 +16,72 @@ these permissions and more information about cluster permissions can be found in our documentation `here `__. +.. warning:: + These instructions assume an Assigned cluster is being used (vs a Shared Access cluster), + more on access modes `here `__. 
+ GDAL Installation #################### Setup GDAL files and scripts **************************** Mosaic requires GDAL to be installed on the cluster. The easiest way to do this is to use the -the mos.setup_gdal() function. This function will extract the GDAL files and scripts from the -mosaic library and place them in the /dbfs/FileStore/geospatial/mosaic/gdal/ directory. -This call is no longer needed in versions >= 0.3.12. The shared objects are now included in the -databricks-mosaic-gdal pip installable bundle. +mos.setup_gdal() function. + +.. note:: + (a) This is close in behavior to Mosaic < 0.4 series (prior to DBR 13), with new options + to pip install Mosaic for either ubuntugis gdal (3.4.3) or jammy default (3.4.1). + (b) `to_fuse_dir` can be one of `/Volumes/..`, `/Workspace/..`, `/dbfs/..`; + however, you should consider `setup_fuse_install()` for Volume based installs as that + exposes more options, to include copying JAR and JNI Shared Objects. + +.. function:: setup_gdal() + + Generate an init script that will install GDAL native libraries on each worker node. + All of the listed parameters are optional. You can have even more control with the setup_fuse_install function. + + :param to_fuse_dir: Path to write out the init script for GDAL installation; + default is '/Workspace/Shared/geospatial/mosaic/gdal/jammy'. + :type to_fuse_dir: str + :param with_mosaic_pip: Whether to configure a script that pip installs databricks-mosaic, + fixed to the current version; default is False. + :type with_mosaic_pip: bool + :param with_ubuntugis: Whether to use ubuntugis ppa for GDAL instead of built-in; + default is False. + :type with_ubuntugis: bool + :param script_out_name: name of the script to be written; + default is 'mosaic-gdal-init.sh'. + :type script_out_name: str + :param override_mosaic_version: String value to use to override the mosaic version to install, + e.g. '==0.4.0' or '<0.5,>=0.4'; default is None. 
+ :type override_mosaic_version: str + :rtype: bool + + :example: .. code-block:: py import mosaic as mos mos.enable_mosaic(spark, dbutils) - mos.setup_gdal(spark) - GDAL setup complete. - Shared objects (*.so) stored in: /dbfs/FileStore/geospatial/mosaic/gdal/. - Init script stored in: /dbfs/FileStore/geospatial/mosaic/gdal/. - Please restart the cluster with the generated init script to complete the setup. + mos.setup_gdal() + + +---------------------------------------------------------------------------------------------------------+ + |::: Install setup complete ::: | + +---------------------------------------------------------------------------------------------------------+ + |- Settings: 'with_mosaic_pip'? False, 'with_gdal'? True, 'with_ubuntugis'? False | + | 'jar_copy'? False, 'jni_so_copy'? False, 'override_mosaic_version'? None | + |- Derived: 'mosaic_version'? 0.4.0, 'github_version'? 0.4.0, 'release_version'? None, 'pip_str'? ==0.4.0| + |- Fuse Dir: '/Workspace/Shared/geospatial/mosaic/gdal/jammy' | + |- Init Script: configured and stored at 'mosaic-gdal-init.sh'; add to your cluster and restart, | + | more at https://docs.databricks.com/en/init-scripts/cluster-scoped.html | + +---------------------------------------------------------------------------------------------------------+ + Configure the init script ************************** After the mos.setup_gdal() function has been run, you will need to configure the cluster to use the -init script. For versions >= 0.3.12, we are required to use the following init script: -`here `__. -The init script can be set by clicking on the "Edit" button on the cluster page and adding +init script. The init script can be set by clicking on the "Edit" button on the cluster page and adding the following to the "Advanced Options" section: .. 
figure:: ../images/init_script.png From a85a0f80b07f22946e4613c40100e5449c99b74f Mon Sep 17 00:00:00 2001 From: Michael Johns Date: Mon, 22 Jan 2024 17:57:12 -0500 Subject: [PATCH 4/9] provided additional st_concavehull variation after R warning. --- .../com/databricks/labs/mosaic/functions/MosaicContext.scala | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/main/scala/com/databricks/labs/mosaic/functions/MosaicContext.scala b/src/main/scala/com/databricks/labs/mosaic/functions/MosaicContext.scala index cd516ecc5..fac9fd455 100644 --- a/src/main/scala/com/databricks/labs/mosaic/functions/MosaicContext.scala +++ b/src/main/scala/com/databricks/labs/mosaic/functions/MosaicContext.scala @@ -563,8 +563,10 @@ class MosaicContext(indexSystem: IndexSystem, geometryAPI: GeometryAPI) extends def st_convexhull(geom: Column): Column = ColumnAdapter(ST_ConvexHull(geom.expr, expressionConfig)) def st_concavehull(geom: Column, concavity: Column, allowHoles: Column): Column = ColumnAdapter(ST_ConcaveHull(geom.expr, concavity.cast("double").expr, allowHoles.expr, expressionConfig)) - def st_concavehull(geom: Column, concavity: Double, allowHoles: Boolean = false): Column = + def st_concavehull(geom: Column, concavity: Double, allowHoles: Boolean): Column = ColumnAdapter(ST_ConcaveHull(geom.expr, lit(concavity).cast("double").expr, lit(allowHoles).expr, expressionConfig)) + def st_concavehull(geom: Column, concavity: Double): Column = + ColumnAdapter(ST_ConcaveHull(geom.expr, lit(concavity).cast("double").expr, lit(false).expr, expressionConfig)) def st_difference(geom1: Column, geom2: Column): Column = ColumnAdapter(ST_Difference(geom1.expr, geom2.expr, expressionConfig)) def st_distance(geom1: Column, geom2: Column): Column = ColumnAdapter(ST_Distance(geom1.expr, geom2.expr, expressionConfig)) def st_dimension(geom: Column): Column = ColumnAdapter(ST_Dimension(geom.expr, expressionConfig)) From cae843c4722b29f576d744386689e94aeb928230 Mon Sep 17 00:00:00 
2001 From: Michael Johns Date: Tue, 23 Jan 2024 12:05:47 -0500 Subject: [PATCH 5/9] adjustments from PR feedback; also, deprecated st_intersection_aggregate and st_intersects_aggregate for _agg. --- .../tests/testthat/testVectorFunctions.R | 4 +- .../tests/testthat/testVectorFunctions.R | 4 +- docs/source/api/raster-functions.rst | 22 ++-- docs/source/usage/install-gdal.rst | 20 ++-- python/mosaic/api/aggregators.py | 106 +++++------------- python/mosaic/api/raster.py | 76 +------------ ...gregate.scala => ST_IntersectionAgg.scala} | 10 +- ...Aggregate.scala => ST_IntersectsAgg.scala} | 10 +- .../labs/mosaic/functions/MosaicContext.scala | 35 +++--- .../geometry/ST_IntersectionBehaviors.scala | 16 +-- .../geometry/ST_IntersectsBehaviors.scala | 23 ++-- 11 files changed, 102 insertions(+), 224 deletions(-) rename src/main/scala/com/databricks/labs/mosaic/expressions/geometry/{ST_IntersectionAggregate.scala => ST_IntersectionAgg.scala} (95%) rename src/main/scala/com/databricks/labs/mosaic/expressions/geometry/{ST_IntersectsAggregate.scala => ST_IntersectsAgg.scala} (93%) diff --git a/R/sparkR-mosaic/sparkrMosaic/tests/testthat/testVectorFunctions.R b/R/sparkR-mosaic/sparkrMosaic/tests/testthat/testVectorFunctions.R index ad7032134..205aee809 100644 --- a/R/sparkR-mosaic/sparkrMosaic/tests/testthat/testVectorFunctions.R +++ b/R/sparkR-mosaic/sparkrMosaic/tests/testthat/testVectorFunctions.R @@ -78,8 +78,8 @@ test_that("aggregate vector functions behave as intended", { sdf.intersection <- join(sdf.l, sdf.r, sdf.l$left_index == sdf.r$right_index, "inner") sdf.intersection <- summarize( groupBy(sdf.intersection, sdf.intersection$left_id, sdf.intersection$right_id), - agg_intersects = st_intersects_aggregate(column("left_index"), column("right_index")), - agg_intersection = st_intersection_aggregate(column("left_index"), column("right_index")), + agg_intersects = st_intersects_agg(column("left_index"), column("right_index")), + agg_intersection = 
st_intersection_agg(column("left_index"), column("right_index")), left_geom = first(column("left_geom")), right_geom = first(column("right_geom")) ) diff --git a/R/sparklyr-mosaic/sparklyrMosaic/tests/testthat/testVectorFunctions.R b/R/sparklyr-mosaic/sparklyrMosaic/tests/testthat/testVectorFunctions.R index f91a0ff8b..2b0416ddd 100644 --- a/R/sparklyr-mosaic/sparklyrMosaic/tests/testthat/testVectorFunctions.R +++ b/R/sparklyr-mosaic/sparklyrMosaic/tests/testthat/testVectorFunctions.R @@ -92,8 +92,8 @@ test_that("aggregate vector functions behave as intended", { inner_join(sdf.r, by = c("left_index" = "right_index"), keep = TRUE) %>% dplyr::group_by(left_id, right_id) %>% dplyr::summarise( - agg_intersects = st_intersects_aggregate(left_index, right_index), - agg_intersection = st_intersection_aggregate(left_index, right_index), + agg_intersects = st_intersects_agg(left_index, right_index), + agg_intersection = st_intersection_agg(left_index, right_index), left_geom = max(left_geom, 1), right_geom = max(right_geom, 1) ) %>% diff --git a/docs/source/api/raster-functions.rst b/docs/source/api/raster-functions.rst index 59d2368b7..c982e929a 100644 --- a/docs/source/api/raster-functions.rst +++ b/docs/source/api/raster-functions.rst @@ -12,7 +12,7 @@ This is useful for performing spatial joins between raster data and vector data. Mosaic also provides a scalable retiling function that can be used to retile raster data in case of bottlenecking due to large files. All raster functions respect the \"rst\_\" prefix naming convention. Mosaic is operating using raster tile objects only since 0.3.11. Tile objects are created using functions such as rst_fromfile(path_to_raster) -or rst_fromcontent(raster_bin). +or rst_fromcontent(raster_bin, driver). These functions are used as places to start when working with initial data. If you use spark.read.format("gdal") tiles are automatically generated for you. Also, scala does not have a df.display method while python does. 
In practice you would most often call display(df) in scala for a prettier output, but for brevity, we write df.show in scala. @@ -204,7 +204,7 @@ rst_combineavg .select(F.array("tile1","tile2","tile3")).alias("tiles"))\ .select(mos.rst_combineavg("tiles")).limit(1).display() +----------------------------------------------------------------------------------------------------------------+ - | rst_combineavg(tiles) | + | rst_combineavg(tiles) | +----------------------------------------------------------------------------------------------------------------+ | {index_id: 593308294097928191, raster: [00 01 10 ... 00], parentPath: "dbfs:/path_to_file", driver: "NetCDF" } | +----------------------------------------------------------------------------------------------------------------+ @@ -215,7 +215,7 @@ rst_combineavg .select(F.array("tile1","tile2","tile3")).as("tiles")) .select(rst_combineavg(col("tiles"))).limit(1).show +----------------------------------------------------------------------------------------------------------------+ - | rst_combineavg(tiles) | + | rst_combineavg(tiles) | +----------------------------------------------------------------------------------------------------------------+ | {index_id: 593308294097928191, raster: [00 01 10 ... 00], parentPath: "dbfs:/path_to_file", driver: "NetCDF" } | +----------------------------------------------------------------------------------------------------------------+ @@ -322,7 +322,7 @@ rst_derivedband )\ .select(mos.rst_deriveband("tiles","py_func1","func1_name")).limit(1).display() +----------------------------------------------------------------------------------------------------------------+ - | rst_derivedband(tiles,py_func1,func1_name) | + | rst_derivedband(tiles,py_func1,func1_name) | +----------------------------------------------------------------------------------------------------------------+ | {index_id: 593308294097928191, raster: [00 01 10 ... 
00], parentPath: "dbfs:/path_to_file", driver: "NetCDF" } | +----------------------------------------------------------------------------------------------------------------+ @@ -342,7 +342,7 @@ rst_derivedband ) .select(mos.rst_deriveband("tiles","py_func1","func1_name")).limit(1).show +----------------------------------------------------------------------------------------------------------------+ - | rst_derivedband(tiles,py_func1,func1_name) | + | rst_derivedband(tiles,py_func1,func1_name) | +----------------------------------------------------------------------------------------------------------------+ | {index_id: 593308294097928191, raster: [00 01 10 ... 00], parentPath: "dbfs:/path_to_file", driver: "NetCDF" } | +----------------------------------------------------------------------------------------------------------------+ @@ -358,7 +358,7 @@ rst_derivedband "average" as funct1_name FROM table LIMIT 1 +----------------------------------------------------------------------------------------------------------------+ - | rst_derivedband(tiles,py_func1,func1_name) | + | rst_derivedband(tiles,py_func1,func1_name) | +----------------------------------------------------------------------------------------------------------------+ | {index_id: 593308294097928191, raster: [00 01 10 ... 
00], parentPath: "dbfs:/path_to_file", driver: "NetCDF" } | +----------------------------------------------------------------------------------------------------------------+ @@ -405,7 +405,7 @@ rst_derivedbandagg .groupBy("date", "py_func1", "func1_name")\ .agg(mos.rst_derivedbandagg("tile","py_func1","func1_name")).limit(1).display() +----------------------------------------------------------------------------------------------------------------+ - | rst_derivedbandagg(tile,py_func1,func1_name) | + | rst_derivedbandagg(tile,py_func1,func1_name) | +----------------------------------------------------------------------------------------------------------------+ | {index_id: 593308294097928191, raster: [00 01 10 ... 00], parentPath: "dbfs:/path_to_file", driver: "NetCDF" } | +----------------------------------------------------------------------------------------------------------------+ @@ -426,7 +426,7 @@ rst_derivedbandagg .groupBy("date", "py_func1", "func1_name") .agg(mos.rst_derivedbandagg("tile","py_func1","func1_name")).limit(1).show +----------------------------------------------------------------------------------------------------------------+ - | rst_derivedbandagg(tile,py_func1,func1_name) | + | rst_derivedbandagg(tile,py_func1,func1_name) | +----------------------------------------------------------------------------------------------------------------+ | {index_id: 593308294097928191, raster: [00 01 10 ... 
00], parentPath: "dbfs:/path_to_file", driver: "NetCDF" } | +----------------------------------------------------------------------------------------------------------------+ @@ -448,7 +448,7 @@ rst_derivedbandagg GROUP BY date, py_func1, func1_name LIMIT 1 +----------------------------------------------------------------------------------------------------------------+ - | rst_derivedbandagg(tile,py_func1,func1_name) | + | rst_derivedbandagg(tile,py_func1,func1_name) | +----------------------------------------------------------------------------------------------------------------+ | {index_id: 593308294097928191, raster: [00 01 10 ... 00], parentPath: "dbfs:/path_to_file", driver: "NetCDF" } | +----------------------------------------------------------------------------------------------------------------+ @@ -480,7 +480,7 @@ rst_frombands df.select(F.array("tile1", "tile2", "tile3").as("tiles"))\ .select(mos.rst_frombands("tiles")).limit(1).display() +----------------------------------------------------------------------------------------------------------------+ - | rst_frombands(tiles) | + | rst_frombands(tiles) | +----------------------------------------------------------------------------------------------------------------+ | {index_id: 593308294097928191, raster: [00 01 10 ... 00], parentPath: "dbfs:/path_to_file", driver: "NetCDF" } | +----------------------------------------------------------------------------------------------------------------+ @@ -491,7 +491,7 @@ rst_frombands .select(array("tile1", "tile2", "tile3").as("tiles")) .select(rst_frombands(col("tiles"))).limit(1).show +----------------------------------------------------------------------------------------------------------------+ - | rst_frombands(tiles) | + | rst_frombands(tiles) | +----------------------------------------------------------------------------------------------------------------+ | {index_id: 593308294097928191, raster: [00 01 10 ... 
00], parentPath: "dbfs:/path_to_file", driver: "NetCDF" } | +----------------------------------------------------------------------------------------------------------------+ diff --git a/docs/source/usage/install-gdal.rst b/docs/source/usage/install-gdal.rst index 21f8b2249..7e1b0c19b 100644 --- a/docs/source/usage/install-gdal.rst +++ b/docs/source/usage/install-gdal.rst @@ -66,16 +66,16 @@ the mos.setup_gdal() function. mos.enable_mosaic(spark, dbutils) mos.setup_gdal() - +---------------------------------------------------------------------------------------------------------+ - |::: Install setup complete ::: | - +---------------------------------------------------------------------------------------------------------+ - |- Settings: 'with_mosaic_pip'? False, 'with_gdal'? True, 'with_ubuntugis'? False | - | 'jar_copy'? False, 'jni_so_copy'? False, 'override_mosaic_version'? None | - |- Derived: 'mosaic_version'? 0.4.0, 'github_version'? 0.4.0, 'release_version'? None, 'pip_str'? ==0.4.0| - |- Fuse Dir: '/Workspace/Shared/geospatial/mosaic/gdal/jammy' | - |- Init Script: configured and stored at 'mosaic-gdal-init.sh'; add to your cluster and restart, | - | more at https://docs.databricks.com/en/init-scripts/cluster-scoped.html | - +---------------------------------------------------------------------------------------------------------+ + +-----------------------------------------------------------------------------------------------------------+ + | ::: Install setup complete ::: | + +-----------------------------------------------------------------------------------------------------------+ + | - Settings: 'with_mosaic_pip'? False, 'with_gdal'? True, 'with_ubuntugis'? False | + | 'jar_copy'? False, 'jni_so_copy'? False, 'override_mosaic_version'? None | + | - Derived: 'mosaic_version'? 0.4.0, 'github_version'? 0.4.0, 'release_version'? None, 'pip_str'? 
==0.4.0 | + | - Fuse Dir: '/Workspace/Shared/geospatial/mosaic/gdal/jammy' | + | - Init Script: configured and stored at 'mosaic-gdal-init.sh'; add to your cluster and restart, | + | more at https://docs.databricks.com/en/init-scripts/cluster-scoped.html | + +-----------------------------------------------------------------------------------------------------------+ Configure the init script diff --git a/python/mosaic/api/aggregators.py b/python/mosaic/api/aggregators.py index e221d06ba..d68275b73 100644 --- a/python/mosaic/api/aggregators.py +++ b/python/mosaic/api/aggregators.py @@ -9,8 +9,6 @@ ####################### __all__ = [ - "st_intersection_aggregate", - "st_intersects_aggregate", "st_union_agg", "grid_cell_union_agg", "grid_cell_intersection_agg", @@ -22,33 +20,6 @@ ] -def st_intersection_aggregate( - leftIndex: ColumnOrName, rightIndex: ColumnOrName -) -> Column: - """ - Computes the intersection of all `leftIndex` : `rightIndex` pairs - and unions these to produce a single geometry. - - Parameters - ---------- - leftIndex : Column - The index field of the left-hand geometry - rightIndex : Column - The index field of the right-hand geometry - - Returns - ------- - Column - The aggregated intersection geometry. - - """ - return config.mosaic_context.invoke_function( - "st_intersection_aggregate", - pyspark_to_java_column(leftIndex), - pyspark_to_java_column(rightIndex), - ) - - def st_intersection_agg(leftIndex: ColumnOrName, rightIndex: ColumnOrName) -> Column: """ Computes the intersection of all `leftIndex` : `rightIndex` pairs @@ -68,32 +39,7 @@ def st_intersection_agg(leftIndex: ColumnOrName, rightIndex: ColumnOrName) -> Co """ return config.mosaic_context.invoke_function( - "st_intersection_aggregate", - pyspark_to_java_column(leftIndex), - pyspark_to_java_column(rightIndex), - ) - - -def st_intersects_aggregate( - leftIndex: ColumnOrName, rightIndex: ColumnOrName -) -> Column: - """ - Tests if any `leftIndex` : `rightIndex` pairs intersect. 
- - Parameters - ---------- - leftIndex : Column - The index field of the left-hand geometry - rightIndex : Column - The index field of the right-hand geometry - - Returns - ------- - Column (BooleanType) - - """ - return config.mosaic_context.invoke_function( - "st_intersects_aggregate", + "st_intersection_agg", pyspark_to_java_column(leftIndex), pyspark_to_java_column(rightIndex), ) @@ -116,7 +62,7 @@ def st_intersects_agg(leftIndex: ColumnOrName, rightIndex: ColumnOrName) -> Colu """ return config.mosaic_context.invoke_function( - "st_intersects_aggregate", + "st_intersects_agg", pyspark_to_java_column(leftIndex), pyspark_to_java_column(rightIndex), ) @@ -176,57 +122,65 @@ def grid_cell_union_agg(chips: ColumnOrName) -> Column: ) -def rst_merge_agg(raster: ColumnOrName) -> Column: +def rst_merge_agg(raster_tile: ColumnOrName) -> Column: """ - Returns the raster representing the aggregated union of rasters on some grid cell. + Merges (unions) the aggregated raster tiles into a single tile. + Returns the raster tile representing the aggregated union of rasters on some grid cell. Parameters ---------- - raster: Column + raster_tile : Column (RasterTileType) + Aggregate Raster tile column to merge. Returns ------- - Column - The union raster. + Column (RasterTileType) + Raster tile struct of the union raster. """ return config.mosaic_context.invoke_function( - "rst_merge_agg", pyspark_to_java_column(raster) + "rst_merge_agg", pyspark_to_java_column(raster_tile) ) -def rst_combineavg_agg(raster: ColumnOrName) -> Column: +def rst_combineavg_agg(raster_tile: ColumnOrName) -> Column: """ - Returns the raster representing the aggregated average of rasters. + Returns the raster tile representing the aggregated average of rasters. Parameters ---------- - raster: Column + raster_tile : Column (RasterTileType) + Aggregate raster tile col to combine. Returns ------- - Column - The average raster. + Column (RasterTileType) + The combined raster tile. 
""" return config.mosaic_context.invoke_function( - "rst_combineavg_agg", pyspark_to_java_column(raster) + "rst_combineavg_agg", pyspark_to_java_column(raster_tile) ) -def rst_derivedband_agg(raster: ColumnOrName, pythonFunc: ColumnOrName, funcName: ColumnOrName) -> Column: +def rst_derivedband_agg(raster_tile: ColumnOrName, python_func: ColumnOrName, func_name: ColumnOrName) -> Column: """ - Returns the raster representing the aggregation of rasters using provided python function. + Returns the raster tile representing the aggregation of rasters using provided python function. Parameters ---------- - raster: Column - pythonFunc: Column - funcName: Column + raster_tile : Column (RasterTileType) + Aggregate raster tile col to derive from. + python_func : Column (StringType) + The python function to apply to the bands. + func_name : Column (StringType) + The name of the function. Returns ------- - Column - The resulting raster. + Column (RasterTileType) + Creates a new band by applying the given python function to the input rasters. + The result is a raster tile. 
+ """ return config.mosaic_context.invoke_function( - "rst_derivedband_agg", pyspark_to_java_column(raster), pyspark_to_java_column(pythonFunc), pyspark_to_java_column(funcName) + "rst_derivedband_agg", pyspark_to_java_column(raster_tile), pyspark_to_java_column(python_func), pyspark_to_java_column(func_name) ) diff --git a/python/mosaic/api/raster.py b/python/mosaic/api/raster.py index 4d3d689e6..6cf24d811 100644 --- a/python/mosaic/api/raster.py +++ b/python/mosaic/api/raster.py @@ -15,9 +15,9 @@ "rst_boundingbox", "rst_clip", "rst_combineavg", - "rst_combineavgagg", + "rst_combineavg_agg", "rst_derivedband", - "rst_derivedbandagg", + "rst_derivedband_agg", "rst_frombands", "rst_fromcontent", "rst_fromfile", @@ -30,7 +30,7 @@ "rst_mapalgebra", "rst_memsize", "rst_merge", - "rst_mergeagg", + "rst_merge_agg", "rst_metadata", "rst_ndvi", "rst_numbands", @@ -153,26 +153,6 @@ def rst_combineavg(raster_tiles: ColumnOrName) -> Column: ) -def rst_combineavgagg(raster_tile: ColumnOrName) -> Column: - """ - Combines the aggregate raster tiles into a single tile. - - Parameters - ---------- - raster_tile : Column (RasterTileType) - Aggregate raster tile col to combine. - - Returns - ------- - Column (RasterTileType) - The combined raster tile. - - """ - return config.mosaic_context.invoke_function( - "rst_combineavgagg", pyspark_to_java_column(raster_tile) - ) - - def rst_derivedband(raster_tile: ColumnOrName, python_func: ColumnOrName, func_name: ColumnOrName) -> Column: """ Creates a new band by applying the given python function to the input rasters. @@ -201,34 +181,6 @@ def rst_derivedband(raster_tile: ColumnOrName, python_func: ColumnOrName, func_n ) -def rst_derivedbandagg(raster_tile: ColumnOrName, python_func: ColumnOrName, func_name: ColumnOrName) -> Column: - """ - Creates a new band by applying the given python function to the input rasters. - The result is a raster tile. 
- - Parameters - ---------- - raster_tile : Column (RasterTileType) - Aggregate raster tile col to derive from. - python_func : Column (StringType) - The python function to apply to the bands. - func_name : Column (StringType) - The name of the function. - - Returns - ------- - Column (RasterTileType) - Mosaic raster tile struct column. - - """ - return config.mosaic_context.invoke_function( - "rst_derivedbandagg", - pyspark_to_java_column(raster_tile), - pyspark_to_java_column(python_func), - pyspark_to_java_column(func_name), - ) - - def rst_georeference(raster_tile: ColumnOrName) -> Column: """ Returns GeoTransform of the raster as a GT array of doubles. @@ -436,28 +388,6 @@ def rst_merge(raster_tiles: ColumnOrName) -> Column: ) -def rst_mergeagg(raster_tile: ColumnOrName) -> Column: - """ - Merges (mosaics) the aggregated raster tiles into a single tile. - The result is Mosaic raster tile struct of the merged raster. - The result is stored in the checkpoint directory. - - Parameters - ---------- - raster_tile : Column (RasterTileType) - Aggregate Raster tile column to merge. - - Returns - ------- - Column (RasterTileType) - Mosaic raster tile struct of the merged raster. - - """ - return config.mosaic_context.invoke_function( - "rst_mergeagg", pyspark_to_java_column(raster_tile) - ) - - def rst_frombands(bands: ColumnOrName) -> Column: """ Stack an array of bands into a raster tile. 
diff --git a/src/main/scala/com/databricks/labs/mosaic/expressions/geometry/ST_IntersectionAggregate.scala b/src/main/scala/com/databricks/labs/mosaic/expressions/geometry/ST_IntersectionAgg.scala similarity index 95% rename from src/main/scala/com/databricks/labs/mosaic/expressions/geometry/ST_IntersectionAggregate.scala rename to src/main/scala/com/databricks/labs/mosaic/expressions/geometry/ST_IntersectionAgg.scala index 29fecdf24..5de6e60d9 100644 --- a/src/main/scala/com/databricks/labs/mosaic/expressions/geometry/ST_IntersectionAggregate.scala +++ b/src/main/scala/com/databricks/labs/mosaic/expressions/geometry/ST_IntersectionAgg.scala @@ -9,7 +9,7 @@ import org.apache.spark.sql.catalyst.expressions.{Expression, ExpressionInfo} import org.apache.spark.sql.catalyst.trees.BinaryLike import org.apache.spark.sql.types._ -case class ST_IntersectionAggregate( +case class ST_IntersectionAgg( leftChip: Expression, rightChip: Expression, geometryAPIName: String, @@ -27,7 +27,7 @@ case class ST_IntersectionAggregate( override val dataType: DataType = BinaryType private val emptyWKB = geometryAPI.geometry("POLYGON(EMPTY)", "WKT").toWKB - override def prettyName: String = "st_intersection_aggregate" + override def prettyName: String = "st_intersection_agg" private[geometry] def getCellGeom(row: InternalRow, dt: DataType) = { dt.asInstanceOf[StructType].fields.find(_.name == "index_id").map(_.dataType) match { @@ -86,18 +86,18 @@ case class ST_IntersectionAggregate( override def deserialize(storageFormat: Array[Byte]): Array[Byte] = storageFormat - override protected def withNewChildrenInternal(newLeft: Expression, newRight: Expression): ST_IntersectionAggregate = + override protected def withNewChildrenInternal(newLeft: Expression, newRight: Expression): ST_IntersectionAgg = copy(leftChip = newLeft, rightChip = newRight) } -object ST_IntersectionAggregate { +object ST_IntersectionAgg { def registryExpressionInfo(db: Option[String]): ExpressionInfo = new ExpressionInfo( 
classOf[IndexGeometry].getCanonicalName, db.orNull, - "st_reduce_intersection", + "st_intersection_agg", """ | _FUNC_(left_index, right_index)) - Resolves an intersection geometry based on matched indices. """.stripMargin, diff --git a/src/main/scala/com/databricks/labs/mosaic/expressions/geometry/ST_IntersectsAggregate.scala b/src/main/scala/com/databricks/labs/mosaic/expressions/geometry/ST_IntersectsAgg.scala similarity index 93% rename from src/main/scala/com/databricks/labs/mosaic/expressions/geometry/ST_IntersectsAggregate.scala rename to src/main/scala/com/databricks/labs/mosaic/expressions/geometry/ST_IntersectsAgg.scala index aad0bee6d..bf99089f5 100644 --- a/src/main/scala/com/databricks/labs/mosaic/expressions/geometry/ST_IntersectsAggregate.scala +++ b/src/main/scala/com/databricks/labs/mosaic/expressions/geometry/ST_IntersectsAgg.scala @@ -8,7 +8,7 @@ import org.apache.spark.sql.catalyst.expressions.aggregate.{ImperativeAggregate, import org.apache.spark.sql.catalyst.trees.BinaryLike import org.apache.spark.sql.types._ -case class ST_IntersectsAggregate( +case class ST_IntersectsAgg( leftChip: Expression, rightChip: Expression, geometryAPIName: String, @@ -23,7 +23,7 @@ case class ST_IntersectsAggregate( override val nullable: Boolean = false override val dataType: DataType = BooleanType - override def prettyName: String = "st_intersects_aggregate" + override def prettyName: String = "st_intersects_agg" override def update(accumulator: Boolean, inputRow: InternalRow): Boolean = { accumulator || { @@ -59,18 +59,18 @@ case class ST_IntersectsAggregate( storageFormat.head.equals(1.asInstanceOf[Byte]) } - override protected def withNewChildrenInternal(newLeft: Expression, newRight: Expression): ST_IntersectsAggregate = + override protected def withNewChildrenInternal(newLeft: Expression, newRight: Expression): ST_IntersectsAgg = copy(leftChip = newLeft, rightChip = newRight) } -object ST_IntersectsAggregate { +object ST_IntersectsAgg { def 
registryExpressionInfo(db: Option[String]): ExpressionInfo = new ExpressionInfo( classOf[IndexGeometry].getCanonicalName, db.orNull, - "st_intersects_aggregate", + "st_intersects_agg", """ | _FUNC_(left_index, right_index)) - Resolves an intersects based on matched indices. """.stripMargin, diff --git a/src/main/scala/com/databricks/labs/mosaic/functions/MosaicContext.scala b/src/main/scala/com/databricks/labs/mosaic/functions/MosaicContext.scala index fac9fd455..596d1e2a7 100644 --- a/src/main/scala/com/databricks/labs/mosaic/functions/MosaicContext.scala +++ b/src/main/scala/com/databricks/labs/mosaic/functions/MosaicContext.scala @@ -311,23 +311,23 @@ class MosaicContext(indexSystem: IndexSystem, geometryAPI: GeometryAPI) extends /** Aggregators */ registry.registerFunction( FunctionIdentifier("st_intersection_aggregate", database), - ST_IntersectionAggregate.registryExpressionInfo(database), - (exprs: Seq[Expression]) => ST_IntersectionAggregate(exprs(0), exprs(1), geometryAPI.name, indexSystem, 0, 0) + ST_IntersectionAgg.registryExpressionInfo(database), + (exprs: Seq[Expression]) => ST_IntersectionAgg(exprs(0), exprs(1), geometryAPI.name, indexSystem, 0, 0) ) registry.registerFunction( FunctionIdentifier("st_intersection_agg", database), - ST_IntersectionAggregate.registryExpressionInfo(database), - (exprs: Seq[Expression]) => ST_IntersectionAggregate(exprs(0), exprs(1), geometryAPI.name, indexSystem, 0, 0) + ST_IntersectionAgg.registryExpressionInfo(database), + (exprs: Seq[Expression]) => ST_IntersectionAgg(exprs(0), exprs(1), geometryAPI.name, indexSystem, 0, 0) ) registry.registerFunction( FunctionIdentifier("st_intersects_aggregate", database), - ST_IntersectsAggregate.registryExpressionInfo(database), - (exprs: Seq[Expression]) => ST_IntersectsAggregate(exprs(0), exprs(1), geometryAPI.name) + ST_IntersectsAgg.registryExpressionInfo(database), + (exprs: Seq[Expression]) => ST_IntersectsAgg(exprs(0), exprs(1), geometryAPI.name) ) 
registry.registerFunction( FunctionIdentifier("st_intersects_agg", database), - ST_IntersectsAggregate.registryExpressionInfo(database), - (exprs: Seq[Expression]) => ST_IntersectsAggregate(exprs(0), exprs(1), geometryAPI.name) + ST_IntersectsAgg.registryExpressionInfo(database), + (exprs: Seq[Expression]) => ST_IntersectsAgg(exprs(0), exprs(1), geometryAPI.name) ) registry.registerFunction( FunctionIdentifier("st_union_agg", database), @@ -502,6 +502,8 @@ class MosaicContext(indexSystem: IndexSystem, geometryAPI: GeometryAPI) extends /** Legacy API Specific aliases */ aliasFunction(registry, "index_geometry", database, "grid_boundaryaswkb", database) + aliasFunction(registry, "st_intersection_aggregate", database, "st_intersection_agg", database) + aliasFunction(registry, "st_intersects_aggregate", database, "st_intersects_agg", database) aliasFunction(registry, "mosaic_explode", database, "grid_tessellateexplode", database) aliasFunction(registry, "mosaicfill", database, "grid_tessellate", database) aliasFunction(registry, "point_index_geom", database, "grid_pointascellid", database) @@ -752,18 +754,15 @@ class MosaicContext(indexSystem: IndexSystem, geometryAPI: GeometryAPI) extends ColumnAdapter(RST_WorldToRasterCoordY(raster.expr, lit(x).expr, lit(y).expr, expressionConfig)) /** Aggregators */ - def st_intersects_aggregate(leftIndex: Column, rightIndex: Column): Column = + def st_intersects_agg(leftIndex: Column, rightIndex: Column): Column = ColumnAdapter( - ST_IntersectsAggregate(leftIndex.expr, rightIndex.expr, geometryAPI.name).toAggregateExpression(isDistinct = false) + ST_IntersectsAgg(leftIndex.expr, rightIndex.expr, geometryAPI.name).toAggregateExpression(isDistinct = false) ) - - def st_intersects_agg(leftIndex: Column, rightIndex: Column): Column = st_intersects_aggregate(leftIndex, rightIndex) - def st_intersection_aggregate(leftIndex: Column, rightIndex: Column): Column = + def st_intersection_agg(leftIndex: Column, rightIndex: Column): Column = 
ColumnAdapter( - ST_IntersectionAggregate(leftIndex.expr, rightIndex.expr, geometryAPI.name, indexSystem, 0, 0) - .toAggregateExpression(isDistinct = false) + ST_IntersectionAgg(leftIndex.expr, rightIndex.expr, geometryAPI.name, indexSystem, 0, 0) + .toAggregateExpression(isDistinct = false) ) - def st_intersection_agg(leftIndex: Column, rightIndex: Column): Column = st_intersection_aggregate(leftIndex, rightIndex) def st_union_agg(geom: Column): Column = ColumnAdapter(ST_UnionAgg(geom.expr, geometryAPI.name).toAggregateExpression(isDistinct = false)) def rst_merge_agg(raster: Column): Column = @@ -921,6 +920,10 @@ class MosaicContext(indexSystem: IndexSystem, geometryAPI: GeometryAPI) extends def try_sql(inCol: Column): Column = ColumnAdapter(TrySql(inCol.expr)) // Legacy API + @deprecated("Please use 'st_intersects_agg' expression instead.") + def st_intersects_aggregate(leftIndex: Column, rightIndex: Column): Column = st_intersects_agg(leftIndex, rightIndex) + @deprecated("Please use 'st_intersection_agg' expression instead.") + def st_intersection_aggregate(leftIndex: Column, rightIndex: Column): Column = st_intersection_agg(leftIndex, rightIndex) @deprecated("Please use 'grid_boundaryaswkb' or 'grid_boundary(..., format_name)' expressions instead.") def index_geometry(indexID: Column): Column = grid_boundaryaswkb(indexID) @deprecated("Please use 'grid_tessellateexplode' expression instead.") diff --git a/src/test/scala/com/databricks/labs/mosaic/expressions/geometry/ST_IntersectionBehaviors.scala b/src/test/scala/com/databricks/labs/mosaic/expressions/geometry/ST_IntersectionBehaviors.scala index c7ec70f36..7007e93dc 100644 --- a/src/test/scala/com/databricks/labs/mosaic/expressions/geometry/ST_IntersectionBehaviors.scala +++ b/src/test/scala/com/databricks/labs/mosaic/expressions/geometry/ST_IntersectionBehaviors.scala @@ -31,7 +31,7 @@ trait ST_IntersectionBehaviors extends QueryTest { .select( col("wkt"), col("id").alias("left_id"), - 
mosaic_explode(col("wkt"), resolution).alias("left_index"), + grid_tessellateexplode(col("wkt"), resolution).alias("left_index"), col("wkt").alias("left_wkt") ) @@ -43,7 +43,7 @@ trait ST_IntersectionBehaviors extends QueryTest { .select( col("wkt"), col("id").alias("right_id"), - mosaic_explode(col("wkt"), resolution).alias("right_index"), + grid_tessellateexplode(col("wkt"), resolution).alias("right_index"), col("wkt").alias("right_wkt") ) @@ -58,7 +58,7 @@ trait ST_IntersectionBehaviors extends QueryTest { "right_id" ) .agg( - st_intersection_aggregate(col("left_index"), col("right_index")).alias("agg_intersection"), + st_intersection_agg(col("left_index"), col("right_index")).alias("agg_intersection"), first("left_wkt").alias("left_wkt"), first("right_wkt").alias("right_wkt") ) @@ -123,7 +123,7 @@ trait ST_IntersectionBehaviors extends QueryTest { col("left_index.index_id") === col("right_index.index_id") ) .groupBy("left_row_id") - .agg(st_intersection_aggregate(col("left_index"), col("right_index")).alias("geom")) + .agg(st_intersection_agg(col("left_index"), col("right_index")).alias("geom")) .withColumn("area", st_area(col("geom"))) (results.select("area").as[Double].collect().head - @@ -136,7 +136,7 @@ trait ST_IntersectionBehaviors extends QueryTest { """ |SELECT | left_row_id, - | ST_Area(ST_Intersection_Aggregate(left_index, right_index)) AS area + | ST_Area(ST_Intersection_Agg(left_index, right_index)) AS area |FROM left |JOIN right |ON left_index.index_id = right_index.index_id @@ -177,7 +177,7 @@ trait ST_IntersectionBehaviors extends QueryTest { .select( col("wkt"), col("id").alias("left_id"), - mosaic_explode(col("wkt"), resolution).alias("left_index"), + grid_tessellateexplode(col("wkt"), resolution).alias("left_index"), col("wkt").alias("left_wkt") ) @@ -199,7 +199,7 @@ trait ST_IntersectionBehaviors extends QueryTest { "right_id" ) .agg( - st_intersection_aggregate(col("left_index"), col("right_index")).alias("agg_intersection"), + 
st_intersection_agg(col("left_index"), col("right_index")).alias("agg_intersection"), first("left_wkt").alias("left_wkt"), first("right_wkt").alias("right_wkt") ) @@ -263,7 +263,7 @@ trait ST_IntersectionBehaviors extends QueryTest { case H3IndexSystem => InternalRow.fromSeq(Seq(true, 622236750694711295L, Array.empty[Byte])) } - val stIntersectionAgg = ST_IntersectionAggregate(null, null, geometryAPI.name, indexSystem, 0, 0) + val stIntersectionAgg = ST_IntersectionAgg(null, null, geometryAPI.name, indexSystem, 0, 0) noException should be thrownBy stIntersectionAgg.getCellGeom(stringIDRow, ChipType(StringType)) noException should be thrownBy stIntersectionAgg.getCellGeom(longIDRow, ChipType(LongType)) an[Error] should be thrownBy stIntersectionAgg.getCellGeom( diff --git a/src/test/scala/com/databricks/labs/mosaic/expressions/geometry/ST_IntersectsBehaviors.scala b/src/test/scala/com/databricks/labs/mosaic/expressions/geometry/ST_IntersectsBehaviors.scala index 4a480dfd3..a94409e34 100644 --- a/src/test/scala/com/databricks/labs/mosaic/expressions/geometry/ST_IntersectsBehaviors.scala +++ b/src/test/scala/com/databricks/labs/mosaic/expressions/geometry/ST_IntersectsBehaviors.scala @@ -27,7 +27,7 @@ trait ST_IntersectsBehaviors extends QueryTest { val left = boroughs .select( col("id").alias("left_id"), - mosaic_explode(col("wkt"), resolution).alias("left_index"), + grid_tessellateexplode(col("wkt"), resolution).alias("left_index"), col("wkt").alias("left_wkt") ) @@ -38,7 +38,7 @@ trait ST_IntersectsBehaviors extends QueryTest { ) .select( col("id").alias("right_id"), - mosaic_explode(col("wkt"), resolution).alias("right_index"), + grid_tessellateexplode(col("wkt"), resolution).alias("right_index"), col("wkt").alias("right_wkt") ) @@ -52,7 +52,7 @@ trait ST_IntersectsBehaviors extends QueryTest { "right_id" ) .agg( - st_intersects_aggregate(col("left_index"), col("right_index")).alias("agg_intersects"), + st_intersects_agg(col("left_index"), 
col("right_index")).alias("agg_intersects"), first("left_wkt").alias("left_wkt"), first("right_wkt").alias("right_wkt") ) @@ -71,22 +71,13 @@ trait ST_IntersectsBehaviors extends QueryTest { right.createOrReplaceTempView("right") val result2 = spark.sql(""" - |SELECT ST_INTERSECTS_AGGREGATE(LEFT_INDEX, RIGHT_INDEX) - |FROM LEFT - |INNER JOIN RIGHT ON LEFT_INDEX.INDEX_ID == RIGHT_INDEX.INDEX_ID - |GROUP BY LEFT_ID, RIGHT_ID - |""".stripMargin) - - result2.collect().length should be > 0 - - val result3 = spark.sql(""" |SELECT ST_INTERSECTS_AGG(LEFT_INDEX, RIGHT_INDEX) |FROM LEFT |INNER JOIN RIGHT ON LEFT_INDEX.INDEX_ID == RIGHT_INDEX.INDEX_ID |GROUP BY LEFT_ID, RIGHT_ID |""".stripMargin) - result3.collect().length should be > 0 + result2.collect().length should be > 0 noException should be thrownBy st_intersects_agg(lit("POLYGON (1 1, 2 2, 3 3, 1 1)"), lit("POLYGON (1 1, 2 2, 3 3, 1 1)")) } @@ -141,7 +132,7 @@ trait ST_IntersectsBehaviors extends QueryTest { val results = chips .groupBy("row_id") - .agg(st_intersects_aggregate(col("left_index"), col("right_index")).alias("flag")) + .agg(st_intersects_agg(col("left_index"), col("right_index")).alias("flag")) results.select("flag").as[Boolean].collect() should contain theSameElementsAs Seq(true, true, true, true, false) } @@ -158,7 +149,7 @@ trait ST_IntersectsBehaviors extends QueryTest { .select( col("wkt"), col("id").alias("left_id"), - mosaic_explode(col("wkt"), resolution).alias("left_index"), + grid_tessellateexplode(col("wkt"), resolution).alias("left_index"), col("wkt").alias("left_wkt") ) @@ -180,7 +171,7 @@ trait ST_IntersectsBehaviors extends QueryTest { "right_id" ) .agg( - st_intersects_aggregate(col("left_index"), col("right_index")).alias("agg_intersects"), + st_intersects_agg(col("left_index"), col("right_index")).alias("agg_intersects"), first("left_wkt").alias("left_wkt"), first("right_wkt").alias("right_wkt") ) From 00b1f395e2ed4e54d3e2f7c8a25dfd03a8a32527 Mon Sep 17 00:00:00 2001 From: Michael Johns 
Date: Tue, 23 Jan 2024 12:18:48 -0500 Subject: [PATCH 6/9] more adjustments from PR feedback. --- docs/source/api/rasterio-udfs.rst | 41 +++++++++++++-------------- docs/source/api/spatial-functions.rst | 8 +++--- 2 files changed, 24 insertions(+), 25 deletions(-) diff --git a/docs/source/api/rasterio-udfs.rst b/docs/source/api/rasterio-udfs.rst index c1ea51672..30e2543b7 100644 --- a/docs/source/api/rasterio-udfs.rst +++ b/docs/source/api/rasterio-udfs.rst @@ -119,14 +119,14 @@ Finally we will apply the function to the DataFrame. .. code-block:: python df.select(compute_band_mean("tile.raster")).show() - +---------------------------+ - | compute_band_mean(raster) | - +---------------------------+ - | 0.0111000000000000| - | 0.0021000000000000| - | 0.3001000000000000| - | ... | - +---------------------------+ + +----------------------------+ + | compute_band_mean(raster) | + +----------------------------+ + | 0.0111000000000000 | + | 0.0021000000000000 | + | 0.3001000000000000 | + | ... | + +----------------------------+ UDF example for computing NDVI @@ -297,29 +297,28 @@ Finally we will apply the function to the DataFrame. lit("dbfs:/path/to/output/dir").alias("fuse_dir") ) ).display() - +-------------------------------------+ + +---------------------------------------------+ | write_raster(raster, driver, uuid, fuse_dir)| - +-------------------------------------+ - | dbfs:/path/to/output/dir/1234.tif | - | dbfs:/path/to/output/dir/4545.tif | - | dbfs:/path/to/output/dir/3215.tif | - | ... | - +-------------------------------------+ + +---------------------------------------------+ + | dbfs:/path/to/output/dir/1234.tif | + | dbfs:/path/to/output/dir/4545.tif | + | dbfs:/path/to/output/dir/3215.tif | + | ... | + +---------------------------------------------+ Sometimes you don't need to be quite as fancy. Consider when you simply want to specify to write out raster contents, -in this case as a TIF (or you could specify the extension or include it in the file_id). 
+assuming you specify the extension in the file_id. This is just writing binary column to file, nothing further. .. code-block:: python @udf("string") - def write_tif(raster, file_id, fuse_dir): + def write_contents(raster, file_name, fuse_dir): from pathlib import Path import os import shutil import tempfile Path(fuse_dir).mkdir(parents=True, exist_ok=True) - file_name = f"{file_id}.tif" fuse_path = f"{fuse_dir}/{file_name}" if not os.path.exists(fuse_path): with tempfile.TemporaryDirectory() as tmp_dir: @@ -338,14 +337,14 @@ Finally we will apply the function to the DataFrame. .. code-block:: python df.select( - write_tif( + write_contents( "tile.raster", - "uuid", + F.concat("uuid", F.lit(".tif").alias("file_name"), lit("dbfs:/path/to/output/dir").alias("fuse_dir") ) ).display() +-------------------------------------+ - | write_tif(raster, uuid, fuse_dir)| + | write_tif(raster, file_name, fuse_dir) | +-------------------------------------+ | dbfs:/path/to/output/dir/1234.tif | | dbfs:/path/to/output/dir/4545.tif | diff --git a/docs/source/api/spatial-functions.rst b/docs/source/api/spatial-functions.rst index b5a8473c8..2e02fb3d6 100644 --- a/docs/source/api/spatial-functions.rst +++ b/docs/source/api/spatial-functions.rst @@ -372,7 +372,7 @@ st_concavehull df = spark.createDataFrame([{'wkt': 'MULTIPOINT ((10 40), (40 30), (20 20), (30 10))'}]) df.select(st_concavehull('wkt'), lit(0.1))).show(1, False) +---------------------------------------------+ - |st_concavehull(wkt, 0.1) | + |st_concavehull(wkt, 0.1) | +---------------------------------------------+ |POLYGON ((10 40, 20 20, 30 10, 40 30, 10 40))| +---------------------------------------------+ @@ -382,7 +382,7 @@ st_concavehull val df = List(("MULTIPOINT ((10 40), (40 30), (20 20), (30 10))")).toDF("wkt") df.select(st_concavehull(col("wkt"), lit(0.1))).show(false) +---------------------------------------------+ - |st_concavehull(wkt, 0.1) | + |st_concavehull(wkt, 0.1) | 
+---------------------------------------------+ |POLYGON ((10 40, 20 20, 30 10, 40 30, 10 40))| +---------------------------------------------+ @@ -391,7 +391,7 @@ st_concavehull SELECT st_convexhull("MULTIPOINT ((10 40), (40 30), (20 20), (30 10))", 0.1) +---------------------------------------------+ - |st_concavehull(wkt, 0.1) | + |st_concavehull(wkt, 0.1) | +---------------------------------------------+ |POLYGON ((10 40, 20 20, 30 10, 40 30, 10 40))| +---------------------------------------------+ @@ -401,7 +401,7 @@ st_concavehull df <- createDataFrame(data.frame(wkt = "MULTIPOINT ((10 40), (40 30), (20 20), (30 10))")) showDF(select(df, st_concavehull(column("wkt"), lit(0.1)))) +---------------------------------------------+ - |st_concavehull(wkt, 0.1) | + |st_concavehull(wkt, 0.1) | +---------------------------------------------+ |POLYGON ((10 40, 20 20, 30 10, 40 30, 10 40))| +---------------------------------------------+ From 4b45b44d3ad26982b6b55a0070efecc3428c1b5c Mon Sep 17 00:00:00 2001 From: Michael Johns Date: Tue, 23 Jan 2024 12:20:31 -0500 Subject: [PATCH 7/9] more adjustments from PR feedback. --- docs/source/api/rasterio-udfs.rst | 32 +++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/docs/source/api/rasterio-udfs.rst b/docs/source/api/rasterio-udfs.rst index 30e2543b7..80561a89a 100644 --- a/docs/source/api/rasterio-udfs.rst +++ b/docs/source/api/rasterio-udfs.rst @@ -297,14 +297,14 @@ Finally we will apply the function to the DataFrame. lit("dbfs:/path/to/output/dir").alias("fuse_dir") ) ).display() - +---------------------------------------------+ - | write_raster(raster, driver, uuid, fuse_dir)| - +---------------------------------------------+ - | dbfs:/path/to/output/dir/1234.tif | - | dbfs:/path/to/output/dir/4545.tif | - | dbfs:/path/to/output/dir/3215.tif | - | ... 
| - +---------------------------------------------+ + +----------------------------------------------+ + | write_raster(raster, driver, uuid, fuse_dir) | + +----------------------------------------------+ + | dbfs:/path/to/output/dir/1234.tif | + | dbfs:/path/to/output/dir/4545.tif | + | dbfs:/path/to/output/dir/3215.tif | + | ... | + +----------------------------------------------+ Sometimes you don't need to be quite as fancy. Consider when you simply want to specify to write out raster contents, assuming you specify the extension in the file_id. This is just writing binary column to file, nothing further. @@ -343,11 +343,11 @@ Finally we will apply the function to the DataFrame. lit("dbfs:/path/to/output/dir").alias("fuse_dir") ) ).display() - +-------------------------------------+ - | write_tif(raster, file_name, fuse_dir) | - +-------------------------------------+ - | dbfs:/path/to/output/dir/1234.tif | - | dbfs:/path/to/output/dir/4545.tif | - | dbfs:/path/to/output/dir/3215.tif | - | ... | - +-------------------------------------+ + +----------------------------------------+ + | write_tif(raster, file_name, fuse_dir) | + +----------------------------------------+ + | dbfs:/path/to/output/dir/1234.tif | + | dbfs:/path/to/output/dir/4545.tif | + | dbfs:/path/to/output/dir/3215.tif | + | ... 
| + +----------------------------------------+ From 87d45a2aff7e862e2bcc25d952731124975fe20e Mon Sep 17 00:00:00 2001 From: Michael Johns Date: Tue, 23 Jan 2024 12:29:18 -0500 Subject: [PATCH 8/9] complete move of _agg raster functions to aggregators.py --- python/mosaic/api/raster.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/python/mosaic/api/raster.py b/python/mosaic/api/raster.py index 6cf24d811..befd10cb9 100644 --- a/python/mosaic/api/raster.py +++ b/python/mosaic/api/raster.py @@ -15,9 +15,7 @@ "rst_boundingbox", "rst_clip", "rst_combineavg", - "rst_combineavg_agg", "rst_derivedband", - "rst_derivedband_agg", "rst_frombands", "rst_fromcontent", "rst_fromfile", @@ -30,7 +28,6 @@ "rst_mapalgebra", "rst_memsize", "rst_merge", - "rst_merge_agg", "rst_metadata", "rst_ndvi", "rst_numbands", From de74df2b4db7532c413207337d7a3dedea9d3aeb Mon Sep 17 00:00:00 2001 From: Michael Johns Date: Tue, 23 Jan 2024 14:55:15 -0500 Subject: [PATCH 9/9] Fixed a broken python test from refactor. Live tested rasterio udf examples (last 2) and updated code and comments. Added additional json_spec examples to rst_mapalgebra. --- docs/source/api/raster-functions.rst | 6 ++ docs/source/api/rasterio-udfs.rst | 83 +++++++++++++++------------- python/test/test_vector_functions.py | 4 +- 3 files changed, 52 insertions(+), 41 deletions(-) diff --git a/docs/source/api/raster-functions.rst b/docs/source/api/raster-functions.rst index c982e929a..87af5c46e 100644 --- a/docs/source/api/raster-functions.rst +++ b/docs/source/api/raster-functions.rst @@ -908,6 +908,12 @@ rst_mapalgebra arrays (such as +, -, *, and /) along with logical operators (such as >, <, =). For this distributed implementation, all rasters must have the same dimensions and no projection checking is performed. + Here are examples of the json_spec: (1) shows default indexing, (2) shows reusing an index, + and (3) shows band indexing. 
+ (1) '{"calc": "A+B/C"}' + (2) '{"calc": "A+B/C", "A_index": 0, "B_index": 1, "C_index": 1}' + (3) '{"calc": "A+B/C", "A_index": 0, "B_index": 1, "C_index": 2, "A_band": 1, "B_band": 1, "C_band": 1}' + :param tile: A column containing the raster tile. :type tile: Column (RasterTileType) :param json_spec: A column containing the map algebra operation specification. diff --git a/docs/source/api/rasterio-udfs.rst b/docs/source/api/rasterio-udfs.rst index 80561a89a..339223c58 100644 --- a/docs/source/api/rasterio-udfs.rst +++ b/docs/source/api/rasterio-udfs.rst @@ -236,7 +236,11 @@ Firstly we will create a spark DataFrame from a directory of raster files. Next we will define a function that will write a given raster file to disk. A "gotcha" to keep in mind is that you do not want to have a file context manager open when you go to write out its context as the context manager will not yet -have been flushed. +have been flushed. Another "gotcha" might be that the raster dataset does not have CRS included; if this arises, we +recommend adjusting the function to specify the CRS and set it on the dst variable, more at +`rasterio.crs `_. We would also point out that notional +"file_id" param can be constructed as a repeatable name from other field(s) in your dataframe / table or be random, +depending on your needs. .. code-block:: python @@ -253,31 +257,30 @@ have been flushed. # - [1] populate the initial profile # # profile is needed in order to georeference the image - profile = None - with MemoryFile(BytesIO(raster)) as memfile: - with memfile.open() as dataset: - profile = dataset.profile - - # - [2] get the correct extension - extensions_map = rasterio.drivers.raster_driver_extensions() - driver_map = {v: k for k, v in extensions_map.items()} - extension = driver_map[driver] #e.g. 
GTiff - file_name = f"{file_id}.{extension}" - - # - [3] write local raster - # - this is showing a single band [1] - # being written with tempfile.TemporaryDirectory() as tmp_dir: - tmp_path = f"{tmp_dir}/{file_name}" + profile = None + data_arr = None + with MemoryFile(BytesIO(raster)) as memfile: + with memfile.open() as dataset: + profile = dataset.profile + data_arr = dataset.read() # here you can update profile using .update method # example https://rasterio.readthedocs.io/en/latest/topics/writing.html + # - [2] get the correct extension + extensions_map = rasterio.drivers.raster_driver_extensions() + driver_map = {v: k for k, v in extensions_map.items()} + extension = driver_map[driver] #e.g. GTiff + file_name = f"{file_id}.{extension}" + # - [3] write local raster + # - this is showing a single band [1] + # being written + tmp_path = f"{tmp_dir}/{file_name}" with rasterio.open( - tmp_path, - "w", - **profile + tmp_path, + "w", + **profile ) as dst: - dst.write(raster,1) # <- adjust as needed - + dst.write(data_arr) # <- adjust as needed # - [4] copy to fuse path Path(fuse_dir).mkdir(parents=True, exist_ok=True) fuse_path = f"{fuse_dir}/{file_name}" @@ -294,25 +297,27 @@ Finally we will apply the function to the DataFrame. "tile.raster", lit("GTiff").alias("driver"), "uuid", - lit("dbfs:/path/to/output/dir").alias("fuse_dir") + lit("/dbfs/path/to/output/dir").alias("fuse_dir") ) ).display() +----------------------------------------------+ | write_raster(raster, driver, uuid, fuse_dir) | +----------------------------------------------+ - | dbfs:/path/to/output/dir/1234.tif | - | dbfs:/path/to/output/dir/4545.tif | - | dbfs:/path/to/output/dir/3215.tif | + | /dbfs/path/to/output/dir/1234.tif | + | /dbfs/path/to/output/dir/4545.tif | + | /dbfs/path/to/output/dir/3215.tif | | ... | +----------------------------------------------+ Sometimes you don't need to be quite as fancy. 
Consider when you simply want to specify to write out raster contents, -assuming you specify the extension in the file_id. This is just writing binary column to file, nothing further. +assuming you specify the extension in the file_name. This is just writing binary column to file, nothing further. Again, +we use a notional "uuid" column as part of "file_name" param, which would have the same considerations as mentioned +above. .. code-block:: python @udf("string") - def write_contents(raster, file_name, fuse_dir): + def write_binary(raster_bin, file_name, fuse_dir): from pathlib import Path import os import shutil @@ -326,7 +331,7 @@ assuming you specify the extension in the file_id. This is just writing binary c # - write within the tmp_dir context # - flush the writer before copy tmp_file = open(tmp_path, "wb") - tmp_file.write(raster) # <- write entire binary content + tmp_file.write(raster_bin) # <- write entire binary content tmp_file.close() # - copy local to fuse shutil.copyfile(tmp_path, fuse_path) @@ -337,17 +342,17 @@ Finally we will apply the function to the DataFrame. .. code-block:: python df.select( - write_contents( + write_binary( "tile.raster", - F.concat("uuid", F.lit(".tif").alias("file_name"), - lit("dbfs:/path/to/output/dir").alias("fuse_dir") + F.concat("uuid", F.lit(".tif")).alias("file_name"), + F.lit("/dbfs/path/to/output/dir").alias("fuse_dir") ) ).display() - +----------------------------------------+ - | write_tif(raster, file_name, fuse_dir) | - +----------------------------------------+ - | dbfs:/path/to/output/dir/1234.tif | - | dbfs:/path/to/output/dir/4545.tif | - | dbfs:/path/to/output/dir/3215.tif | - | ... | - +----------------------------------------+ + +-------------------------------------------+ + | write_binary(raster, file_name, fuse_dir) | + +-------------------------------------------+ + | /dbfs/path/to/output/dir/1234.tif | + | /dbfs/path/to/output/dir/4545.tif | + | /dbfs/path/to/output/dir/3215.tif | + | ... 
| + +-------------------------------------------+ diff --git a/python/test/test_vector_functions.py b/python/test/test_vector_functions.py index 4db8acd1f..3a127327a 100644 --- a/python/test/test_vector_functions.py +++ b/python/test/test_vector_functions.py @@ -169,10 +169,10 @@ def test_aggregation_functions(self): .join(right_df, col("left_index.index_id") == col("right_index.index_id")) .groupBy("left_id", "right_id") .agg( - api.st_intersects_aggregate( + api.st_intersects_agg( col("left_index"), col("right_index") ).alias("agg_intersects"), - api.st_intersection_aggregate( + api.st_intersection_agg( col("left_index"), col("right_index") ).alias("agg_intersection"), first("left_geom").alias("left_geom"),