From d49d30687741ee5a262c4aa695a8e6302a6f7f2e Mon Sep 17 00:00:00 2001 From: Matthias Mohr Date: Mon, 28 Nov 2022 16:48:48 +0100 Subject: [PATCH 1/3] Make predict processes for ML more general #368 --- CHANGELOG.md | 3 +- proposals/load_ml_model.json | 2 +- proposals/predict_curve.json | 4 +- ...ndom_forest.json => predict_ml_model.json} | 8 ++-- proposals/predict_ml_model_probabilities.json | 45 +++++++++++++++++++ 5 files changed, 54 insertions(+), 8 deletions(-) rename proposals/{predict_random_forest.json => predict_ml_model.json} (69%) create mode 100644 proposals/predict_ml_model_probabilities.json diff --git a/CHANGELOG.md b/CHANGELOG.md index 2a69e93e..8066f38e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -13,7 +13,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - `fit_regr_random_forest` - `flatten_dimensions` - `load_ml_model` - - `predict_random_forest` + - `predict_ml_model` + - `predict_ml_model_probabilities` - `save_ml_model` - `unflatten_dimension` - `vector_buffer` diff --git a/proposals/load_ml_model.json b/proposals/load_ml_model.json index 151513c8..076caa3d 100644 --- a/proposals/load_ml_model.json +++ b/proposals/load_ml_model.json @@ -36,7 +36,7 @@ } ], "returns": { - "description": "A machine learning model to be used with machine learning processes such as ``predict_random_forest()``.", + "description": "A machine learning model to be used with machine learning processes such as ``predict_ml_model()`` or ``predict_ml_model_probabilities()``.", "schema": { "type": "object", "subtype": "ml-model" diff --git a/proposals/predict_curve.json b/proposals/predict_curve.json index 52adcc5e..3588c415 100644 --- a/proposals/predict_curve.json +++ b/proposals/predict_curve.json @@ -1,6 +1,6 @@ { "id": "predict_curve", - "summary": "Predict values", + "summary": "Predict values using a model function", "description": "Predict values using a model function and pre-computed parameters. 
The process is primarily intended to compute values for new labels, but it can also fill gaps where existing labels contain no-data (`null`) values.", "categories": [ "cubes", @@ -109,4 +109,4 @@ "message": "A dimension with the specified name does not exist." } } -} \ No newline at end of file +} diff --git a/proposals/predict_random_forest.json b/proposals/predict_ml_model.json similarity index 69% rename from proposals/predict_random_forest.json rename to proposals/predict_ml_model.json index 62c54e9f..fe61bf45 100644 --- a/proposals/predict_random_forest.json +++ b/proposals/predict_ml_model.json @@ -1,7 +1,7 @@ { - "id": "predict_random_forest", - "summary": "Predict values based on a Random Forest model", - "description": "Applies a Random Forest machine learning model to an array and predict a value for it.", + "id": "predict_ml_model", + "summary": "Predict values using a ML model", + "description": "Applies a machine learning model to an array and predicts a value/class for it.", "categories": [ "machine learning", "reducer" @@ -23,7 +23,7 @@ }, { "name": "model", - "description": "A model object that can be trained with the processes ``fit_regr_random_forest()`` (regression) and ``fit_class_random_forest()`` (classification).", + "description": "A ML model that can be trained with one of the ML processes such as ``fit_class_random_forest()``.", "schema": { "type": "object", "subtype": "ml-model" diff --git a/proposals/predict_ml_model_probabilities.json b/proposals/predict_ml_model_probabilities.json new file mode 100644 index 00000000..afdf256d --- /dev/null +++ b/proposals/predict_ml_model_probabilities.json @@ -0,0 +1,45 @@ +{ + "id": "predict_ml_model_probabilities", + "summary": "Predict class probabilities using a ML model", + "description": "Applies a machine learning model to an array and predicts (class) probabilities for it.", + "categories": [ + "machine learning", + "reducer" + ], + "experimental": true, + "parameters": [ + { + 
"name": "data", + "description": "An array of numbers.", + "schema": { + "type": "array", + "items": { + "type": [ + "number", + "null" + ] + } + } + }, + { + "name": "model", + "description": "A ML model that can be trained with one of the ML processes such as ``fit_regr_random_forest()``.", + "schema": { + "type": "object", + "subtype": "ml-model" + } + } + ], + "returns": { + "description": "The predicted (class) probabilities. Returns `null` if any of the given values in the array is a no-data value.", + "schema": { + "type": "array", + "items": { + "type": [ + "number", + "null" + ] + } + } + } +} From 5345c195fce1675db66cca6e44f858ea88e9e0a7 Mon Sep 17 00:00:00 2001 From: Matthias Mohr Date: Tue, 14 Mar 2023 12:14:01 +0100 Subject: [PATCH 2/3] Remove ML processes for 2.0.0 #416 --- meta/subtype-schemas.json | 6 -- proposals/fit_class_random_forest.json | 110 ------------------------- proposals/fit_regr_random_forest.json | 110 ------------------------- proposals/load_ml_model.json | 53 ------------ proposals/predict_random_forest.json | 42 ---------- proposals/save_ml_model.json | 44 ---------- 6 files changed, 365 deletions(-) delete mode 100644 proposals/fit_class_random_forest.json delete mode 100644 proposals/fit_regr_random_forest.json delete mode 100644 proposals/load_ml_model.json delete mode 100644 proposals/predict_random_forest.json delete mode 100644 proposals/save_ml_model.json diff --git a/meta/subtype-schemas.json b/meta/subtype-schemas.json index 941e6a48..498adf60 100644 --- a/meta/subtype-schemas.json +++ b/meta/subtype-schemas.json @@ -238,12 +238,6 @@ } } }, - "ml-model": { - "type": "object", - "subtype": "ml-model", - "title": "Machine Learning Model", - "description": "A machine learning model, accompanied with STAC metadata that implements the the STAC ml-model extension." 
- }, "output-format": { "type": "string", "subtype": "output-format", diff --git a/proposals/fit_class_random_forest.json b/proposals/fit_class_random_forest.json deleted file mode 100644 index 6eb874bf..00000000 --- a/proposals/fit_class_random_forest.json +++ /dev/null @@ -1,110 +0,0 @@ -{ - "id": "fit_class_random_forest", - "summary": "Train a random forest classification model", - "description": "Executes the fit of a random forest classification based on training data. The process does not include a separate split of the data in test, validation and training data. The Random Forest classification model is based on the approach by Breiman (2001).", - "categories": [ - "machine learning" - ], - "experimental": true, - "parameters": [ - { - "name": "predictors", - "description": "The predictors for the classification model as a vector data cube. Aggregated to the features (vectors) of the target input variable.", - "schema": [ - { - "type": "object", - "subtype": "datacube", - "dimensions": [ - { - "type": "geometry" - }, - { - "type": "bands" - } - ] - }, - { - "type": "object", - "subtype": "datacube", - "dimensions": [ - { - "type": "geometry" - }, - { - "type": "other" - } - ] - } - ] - }, - { - "name": "target", - "description": "The training sites for the classification model as a vector data cube. This is associated with the target variable for the Random Forest model. The geometry has to associated with a value to predict (e.g. 
fractional forest canopy cover).", - "schema": { - "type": "object", - "subtype": "datacube", - "dimensions": [ - { - "type": "geometry" - } - ] - } - }, - { - "name": "max_variables", - "description": "Specifies how many split variables will be used at a node.\n\nThe following options are available:\n\n- *integer*: The given number of variables are considered for each split.\n- `all`: All variables are considered for each split.\n- `log2`: The logarithm with base 2 of the number of variables are considered for each split.\n- `onethird`: A third of the number of variables are considered for each split.\n- `sqrt`: The square root of the number of variables are considered for each split. This is often the default for classification.", - "schema": [ - { - "type": "integer", - "minimum": 1 - }, - { - "type": "string", - "enum": [ - "all", - "log2", - "onethird", - "sqrt" - ] - } - ] - }, - { - "name": "num_trees", - "description": "The number of trees build within the Random Forest classification.", - "optional": true, - "default": 100, - "schema": { - "type": "integer", - "minimum": 1 - } - }, - { - "name": "seed", - "description": "A randomization seed to use for the random sampling in training. 
If not given or `null`, no seed is used and results may differ on subsequent use.", - "optional": true, - "default": null, - "schema": { - "type": [ - "integer", - "null" - ] - } - } - ], - "returns": { - "description": "A model object that can be saved with ``save_ml_model()`` and restored with ``load_ml_model()``.", - "schema": { - "type": "object", - "subtype": "ml-model" - } - }, - "links": [ - { - "href": "https://doi.org/10.1023/A:1010933404324", - "title": "Breiman (2001): Random Forests", - "type": "text/html", - "rel": "about" - } - ] -} diff --git a/proposals/fit_regr_random_forest.json b/proposals/fit_regr_random_forest.json deleted file mode 100644 index 51191fa5..00000000 --- a/proposals/fit_regr_random_forest.json +++ /dev/null @@ -1,110 +0,0 @@ -{ - "id": "fit_regr_random_forest", - "summary": "Train a random forest regression model", - "description": "Executes the fit of a random forest regression based on training data. The process does not include a separate split of the data in test, validation and training data. The Random Forest regression model is based on the approach by Breiman (2001).", - "categories": [ - "machine learning" - ], - "experimental": true, - "parameters": [ - { - "name": "predictors", - "description": "The predictors for the regression model as a vector data cube. Aggregated to the features (vectors) of the target input variable.", - "schema": [ - { - "type": "object", - "subtype": "datacube", - "dimensions": [ - { - "type": "geometry" - }, - { - "type": "bands" - } - ] - }, - { - "type": "object", - "subtype": "datacube", - "dimensions": [ - { - "type": "geometry" - }, - { - "type": "other" - } - ] - } - ] - }, - { - "name": "target", - "description": "The training sites for the regression model as a vector data cube. This is associated with the target variable for the Random Forest model. The geometry has to associated with a value to predict (e.g. 
fractional forest canopy cover).", - "schema": { - "type": "object", - "subtype": "datacube", - "dimensions": [ - { - "type": "geometry" - } - ] - } - }, - { - "name": "max_variables", - "description": "Specifies how many split variables will be used at a node.\n\nThe following options are available:\n\n- *integer*: The given number of variables are considered for each split.\n- `all`: All variables are considered for each split.\n- `log2`: The logarithm with base 2 of the number of variables are considered for each split.\n- `onethird`: A third of the number of variables are considered for each split. This is often the default for regression.\n- `sqrt`: The square root of the number of variables are considered for each split.", - "schema": [ - { - "type": "integer", - "minimum": 1 - }, - { - "type": "string", - "enum": [ - "all", - "log2", - "onethird", - "sqrt" - ] - } - ] - }, - { - "name": "num_trees", - "description": "The number of trees build within the Random Forest regression.", - "optional": true, - "default": 100, - "schema": { - "type": "integer", - "minimum": 1 - } - }, - { - "name": "seed", - "description": "A randomization seed to use for the random sampling in training. 
If not given or `null`, no seed is used and results may differ on subsequent use.", - "optional": true, - "default": null, - "schema": { - "type": [ - "integer", - "null" - ] - } - } - ], - "returns": { - "description": "A model object that can be saved with ``save_ml_model()`` and restored with ``load_ml_model()``.", - "schema": { - "type": "object", - "subtype": "ml-model" - } - }, - "links": [ - { - "href": "https://doi.org/10.1023/A:1010933404324", - "title": "Breiman (2001): Random Forests", - "type": "text/html", - "rel": "about" - } - ] -} diff --git a/proposals/load_ml_model.json b/proposals/load_ml_model.json deleted file mode 100644 index 151513c8..00000000 --- a/proposals/load_ml_model.json +++ /dev/null @@ -1,53 +0,0 @@ -{ - "id": "load_ml_model", - "summary": "Load a ML model", - "description": "Loads a machine learning model from a STAC Item.\n\nSuch a model could be trained and saved as part of a previous batch job with processes such as ``fit_regr_random_forest()`` and ``save_ml_model()``.", - "categories": [ - "machine learning", - "import" - ], - "experimental": true, - "parameters": [ - { - "name": "id", - "description": "The STAC Item to load the machine learning model from. The STAC Item must implement the `ml-model` extension.", - "schema": [ - { - "title": "URL", - "type": "string", - "format": "uri", - "subtype": "uri", - "pattern": "^https?://" - }, - { - "title": "Batch Job ID", - "description": "Loading a model by batch job ID is possible only if a single model has been saved by the job. 
Otherwise, you have to load a specific model from a batch job by URL.", - "type": "string", - "subtype": "job-id", - "pattern": "^[\\w\\-\\.~]+$" - }, - { - "title": "User-uploaded File", - "type": "string", - "subtype": "file-path", - "pattern": "^[^\r\n\\:'\"]+$" - } - ] - } - ], - "returns": { - "description": "A machine learning model to be used with machine learning processes such as ``predict_random_forest()``.", - "schema": { - "type": "object", - "subtype": "ml-model" - } - }, - "links": [ - { - "href": "https://github.com/stac-extensions/ml-model", - "title": "STAC ml-model extension", - "type": "text/html", - "rel": "about" - } - ] -} diff --git a/proposals/predict_random_forest.json b/proposals/predict_random_forest.json deleted file mode 100644 index 62c54e9f..00000000 --- a/proposals/predict_random_forest.json +++ /dev/null @@ -1,42 +0,0 @@ -{ - "id": "predict_random_forest", - "summary": "Predict values based on a Random Forest model", - "description": "Applies a Random Forest machine learning model to an array and predict a value for it.", - "categories": [ - "machine learning", - "reducer" - ], - "experimental": true, - "parameters": [ - { - "name": "data", - "description": "An array of numbers.", - "schema": { - "type": "array", - "items": { - "type": [ - "number", - "null" - ] - } - } - }, - { - "name": "model", - "description": "A model object that can be trained with the processes ``fit_regr_random_forest()`` (regression) and ``fit_class_random_forest()`` (classification).", - "schema": { - "type": "object", - "subtype": "ml-model" - } - } - ], - "returns": { - "description": "The predicted value. 
Returns `null` if any of the given values in the array is a no-data value.", - "schema": { - "type": [ - "number", - "null" - ] - } - } -} diff --git a/proposals/save_ml_model.json b/proposals/save_ml_model.json deleted file mode 100644 index 5e9ea8b0..00000000 --- a/proposals/save_ml_model.json +++ /dev/null @@ -1,44 +0,0 @@ -{ - "id": "save_ml_model", - "summary": "Save a ML model", - "description": "Saves a machine learning model as part of a batch job.\n\nThe model will be accompanied by a separate STAC Item that implements the [ml-model extension](https://github.com/stac-extensions/ml-model).", - "categories": [ - "machine learning", - "import" - ], - "experimental": true, - "parameters": [ - { - "name": "data", - "description": "The data to store as a machine learning model.", - "schema": { - "type": "object", - "subtype": "ml-model" - } - }, - { - "name": "options", - "description": "Additional parameters to create the file(s).", - "schema": { - "type": "object", - "additionalParameters": false - }, - "default": {}, - "optional": true - } - ], - "returns": { - "description": "Returns `false` if the process failed to store the model, `true` otherwise.", - "schema": { - "type": "boolean" - } - }, - "links": [ - { - "href": "https://github.com/stac-extensions/ml-model", - "title": "STAC ml-model extension", - "type": "text/html", - "rel": "about" - } - ] -} \ No newline at end of file From 7e2d30eecdaf34f14dbe1476374574145d1b1c91 Mon Sep 17 00:00:00 2001 From: Matthias Mohr Date: Tue, 14 Mar 2023 12:14:59 +0100 Subject: [PATCH 3/3] Add ML processes for 2.1.0 #416 --- meta/subtype-schemas.json | 6 ++ proposals/fit_class_random_forest.json | 110 +++++++++++++++++++++++++ proposals/fit_regr_random_forest.json | 110 +++++++++++++++++++++++++ proposals/load_ml_model.json | 53 ++++++++++++ proposals/predict_random_forest.json | 42 ++++++++++ proposals/save_ml_model.json | 44 ++++++++++ 6 files changed, 365 insertions(+) create mode 100644 
proposals/fit_class_random_forest.json create mode 100644 proposals/fit_regr_random_forest.json create mode 100644 proposals/load_ml_model.json create mode 100644 proposals/predict_random_forest.json create mode 100644 proposals/save_ml_model.json diff --git a/meta/subtype-schemas.json b/meta/subtype-schemas.json index 498adf60..941e6a48 100644 --- a/meta/subtype-schemas.json +++ b/meta/subtype-schemas.json @@ -238,6 +238,12 @@ } } }, + "ml-model": { + "type": "object", + "subtype": "ml-model", + "title": "Machine Learning Model", + "description": "A machine learning model, accompanied with STAC metadata that implements the STAC ml-model extension." + }, "output-format": { "type": "string", "subtype": "output-format", diff --git a/proposals/fit_class_random_forest.json b/proposals/fit_class_random_forest.json new file mode 100644 index 00000000..6eb874bf --- /dev/null +++ b/proposals/fit_class_random_forest.json @@ -0,0 +1,110 @@ +{ + "id": "fit_class_random_forest", + "summary": "Train a random forest classification model", + "description": "Executes the fit of a random forest classification based on training data. The process does not include a separate split of the data in test, validation and training data. The Random Forest classification model is based on the approach by Breiman (2001).", + "categories": [ + "machine learning" + ], + "experimental": true, + "parameters": [ + { + "name": "predictors", + "description": "The predictors for the classification model as a vector data cube. Aggregated to the features (vectors) of the target input variable.", + "schema": [ + { + "type": "object", + "subtype": "datacube", + "dimensions": [ + { + "type": "geometry" + }, + { + "type": "bands" + } + ] + }, + { + "type": "object", + "subtype": "datacube", + "dimensions": [ + { + "type": "geometry" + }, + { + "type": "other" + } + ] + } + ] + }, + { + "name": "target", + "description": "The training sites for the classification model as a vector data cube. 
This is associated with the target variable for the Random Forest model. The geometry has to be associated with a value to predict (e.g. fractional forest canopy cover).", + "schema": { + "type": "object", + "subtype": "datacube", + "dimensions": [ + { + "type": "geometry" + } + ] + } + }, + { + "name": "max_variables", + "description": "Specifies how many split variables will be used at a node.\n\nThe following options are available:\n\n- *integer*: The given number of variables are considered for each split.\n- `all`: All variables are considered for each split.\n- `log2`: The logarithm with base 2 of the number of variables are considered for each split.\n- `onethird`: A third of the number of variables are considered for each split.\n- `sqrt`: The square root of the number of variables are considered for each split. This is often the default for classification.", + "schema": [ + { + "type": "integer", + "minimum": 1 + }, + { + "type": "string", + "enum": [ + "all", + "log2", + "onethird", + "sqrt" + ] + } + ] + }, + { + "name": "num_trees", + "description": "The number of trees built within the Random Forest classification.", + "optional": true, + "default": 100, + "schema": { + "type": "integer", + "minimum": 1 + } + }, + { + "name": "seed", + "description": "A randomization seed to use for the random sampling in training. 
If not given or `null`, no seed is used and results may differ on subsequent use.", + "optional": true, + "default": null, + "schema": { + "type": [ + "integer", + "null" + ] + } + } + ], + "returns": { + "description": "A model object that can be saved with ``save_ml_model()`` and restored with ``load_ml_model()``.", + "schema": { + "type": "object", + "subtype": "ml-model" + } + }, + "links": [ + { + "href": "https://doi.org/10.1023/A:1010933404324", + "title": "Breiman (2001): Random Forests", + "type": "text/html", + "rel": "about" + } + ] +} diff --git a/proposals/fit_regr_random_forest.json b/proposals/fit_regr_random_forest.json new file mode 100644 index 00000000..51191fa5 --- /dev/null +++ b/proposals/fit_regr_random_forest.json @@ -0,0 +1,110 @@ +{ + "id": "fit_regr_random_forest", + "summary": "Train a random forest regression model", + "description": "Executes the fit of a random forest regression based on training data. The process does not include a separate split of the data in test, validation and training data. The Random Forest regression model is based on the approach by Breiman (2001).", + "categories": [ + "machine learning" + ], + "experimental": true, + "parameters": [ + { + "name": "predictors", + "description": "The predictors for the regression model as a vector data cube. Aggregated to the features (vectors) of the target input variable.", + "schema": [ + { + "type": "object", + "subtype": "datacube", + "dimensions": [ + { + "type": "geometry" + }, + { + "type": "bands" + } + ] + }, + { + "type": "object", + "subtype": "datacube", + "dimensions": [ + { + "type": "geometry" + }, + { + "type": "other" + } + ] + } + ] + }, + { + "name": "target", + "description": "The training sites for the regression model as a vector data cube. This is associated with the target variable for the Random Forest model. The geometry has to be associated with a value to predict (e.g. 
fractional forest canopy cover).", + "schema": { + "type": "object", + "subtype": "datacube", + "dimensions": [ + { + "type": "geometry" + } + ] + } + }, + { + "name": "max_variables", + "description": "Specifies how many split variables will be used at a node.\n\nThe following options are available:\n\n- *integer*: The given number of variables are considered for each split.\n- `all`: All variables are considered for each split.\n- `log2`: The logarithm with base 2 of the number of variables are considered for each split.\n- `onethird`: A third of the number of variables are considered for each split. This is often the default for regression.\n- `sqrt`: The square root of the number of variables are considered for each split.", + "schema": [ + { + "type": "integer", + "minimum": 1 + }, + { + "type": "string", + "enum": [ + "all", + "log2", + "onethird", + "sqrt" + ] + } + ] + }, + { + "name": "num_trees", + "description": "The number of trees built within the Random Forest regression.", + "optional": true, + "default": 100, + "schema": { + "type": "integer", + "minimum": 1 + } + }, + { + "name": "seed", + "description": "A randomization seed to use for the random sampling in training. 
If not given or `null`, no seed is used and results may differ on subsequent use.", + "optional": true, + "default": null, + "schema": { + "type": [ + "integer", + "null" + ] + } + } + ], + "returns": { + "description": "A model object that can be saved with ``save_ml_model()`` and restored with ``load_ml_model()``.", + "schema": { + "type": "object", + "subtype": "ml-model" + } + }, + "links": [ + { + "href": "https://doi.org/10.1023/A:1010933404324", + "title": "Breiman (2001): Random Forests", + "type": "text/html", + "rel": "about" + } + ] +} diff --git a/proposals/load_ml_model.json b/proposals/load_ml_model.json new file mode 100644 index 00000000..151513c8 --- /dev/null +++ b/proposals/load_ml_model.json @@ -0,0 +1,53 @@ +{ + "id": "load_ml_model", + "summary": "Load a ML model", + "description": "Loads a machine learning model from a STAC Item.\n\nSuch a model could be trained and saved as part of a previous batch job with processes such as ``fit_regr_random_forest()`` and ``save_ml_model()``.", + "categories": [ + "machine learning", + "import" + ], + "experimental": true, + "parameters": [ + { + "name": "id", + "description": "The STAC Item to load the machine learning model from. The STAC Item must implement the `ml-model` extension.", + "schema": [ + { + "title": "URL", + "type": "string", + "format": "uri", + "subtype": "uri", + "pattern": "^https?://" + }, + { + "title": "Batch Job ID", + "description": "Loading a model by batch job ID is possible only if a single model has been saved by the job. 
Otherwise, you have to load a specific model from a batch job by URL.", + "type": "string", + "subtype": "job-id", + "pattern": "^[\\w\\-\\.~]+$" + }, + { + "title": "User-uploaded File", + "type": "string", + "subtype": "file-path", + "pattern": "^[^\r\n\\:'\"]+$" + } + ] + } + ], + "returns": { + "description": "A machine learning model to be used with machine learning processes such as ``predict_random_forest()``.", + "schema": { + "type": "object", + "subtype": "ml-model" + } + }, + "links": [ + { + "href": "https://github.com/stac-extensions/ml-model", + "title": "STAC ml-model extension", + "type": "text/html", + "rel": "about" + } + ] +} diff --git a/proposals/predict_random_forest.json b/proposals/predict_random_forest.json new file mode 100644 index 00000000..62c54e9f --- /dev/null +++ b/proposals/predict_random_forest.json @@ -0,0 +1,42 @@ +{ + "id": "predict_random_forest", + "summary": "Predict values based on a Random Forest model", + "description": "Applies a Random Forest machine learning model to an array and predict a value for it.", + "categories": [ + "machine learning", + "reducer" + ], + "experimental": true, + "parameters": [ + { + "name": "data", + "description": "An array of numbers.", + "schema": { + "type": "array", + "items": { + "type": [ + "number", + "null" + ] + } + } + }, + { + "name": "model", + "description": "A model object that can be trained with the processes ``fit_regr_random_forest()`` (regression) and ``fit_class_random_forest()`` (classification).", + "schema": { + "type": "object", + "subtype": "ml-model" + } + } + ], + "returns": { + "description": "The predicted value. 
Returns `null` if any of the given values in the array is a no-data value.", + "schema": { + "type": [ + "number", + "null" + ] + } + } +} diff --git a/proposals/save_ml_model.json b/proposals/save_ml_model.json new file mode 100644 index 00000000..5e9ea8b0 --- /dev/null +++ b/proposals/save_ml_model.json @@ -0,0 +1,44 @@ +{ + "id": "save_ml_model", + "summary": "Save a ML model", + "description": "Saves a machine learning model as part of a batch job.\n\nThe model will be accompanied by a separate STAC Item that implements the [ml-model extension](https://github.com/stac-extensions/ml-model).", + "categories": [ + "machine learning", + "import" + ], + "experimental": true, + "parameters": [ + { + "name": "data", + "description": "The data to store as a machine learning model.", + "schema": { + "type": "object", + "subtype": "ml-model" + } + }, + { + "name": "options", + "description": "Additional parameters to create the file(s).", + "schema": { + "type": "object", + "additionalParameters": false + }, + "default": {}, + "optional": true + } + ], + "returns": { + "description": "Returns `false` if the process failed to store the model, `true` otherwise.", + "schema": { + "type": "boolean" + } + }, + "links": [ + { + "href": "https://github.com/stac-extensions/ml-model", + "title": "STAC ml-model extension", + "type": "text/html", + "rel": "about" + } + ] +} \ No newline at end of file