From a8c9137739952fc5b05af58dcd75c4cdde9e12ca Mon Sep 17 00:00:00 2001 From: jtsextonMITRE <45762017+jtsextonMITRE@users.noreply.github.com> Date: Wed, 11 Sep 2024 16:10:49 -0400 Subject: [PATCH] examples: updated training YML to be more condensed and understandable --- examples/mnist-classifier-demo/demo.ipynb | 43 ++- examples/mnist-classifier-demo/src/fgm.yml | 76 +++- examples/mnist-classifier-demo/src/train.yml | 325 ++++++------------ .../fgm_mnist_demo/artifacts_restapi.py | 151 ++++++++ .../fgm_mnist_demo/attacks_fgm.py | 294 ++++++++++++++++ .../backend_configs_tensorflow.py | 52 +++ .../fgm_mnist_demo/data_tensorflow.py | 130 +++++++ .../estimators_keras_classifiers.py | 231 +++++++++++++ .../fgm_mnist_demo/estimators_methods.py | 122 +++++++ .../fgm_mnist_demo/import_keras.py | 65 ++++ .../dioptra_custom/fgm_mnist_demo/mlflow.py | 103 ++++++ .../dioptra_custom/fgm_mnist_demo/plugins.py | 137 ++++++++ .../fgm_mnist_demo/random_rng.py | 56 +++ .../fgm_mnist_demo/random_sample.py | 89 +++++ .../fgm_mnist_demo/registry_art.py | 107 ++++++ .../fgm_mnist_demo/registry_mlflow.py | 120 +++++++ .../fgm_mnist_demo/tensorflow.py | 112 ++++++ .../fgm_mnist_demo/tracking_mlflow.py | 99 ++++++ 18 files changed, 2060 insertions(+), 252 deletions(-) create mode 100644 examples/task-plugins/dioptra_custom/fgm_mnist_demo/artifacts_restapi.py create mode 100644 examples/task-plugins/dioptra_custom/fgm_mnist_demo/attacks_fgm.py create mode 100644 examples/task-plugins/dioptra_custom/fgm_mnist_demo/backend_configs_tensorflow.py create mode 100644 examples/task-plugins/dioptra_custom/fgm_mnist_demo/data_tensorflow.py create mode 100644 examples/task-plugins/dioptra_custom/fgm_mnist_demo/estimators_keras_classifiers.py create mode 100644 examples/task-plugins/dioptra_custom/fgm_mnist_demo/estimators_methods.py create mode 100644 examples/task-plugins/dioptra_custom/fgm_mnist_demo/import_keras.py create mode 100644 examples/task-plugins/dioptra_custom/fgm_mnist_demo/mlflow.py 
create mode 100644 examples/task-plugins/dioptra_custom/fgm_mnist_demo/random_rng.py create mode 100644 examples/task-plugins/dioptra_custom/fgm_mnist_demo/random_sample.py create mode 100644 examples/task-plugins/dioptra_custom/fgm_mnist_demo/registry_art.py create mode 100644 examples/task-plugins/dioptra_custom/fgm_mnist_demo/registry_mlflow.py create mode 100644 examples/task-plugins/dioptra_custom/fgm_mnist_demo/tensorflow.py create mode 100644 examples/task-plugins/dioptra_custom/fgm_mnist_demo/tracking_mlflow.py diff --git a/examples/mnist-classifier-demo/demo.ipynb b/examples/mnist-classifier-demo/demo.ipynb index 41e62a90a..1ce9e2c15 100644 --- a/examples/mnist-classifier-demo/demo.ipynb +++ b/examples/mnist-classifier-demo/demo.ipynb @@ -31,7 +31,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 20, "metadata": { "tags": [] }, @@ -41,7 +41,7 @@ "EXPERIMENT_DESC = \"applying the fast gradient sign (FGM) attack to a classifier trained on MNIST\"\n", "QUEUE_NAME = 'tensorflow_cpu'\n", "QUEUE_DESC = 'Tensorflow CPU Queue'\n", - "PLUGIN_FILES = '../task-plugins/dioptra_custom/vc/'\n", + "PLUGIN_FILES = '../task-plugins/dioptra_custom/fgm_mnist_demo/'\n", "MODEL_NAME = \"mnist_classifier\"\n", "\n", "# Default address for accessing the RESTful API service\n", @@ -53,7 +53,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 21, "metadata": {}, "outputs": [], "source": [ @@ -176,7 +176,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 22, "metadata": { "tags": [] }, @@ -194,9 +194,28 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 23, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[2m2024-09-11 15:28:38\u001b[0m [\u001b[31m\u001b[1merror \u001b[0m] \u001b[1mError code 400 returned. 
\u001b[0m \u001b[36mdata\u001b[0m=\u001b[35m{'username': 'pluginuser', 'email': 'pluginuser@dioptra.nccoe.nist.gov', 'password': 'pleasemakesuretoPLUGINthecomputer', 'confirmPassword': 'pleasemakesuretoPLUGINthecomputer'}\u001b[0m \u001b[36mmethod\u001b[0m=\u001b[35mPOST\u001b[0m \u001b[36mresponse\u001b[0m=\u001b[35m{\"message\": \"Bad Request - The username on the registration form is not available. Please select another and resubmit.\"}\n", + "\u001b[0m \u001b[36murl\u001b[0m=\u001b[35mhttp://localhost:20080/api/v1/users/\u001b[0m\n" + ] + }, + { + "data": { + "text/plain": [ + "{'username': 'pluginuser', 'status': 'Login successful'}" + ] + }, + "execution_count": 23, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "try:\n", " client.users.create('pluginuser','pluginuser@dioptra.nccoe.nist.gov','pleasemakesuretoPLUGINthecomputer','pleasemakesuretoPLUGINthecomputer')\n", @@ -214,7 +233,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 24, "metadata": {}, "outputs": [], "source": [ @@ -240,13 +259,13 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 45, "metadata": {}, "outputs": [], "source": [ "experiment_id, train_ep, queue_id = upload_experiment(client, 'src/train.yml','train','training a classifier on MNIST', PLUGIN_FILES, QUEUE_NAME, QUEUE_DESC, EXPERIMENT_NAME, EXPERIMENT_DESC)\n", - "experiment_id, fgm_ep, queue_id = upload_experiment(client, 'src/fgm.yml','fgm','generating examples on mnist_classifier using the fgm attack', PLUGIN_FILES, QUEUE_NAME, QUEUE_DESC, EXPERIMENT_NAME, EXPERIMENT_DESC)\n", - "experiment_id, infer_ep, queue_id = upload_experiment(client, 'src/infer.yml','infer','evaluating performance of mnist_classifier on generated fgm examples', PLUGIN_FILES, QUEUE_NAME, QUEUE_DESC, EXPERIMENT_NAME, EXPERIMENT_DESC)" + "#experiment_id, fgm_ep, queue_id = upload_experiment(client, 'src/fgm.yml','fgm','generating examples on mnist_classifier using the fgm attack', 
PLUGIN_FILES, QUEUE_NAME, QUEUE_DESC, EXPERIMENT_NAME, EXPERIMENT_DESC)\n", + "#experiment_id, infer_ep, queue_id = upload_experiment(client, 'src/infer.yml','infer','evaluating performance of mnist_classifier on generated fgm examples', PLUGIN_FILES, QUEUE_NAME, QUEUE_DESC, EXPERIMENT_NAME, EXPERIMENT_DESC)" ] }, { @@ -260,7 +279,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 46, "metadata": { "scrolled": true }, @@ -275,7 +294,7 @@ " train_ep, \n", " {\"epochs_p\":\"1\"}, \n", " job_time_limit\n", - ")\n" + ")" ] }, { diff --git a/examples/mnist-classifier-demo/src/fgm.yml b/examples/mnist-classifier-demo/src/fgm.yml index 8d86b3ded..afe4a7b73 100644 --- a/examples/mnist-classifier-demo/src/fgm.yml +++ b/examples/mnist-classifier-demo/src/fgm.yml @@ -53,33 +53,75 @@ parameters: seed_p: -1 tasks: - init_rng: - plugin: dioptra_custom.vc.random_rng.init_rng + load_dataset: + plugin: dioptra_custom.fgm_mnist_demo.plugins.load_dataset inputs: - - name: seed + - name: ep_seed + type: integer + required: false + - name: data_dir + type: string + required: false + - name: subset + type: str_null + required: false + - name: image_size + type: image_size + required: false + - name: rescale + type: number + required: false + - name: validation_split + type: num_null + required: false + - name: batch_size type: integer required: false + - name: label_mode + type: string + required: false + - name: shuffle + type: boolean + required: false outputs: - - ret1: integer - - ret2: rng + dataset: directory_iterator - draw_random_integer: - plugin: dioptra_custom.vc.random_sample.draw_random_integer + get_model: + plugin: dioptra_custom.fgm_mnist_demo.plugins.get_model inputs: - - rng: rng - - name: low - type: integer + - name: dataset + type: directory_iterator required: false - - name: high - type: integer + - name: model_architecture + type: string + required: false + - name: input_shape + type: image_size + required: false + - name: loss + type: 
string + required: false + - name: learning_rate + type: number + required: false + - name: optimizer + type: string + required: false + - name: metrics_list + type: metrics_list + required: false + - name: uri + type: string + required: false + - name: imagenet_preprocessing + type: boolean + required: false + - name: classifier_kwargs + type: kwargs required: false outputs: - value: integer + classifier: sequential - init_tensorflow: - plugin: dioptra_custom.vc.backend_configs_tensorflow.init_tensorflow - inputs: - - seed: integer make_directories: plugin: dioptra_custom.vc.artifacts_utils.make_directories diff --git a/examples/mnist-classifier-demo/src/train.yml b/examples/mnist-classifier-demo/src/train.yml index a03403d9f..0cae72634 100644 --- a/examples/mnist-classifier-demo/src/train.yml +++ b/examples/mnist-classifier-demo/src/train.yml @@ -1,38 +1,20 @@ types: - rng: - optimizer: - name_parameters: - mapping: - name: string - parameters: - mapping: [string, any] metrics_list: list: mapping: name: string parameters: mapping: [string, any] - performance_metrics: - metrics: - callbacks_in: + callbacks_list: list: mapping: name: string parameters: mapping: [string, any] - callbacks_out: - mapping: - name: string - parameters: - mapping: [string, any] directory_iterator: - parameters: - mapping: [string, number] image_size: tuple: [integer, integer, integer] sequential: - fit_kwargs: - mapping: [string, any] fit_kwargs_null: union: - mapping: [string, any] @@ -41,6 +23,8 @@ types: union: [string, "null"] num_null: union: [number, "null"] + kwargs: + mapping: [string, any] parameters: seed_p: -1 @@ -56,69 +40,21 @@ parameters: register_model_name: "mnist_classifier" tasks: - init_rng: - plugin: dioptra_custom.vc.random_rng.init_rng + load_dataset: + plugin: dioptra_custom.fgm_mnist_demo.plugins.load_dataset inputs: - - name: seed + - name: ep_seed type: integer required: false - outputs: - - ret1: integer - - ret2: rng - - draw_random_integer: - plugin: 
dioptra_custom.vc.random_sample.draw_random_integer - inputs: - - rng: rng - - name: low - type: integer + - name: data_dir + type: string required: false - - name: high - type: integer + - name: subset + type: str_null + required: false + - name: image_size + type: image_size required: false - outputs: - value: integer - - init_tensorflow: - plugin: dioptra_custom.vc.backend_configs_tensorflow.init_tensorflow - inputs: - - seed: integer - - log_parameters: - plugin: dioptra_custom.vc.tracking_mlflow.log_parameters - inputs: - - parameters: parameters - - get_optimizer: - plugin: dioptra_custom.vc.tensorflow.get_optimizer - inputs: - - name: optimizer - type: string - - learning_rate: number - outputs: - optimizer: optimizer - - get_performance_metrics: - plugin: dioptra_custom.vc.tensorflow.get_performance_metrics - inputs: - - metrics_list: metrics_list - outputs: - performance_metrics: performance_metrics - - get_model_callbacks: - plugin: dioptra_custom.vc.tensorflow.get_model_callbacks - inputs: - - callbacks_list: callbacks_in - outputs: - callbacks: callbacks_out - - create_image_dataset: - plugin: dioptra_custom.vc.data_tensorflow.create_image_dataset - inputs: - - data_dir: string - - subset: str_null - - image_size: image_size - - seed: integer - name: rescale type: number required: false @@ -131,87 +67,88 @@ tasks: - name: label_mode type: string required: false + - name: shuffle + type: boolean + required: false outputs: dataset: directory_iterator - get_n_classes_from_directory_iterator: - plugin: dioptra_custom.vc.data_tensorflow.get_n_classes_from_directory_iterator - inputs: - - ds: directory_iterator - outputs: - num_classes: integer - - init_classifier: - plugin: dioptra_custom.vc.estimators_keras_classifiers.init_classifier + get_model: + plugin: dioptra_custom.fgm_mnist_demo.plugins.get_model inputs: - - model_architecture: string - - optimizer: optimizer - - metrics: performance_metrics - - input_shape: image_size - - n_classes: integer + - 
name: dataset + type: directory_iterator + required: false + - name: model_architecture + type: string + required: false + - name: input_shape + type: image_size + required: false - name: loss type: string required: false + - name: learning_rate + type: number + required: false + - name: optimizer + type: string + required: false + - name: metrics_list + type: metrics_list + required: false + - name: uri + type: string + required: false + - name: imagenet_preprocessing + type: boolean + required: false + - name: classifier_kwargs + type: kwargs + required: false outputs: classifier: sequential - fit: - plugin: dioptra_custom.vc.estimators_methods.fit + train: + plugin: dioptra_custom.fgm_mnist_demo.plugins.train inputs: + - model_name: string - estimator: any - x: any - name: y type: any required: false + - name: callbacks_list + type: callbacks_list + required: false - name: fit_kwargs type: fit_kwargs_null required: false - - evaluate_metrics_tensorflow: - plugin: dioptra_custom.vc.tensorflow.evaluate_metrics_tensorflow + compute_metrics: + plugin: dioptra_custom.fgm_mnist_demo.plugins.compute_metrics inputs: - classifier: sequential - dataset: directory_iterator - outputs: - metrics: metrics - - log_metrics: - plugin: dioptra_custom.vc.tracking_mlflow.log_metrics - inputs: - - metrics: metrics - - log_tensorflow_keras_estimator: - plugin: dioptra_custom.vc.tracking_mlflow.log_tensorflow_keras_estimator - inputs: - - estimator: sequential - - model_dir: string - - add_model_to_registry: - plugin: dioptra_custom.vc.mlflow.add_model_to_registry - inputs: - - name: name - type: string - - model_dir: string get_none: - plugin: dioptra_custom.vc.tensorflow.get_none + plugin: dioptra_custom.fgm_mnist_demo.tensorflow.get_none inputs: - arg: string outputs: ret: "null" process_float: - plugin: dioptra_custom.vc.tensorflow.process_float + plugin: dioptra_custom.fgm_mnist_demo.tensorflow.process_float inputs: - arg: string outputs: ret: number process_int: - plugin: 
dioptra_custom.vc.tensorflow.process_int + plugin: dioptra_custom.fgm_mnist_demo.tensorflow.process_int inputs: - arg: string outputs: ret: integer process_int_list: - plugin: dioptra_custom.vc.tensorflow.process_int_list + plugin: dioptra_custom.fgm_mnist_demo.tensorflow.process_int_list inputs: - arg: string outputs: @@ -239,133 +176,75 @@ graph: seed: process_int: $seed_p - init_rng: - init_rng: $seed - - global_seed: - draw_random_integer: - rng: $init_rng.ret2 - - dataset_seed: - draw_random_integer: - rng: $init_rng.ret2 - - init_tensorflow: - init_tensorflow: $global_seed - - log_params: - log_parameters: - - entry_point_seed: $init_rng.ret1 - tensorflow_global_seed: $global_seed - dataset_seed: $dataset_seed - - optimizer: - get_optimizer: - optimizer: $optimizer_name - learning_rate: $learning_rate - dependencies: - - init_tensorflow - - perf_metrics: - get_performance_metrics: - - - name: CategoricalAccuracy - parameters: { name: accuracy } - - name: Precision - parameters: { name: precision } - - name: Recall - parameters: { name: recall } - - name: AUC - parameters: { name: auc } - dependencies: - - init_tensorflow - - callbacks: - get_model_callbacks: - - - name: EarlyStopping - parameters: - monitor: val_loss - min_delta: .01 - patience: 5 - restore_best_weights: true - dependencies: - - init_tensorflow - - training_dataset: - create_image_dataset: + training_data: + load_dataset: + ep_seed: $seed data_dir: $training_dir subset: training image_size: $image_size - seed: $dataset_seed validation_split: $validation_split batch_size: $batch_size - dependencies: - - init_tensorflow - validation_dataset: - create_image_dataset: + validation_data: + load_dataset: + ep_seed: $seed data_dir: $training_dir subset: validation image_size: $image_size - seed: $dataset_seed validation_split: $validation_split batch_size: $batch_size - dependencies: - - init_tensorflow - testing_dataset: - create_image_dataset: + testing_data: + load_dataset: + ep_seed: $seed 
data_dir: $testing_dir subset: null image_size: $image_size - seed: $dataset_seed - validation_split: null + validation_split: $validation_split batch_size: $batch_size - dependencies: - - init_tensorflow - - num_classes: - get_n_classes_from_directory_iterator: $training_dataset + - classifier: - init_classifier: + model: + get_model: + dataset: $training_data model_architecture: $model_architecture - optimizer: $optimizer - metrics: $perf_metrics input_shape: $image_size - n_classes: $num_classes + learning_rate: $learning_rate + optimizer: $optimizer_name + metrics_list: + - name: CategoricalAccuracy + parameters: { name: accuracy } + - name: Precision + parameters: { name: precision } + - name: Recall + parameters: { name: recall } + - name: AUC + parameters: { name: auc } dependencies: - - init_tensorflow - - model: - fit: - estimator: $classifier - x: $training_dataset + - training_data + + training: + train: + model_name: $register_model_name + estimator: $model + x: $training_data + callbacks_list: + - name: EarlyStopping + parameters: + monitor: val_loss + min_delta: .01 + patience: 5 + restore_best_weights: true fit_kwargs: nb_epochs: $epochs - validation_data: $validation_dataset - callbacks: $callbacks + validation_data: $validation_data verbose: 2 - - eval_metrics_tensorflow: - evaluate_metrics_tensorflow: - - $classifier - - $testing_dataset dependencies: - model - log_metrics: - log_metrics: $eval_metrics_tensorflow - - log_keras_estimator: - log_tensorflow_keras_estimator: - - $classifier - - model - dependencies: - - model - - add_model_to_registry: - add_model_to_registry: - - $register_model_name - - model + metrics: + compute_metrics: + classifier: $model + dataset: $testing_data dependencies: - - log_keras_estimator + - training \ No newline at end of file diff --git a/examples/task-plugins/dioptra_custom/fgm_mnist_demo/artifacts_restapi.py b/examples/task-plugins/dioptra_custom/fgm_mnist_demo/artifacts_restapi.py new file mode 100644 index 
000000000..9847e45ed --- /dev/null +++ b/examples/task-plugins/dioptra_custom/fgm_mnist_demo/artifacts_restapi.py @@ -0,0 +1,151 @@ +# This Software (Dioptra) is being made available as a public service by the +# National Institute of Standards and Technology (NIST), an Agency of the United +# States Department of Commerce. This software was developed in part by employees of +# NIST and in part by NIST contractors. Copyright in portions of this software that +# were developed by NIST contractors has been licensed or assigned to NIST. Pursuant +# to Title 17 United States Code Section 105, works of NIST employees are not +# subject to copyright protection in the United States. However, NIST may hold +# international copyright in software created by its employees and domestic +# copyright (or licensing rights) in portions of software that were assigned or +# licensed to NIST. To the extent that NIST holds copyright in this software, it is +# being made available under the Creative Commons Attribution 4.0 International +# license (CC BY 4.0). The disclaimers of the CC BY 4.0 license apply to all parts +# of the software developed or licensed by NIST. 
#
# ACCESS THE FULL CC BY 4.0 LICENSE HERE:
# https://creativecommons.org/licenses/by/4.0/legalcode
"""Helpers that let a task plugin talk to the Dioptra REST API.

The module logs in with a fixed plugin account, then wraps GET/POST/PUT/DELETE
calls so that every failure is logged and re-raised as one of the exception
classes defined at the bottom of the file.
"""
import os
from posixpath import join as urljoin
from urllib.parse import urlparse, urlunparse

import requests
import structlog
from structlog.stdlib import BoundLogger

from dioptra import pyplugs

LOGGER: BoundLogger = structlog.stdlib.get_logger()

# Placeholder written to the logs instead of a credential value.
_REDACTED = "***"


def _redact(data):
    """Return a copy of a request payload that is safe to write to the logs.

    Any top-level key whose name contains "password" (case-insensitive) has its
    value masked. Non-dict payloads (including ``None``) are returned unchanged.
    """
    if not isinstance(data, dict):
        return data
    return {
        key: _REDACTED if "password" in str(key).lower() else value
        for key, value in data.items()
    }


@pyplugs.register
def get_uri_for_artifact(job_id, index=0):
    """Return the ``artifactUri`` of one artifact attached to a job.

    Args:
        job_id: Identifier of the job; converted to ``str`` for the URL.
        index: Position of the artifact in the job's ``artifacts`` list.

    Returns:
        The ``artifactUri`` value of the selected artifact.
    """
    session, url = get_logged_in_session()
    job = get(session, url, 'jobs', str(job_id))
    return job['artifacts'][index]['artifactUri']


def get_artifacts_for_job(job_id):
    """Return the ``artifactUri`` of every artifact attached to a job."""
    session, url = get_logged_in_session()
    job = get(session, url, 'jobs', str(job_id))
    return [artifact['artifactUri'] for artifact in job['artifacts']]


def get_logged_in_session():
    """Create a ``requests`` session and log it in to the REST API.

    Returns:
        A ``(session, url)`` tuple where ``url`` is the API base address used
        for the login.
    """
    session = requests.Session()
    url = "http://dioptra-deployment-restapi:5000/api/v1"

    # NOTE(review): the service address and the plugin account credentials are
    # hard-coded here; consider moving them to configuration.
    login = post(
        session,
        url,
        {'username': 'pluginuser', 'password': 'pleasemakesuretoPLUGINthecomputer'},
        'auth',
        'login',
    )
    LOGGER.info("login request sent", response=str(login))

    return session, url


def upload_model_to_restapi(name, source_uri, job_id):
    """Register an artifact and attach it as a new version of a named model.

    The model list is searched for an exact ``name`` match. If none is found a
    new model is created in group 1. An artifact pointing at ``source_uri`` is
    then created for ``job_id`` and posted as a new version of the model.

    Args:
        name: Name of the model to create or extend.
        source_uri: URI of the stored model artifact.
        job_id: Identifier of the job that produced the artifact.
    """
    session, url = get_logged_in_session()

    models = get(session, url, f'models?search={name}&pageLength=500')
    LOGGER.info("requesting models from RESTAPI", response=models)

    # The search is not an exact match, so filter on the name here. As in the
    # original loop, the last exact match wins.
    model_id = None
    for candidate in models['data']:
        if candidate['name'] == name:
            model_id = candidate['id']

    if model_id is None:
        LOGGER.info("creating new model on RESTAPI")
        created = post(
            session,
            url,
            {"group": 1, "name": name, "description": f"{name} model"},
            "models",
        )
        model_id = created['id']
        LOGGER.info("new model created", response=created)

    artifact = post(
        session,
        url,
        {
            "group": 1,
            "description": f"{name} model artifact",
            "job": str(job_id),
            "uri": source_uri,
        },
        'artifacts',
    )
    LOGGER.info("artifact", response=artifact)
    model_version = post(
        session,
        url,
        {"description": f"{name} model version", "artifact": artifact['id']},
        'models',
        str(model_id),
        'versions',
    )
    LOGGER.info("model created", response=model_version)


def upload_artifact_to_restapi(source_uri, job_id):
    """Register ``source_uri`` as an artifact of ``job_id`` in group 1."""
    session, url = get_logged_in_session()

    artifact = post(
        session,
        url,
        {
            "group": 1,
            "description": f"artifact for job {job_id}",
            "job": str(job_id),
            "uri": source_uri,
        },
        'artifacts',
    )
    LOGGER.info("artifact", response=artifact)


def debug_request(url, method, data=None):
    """Log an outgoing request at debug level with credentials masked."""
    LOGGER.debug("Request made.", url=url, method=method, data=_redact(data))


def debug_response(json):
    """Log a decoded JSON response at debug level."""
    LOGGER.debug("Response received.", json=json)


def get(session, endpoint, *features):
    """Send a GET to ``endpoint`` joined with ``features``; return the JSON."""
    debug_request(urljoin(endpoint, *features), "GET")
    return make_request(session, "get", endpoint, None, *features)


def post(session, endpoint, data, *features):
    """Send a POST with JSON body ``data``; return the decoded JSON."""
    debug_request(urljoin(endpoint, *features), "POST", data)
    return make_request(session, "post", endpoint, data, *features)


def delete(session, endpoint, data, *features):
    """Send a DELETE with JSON body ``data``; return the decoded JSON."""
    debug_request(urljoin(endpoint, *features), "DELETE", data)
    return make_request(session, "delete", endpoint, data, *features)


def put(session, endpoint, data, *features):
    """Send a PUT with JSON body ``data``; return the decoded JSON."""
    debug_request(urljoin(endpoint, *features), "PUT", data)
    return make_request(session, "put", endpoint, data, *features)


def make_request(session, method_name, endpoint, data, *features):
    """Perform one HTTP request and return its decoded JSON body.

    Args:
        session: Object exposing ``get``/``post``/``put``/``delete`` methods.
        method_name: Lower-case name of the session method to call.
        endpoint: Base URL.
        data: JSON payload; when falsy the request is sent without a body.
        *features: Path segments appended to ``endpoint``.

    Returns:
        The decoded JSON response.

    Raises:
        APIConnectionError: The server could not be reached.
        StatusCodeError: The response status code was not 200.
        JSONDecodeError: The response body was not valid JSON.
    """
    url = urljoin(endpoint, *features)
    method = getattr(session, method_name)
    # Bound before the call so the error handler never sees an undefined name
    # when the request fails before any response exists.
    response = None
    try:
        response = method(url, json=data) if data else method(url)
        if response.status_code != 200:
            raise StatusCodeError()
        json = response.json()
    except (requests.ConnectionError, StatusCodeError, requests.JSONDecodeError) as e:
        handle_error(session, url, method_name.upper(), data, response, e)
        # handle_error always raises; this is a backstop so that ``json`` can
        # never be read while unbound.
        raise
    debug_response(json=json)
    return json


def handle_error(session, url, method, data, response, error):
    """Log a failed request and raise the matching module-level exception.

    Args:
        session: The session used for the request (unused, kept for callers).
        url: Full URL that was requested.
        method: Upper-case HTTP method name.
        data: Request payload; credentials are masked before logging.
        response: The response object, or ``None`` if none was received.
        error: The exception caught by the caller.

    Raises:
        APIConnectionError: ``error`` is a ``requests.ConnectionError``.
        StatusCodeError: ``error`` is a ``StatusCodeError``.
        JSONDecodeError: ``error`` is a ``requests.JSONDecodeError``.
    """
    # A connection failure has no response object, so read ``text`` defensively.
    response_text = getattr(response, "text", None)
    safe_data = _redact(data)

    # isinstance (not ``type(...) is``) so subclasses such as ConnectTimeout or
    # SSLError are reported instead of falling through silently.
    if isinstance(error, requests.ConnectionError):
        restapi = os.environ.get("DIOPTRA_RESTAPI_URI", url)
        message = (
            f"Could not connect to the REST API. Is the server running at {restapi}?"
        )
        LOGGER.error(
            message, url=url, method=method, data=safe_data, response=response_text
        )
        raise APIConnectionError(message) from error
    if isinstance(error, StatusCodeError):
        message = f"Error code {response.status_code} returned."
        LOGGER.error(
            message, url=url, method=method, data=safe_data, response=response_text
        )
        raise StatusCodeError(message) from error
    if isinstance(error, requests.JSONDecodeError):
        message = "JSON response could not be decoded."
        LOGGER.error(
            message, url=url, method=method, data=safe_data, response=response_text
        )
        raise JSONDecodeError(message) from error
    # Unknown error type: never swallow it.
    raise error


class APIConnectionError(Exception):
    """Class for connection errors"""


class StatusCodeError(Exception):
    """Class for status code errors"""


class JSONDecodeError(Exception):
    """Class for JSON decode errors"""
# This Software (Dioptra) is being made available as a public service by the
# National Institute of Standards and Technology (NIST), an Agency of the United
# States Department of Commerce. This software was developed in part by employees of
# NIST and in part by NIST contractors. Copyright in portions of this software that
# were developed by NIST contractors has been licensed or assigned to NIST. Pursuant
# to Title 17 United States Code Section 105, works of NIST employees are not
# subject to copyright protection in the United States. However, NIST may hold
# international copyright in software created by its employees and domestic
# copyright (or licensing rights) in portions of software that were assigned or
# licensed to NIST. To the extent that NIST holds copyright in this software, it is
# being made available under the Creative Commons Attribution 4.0 International
# license (CC BY 4.0). The disclaimers of the CC BY 4.0 license apply to all parts
# of the software developed or licensed by NIST.
#
# ACCESS THE FULL CC BY 4.0 LICENSE HERE:
# https://creativecommons.org/licenses/by/4.0/legalcode
"""A task plugin module for the Fast Gradient Method evasion attack.

The Fast Gradient Method (FGM) [goodfellow2015]_ is an evasion attack that attempts to
fool a trained classifier by perturbing a test image using the gradient of the
classifier's neural network. This task plugin uses the Adversarial Robustness Toolbox's
[art2019]_ implementation of the |fgm_art|.

References:
    .. [art2019] M.-I. Nicolae et al., "Adversarial Robustness Toolbox v1.0.0,"
       Nov. 2019. [Online]. Available:
       `arXiv:1807.01069v4 [cs.LG] <https://arxiv.org/abs/1807.01069v4>`_.

    .. [goodfellow2015] I. Goodfellow, J. Shlens, and C. Szegedy. (May 2015).
       Explaining and Harnessing Adversarial Examples, Presented at the Int. Conf.
       on Learn. Represent. 2015, San Diego, California, United States. [Online].
       Available: `arXiv:1412.6572v3 [stat.ML] <https://arxiv.org/abs/1412.6572v3>`_.

.. |fgm_art| replace:: `Fast Gradient Method <https://adversarial-robustness-toolbox.readthedocs.io/en/latest/modules/attacks/evasion.html#fast-gradient-method-fgm>`__
"""

from __future__ import annotations

import math
from pathlib import Path
from typing import Any, Callable, Dict, List, Optional, Tuple, Union

import mlflow
import numpy as np
import pandas as pd
import scipy.stats
import structlog
from structlog.stdlib import BoundLogger

from dioptra import pyplugs
from dioptra.sdk.exceptions import ARTDependencyError, TensorflowDependencyError
from dioptra.sdk.utilities.decorators import require_package

LOGGER: BoundLogger = structlog.stdlib.get_logger()

try:
    from art.attacks.evasion import FastGradientMethod
    from art.estimators.classification import TensorFlowV2Classifier

except ImportError:  # pragma: nocover
    LOGGER.warning(
        "Unable to import one or more optional packages, functionality may be reduced",
        package="art",
    )


try:
    from tensorflow.keras.preprocessing.image import ImageDataGenerator, save_img

except ImportError:  # pragma: nocover
    LOGGER.warning(
        "Unable to import one or more optional packages, functionality may be reduced",
        package="tensorflow",
    )


@pyplugs.register
@require_package("art", exc_type=ARTDependencyError)
@require_package("tensorflow", exc_type=TensorflowDependencyError)
def create_adversarial_fgm_dataset(
    data_flow: Any,
    data_dir: str,
    adv_data_dir: Union[str, Path],
    keras_classifier: TensorFlowV2Classifier,
    image_size: Tuple[int, int, int],
    distance_metrics_list: Optional[List[Tuple[str, Callable[..., np.ndarray]]]] = None,
    rescale: float = 1.0 / 255,
    batch_size: int = 32,
    label_mode: str = "categorical",
    eps: float = 0.3,
    eps_step: float = 0.1,
    minimal: bool = False,
    norm: Union[int, float, str] = np.inf,
) -> pd.DataFrame:
    """Generates an adversarial dataset using the Fast Gradient Method attack.

    Each generated adversarial image is saved as an image file in the directory
    specified by `adv_data_dir` and the distance metric functions passed to
    `distance_metrics_list` are used to quantify the size of the perturbation applied to
    each image.

    Args:
        data_flow: An already-constructed iterator over the clean test images. It
            must expose `n` (number of images) and `filenames`, and yield
            `(x, y)` batches. Presumably the object returned by
            |flow_from_directory| with `shuffle=False` and the same `batch_size`
            as passed here; filenames are matched to batches by position.
        data_dir: The directory containing the clean test images. Not read by
            this function; the images come from `data_flow`.
        adv_data_dir: The directory to use when saving the generated adversarial images.
        keras_classifier: A trained :py:class:`~art.estimators.classification\\
            .TensorFlowV2Classifier`.
        image_size: A tuple of integers `(height, width, channels)`. Not read by
            this function; any resizing is determined by `data_flow`.
        distance_metrics_list: A list of distance metrics to compute after generating an
            adversarial image. If `None`, then no distance metrics will be calculated.
            The default is `None`.
        rescale: The rescaling factor for the pixel vectors. Not read by this
            function; any rescaling is determined by `data_flow`. The default is
            `1.0 / 255`.
        batch_size: The size of the batch on which adversarial samples are generated.
            The default is `32`.
        label_mode: Not read by this function. The batch labels are reduced with
            `argmax` over axis 1, so `data_flow` is expected to yield one-hot
            (categorical) labels. The default is `"categorical"`.
        eps: The attack step size. The default is `0.3`.
        eps_step: The step size of the input variation for minimal perturbation
            computation. The default is `0.1`.
        minimal: If `True`, compute the minimal perturbation, and use `eps_step` for the
            step size and `eps` for the maximum perturbation. The default is `False`.
        norm: The norm of the adversarial perturbation. Can be `"inf"`,
            :py:data:`numpy.inf`, `1`, or `2`. The default is :py:data:`numpy.inf`.

    Returns:
        A :py:class:`~pandas.DataFrame` containing the full distribution of the
        calculated distance metrics.

    See Also:
        - |flow_from_directory|

    .. |flow_from_directory| replace:: :py:meth:`tf.keras.preprocessing.image\\
       .ImageDataGenerator.flow_from_directory`
    """
    distance_metrics_list = distance_metrics_list or []
    adv_data_dir = Path(adv_data_dir)

    attack = _init_fgm(
        keras_classifier=keras_classifier,
        batch_size=batch_size,
        eps=eps,
        eps_step=eps_step,
        minimal=minimal,
        norm=norm,
    )

    num_images = data_flow.n
    # Round up so the final, partially filled batch is attacked too. Floor
    # division dropped up to batch_size - 1 images, and all of them when
    # num_images < batch_size.
    num_batches = math.ceil(num_images / batch_size)
    img_filenames = [Path(x) for x in data_flow.filenames]

    distance_metrics_: Dict[str, List[Any]] = {"image": [], "label": []}
    for metric_name, _ in distance_metrics_list:
        distance_metrics_[metric_name] = []

    LOGGER.info(
        "Generate adversarial images",
        attack="fgm",
        num_batches=num_batches,
    )

    # zip() stops as soon as range() is exhausted, so the (endless) data flow is
    # never asked for a batch that would then be thrown away.
    for batch_num, (x, y) in zip(range(num_batches), data_flow):
        clean_filenames = img_filenames[
            batch_num * batch_size : (batch_num + 1) * batch_size  # noqa: E203
        ]

        LOGGER.info(
            "Generate adversarial image batch",
            attack="fgm",
            batch_num=batch_num,
        )

        y_int = np.argmax(y, axis=1)
        adv_batch = attack.generate(x=x)

        _save_adv_batch(adv_batch, adv_data_dir, y_int, clean_filenames)

        _evaluate_distance_metrics(
            clean_filenames=clean_filenames,
            distance_metrics_=distance_metrics_,
            clean_batch=x,
            adv_batch=adv_batch,
            distance_metrics_list=distance_metrics_list,
        )

    LOGGER.info("Adversarial image generation complete", attack="fgm")
    _log_distance_metrics(distance_metrics_)

    return pd.DataFrame(distance_metrics_)


def _init_fgm(
    keras_classifier: TensorFlowV2Classifier, batch_size: int, **kwargs
) -> FastGradientMethod:
    """Initializes :py:class:`~art.attacks.evasion.FastGradientMethod`.

    Args:
        keras_classifier: A trained :py:class:`~art.estimators.classification\\
            .TensorFlowV2Classifier`.
        batch_size: The size of the batch on which adversarial samples are generated.
        **kwargs: Additional keyword arguments forwarded unchanged to
            :py:class:`~art.attacks.evasion.FastGradientMethod`.

    Returns:
        A :py:class:`~art.attacks.evasion.FastGradientMethod` object.
    """
    attack: FastGradientMethod = FastGradientMethod(
        estimator=keras_classifier, batch_size=batch_size, **kwargs
    )
    return attack


def _save_adv_batch(adv_batch, adv_data_dir, y, clean_filenames) -> None:
    """Saves a batch of adversarial images to disk.

    Each image is written to `adv_data_dir/<label>/adv_<original file name>`.

    Args:
        adv_batch: A generated batch of adversarial images.
        adv_data_dir: The directory to use when saving the generated adversarial images.
        y: An array containing the target labels of the original images.
        clean_filenames: A list containing the filenames of the original images.
    """
    for batch_image_num, adv_image in enumerate(adv_batch):
        adv_image_path = (
            adv_data_dir
            / f"{y[batch_image_num]}"
            / f"adv_{clean_filenames[batch_image_num].name}"
        )

        # exist_ok avoids the check-then-create race of exists() + mkdir().
        adv_image_path.parent.mkdir(parents=True, exist_ok=True)

        save_img(path=str(adv_image_path), x=adv_image)


def _evaluate_distance_metrics(
    clean_filenames, distance_metrics_, clean_batch, adv_batch, distance_metrics_list
) -> None:
    """Calculates distance metrics for a batch of clean/adversarial image pairs.

    The results are appended in place to the lists held in `distance_metrics_`.

    Args:
        clean_filenames: A list containing the filenames of the original images.
        distance_metrics_: A dictionary used to record the values of the distance
            metrics computed for the clean/adversarial image pairs.
        clean_batch: The clean images used to generate the adversarial images in
            `adv_batch`.
        adv_batch: A generated batch of adversarial images.
        distance_metrics_list: A list of distance metrics to compute after generating an
            adversarial image.
    """
    LOGGER.debug("evaluate image perturbations using distance metrics")
    distance_metrics_["image"].extend([x.name for x in clean_filenames])
    distance_metrics_["label"].extend([x.parent for x in clean_filenames])
    for metric_name, metric in distance_metrics_list:
        distance_metrics_[metric_name].extend(metric(clean_batch, adv_batch))


def _log_distance_metrics(distance_metrics_: Dict[str, List[Any]]) -> None:
    """Logs the distance metrics summary statistics to the MLFlow Tracking service.

    The following summary statistics are calculated and logged to the MLFlow Tracking
    service for each of the distributions recorded in the `distance_metrics_`
    dictionary:

    - mean
    - median
    - standard deviation
    - interquartile range
    - minimum
    - maximum

    The `"image"` and `"label"` entries are bookkeeping columns and are skipped,
    as is any metric for which no values were recorded.

    Args:
        distance_metrics_: A dictionary used to record the values of the distance
            metrics computed for the clean/adversarial image pairs.
    """
    for metric_name, metric_values_list in distance_metrics_.items():
        if metric_name in ("image", "label"):
            continue

        metric_values = np.array(metric_values_list)
        if metric_values.size == 0:
            # min()/max() raise on an empty array; nothing meaningful to log.
            LOGGER.warning(
                "no values recorded for distance-based metric",
                metric_name=metric_name,
            )
            continue

        mlflow.log_metric(key=f"{metric_name}_mean", value=metric_values.mean())
        mlflow.log_metric(key=f"{metric_name}_median", value=np.median(metric_values))
        mlflow.log_metric(key=f"{metric_name}_stdev", value=metric_values.std())
        mlflow.log_metric(
            key=f"{metric_name}_iqr", value=scipy.stats.iqr(metric_values)
        )
        mlflow.log_metric(key=f"{metric_name}_min", value=metric_values.min())
        mlflow.log_metric(key=f"{metric_name}_max", value=metric_values.max())
        LOGGER.info("logged distance-based metric", metric_name=metric_name)
b/examples/task-plugins/dioptra_custom/fgm_mnist_demo/backend_configs_tensorflow.py @@ -0,0 +1,52 @@ +# This Software (Dioptra) is being made available as a public service by the +# National Institute of Standards and Technology (NIST), an Agency of the United +# States Department of Commerce. This software was developed in part by employees of +# NIST and in part by NIST contractors. Copyright in portions of this software that +# were developed by NIST contractors has been licensed or assigned to NIST. Pursuant +# to Title 17 United States Code Section 105, works of NIST employees are not +# subject to copyright protection in the United States. However, NIST may hold +# international copyright in software created by its employees and domestic +# copyright (or licensing rights) in portions of software that were assigned or +# licensed to NIST. To the extent that NIST holds copyright in this software, it is +# being made available under the Creative Commons Attribution 4.0 International +# license (CC BY 4.0). The disclaimers of the CC BY 4.0 license apply to all parts +# of the software developed or licensed by NIST. 
+# +# ACCESS THE FULL CC BY 4.0 LICENSE HERE: +# https://creativecommons.org/licenses/by/4.0/legalcode +"""A task plugin module for initializing and configuring Tensorflow.""" + +from __future__ import annotations + +import structlog +from structlog.stdlib import BoundLogger + +from dioptra import pyplugs +from dioptra.sdk.exceptions import TensorflowDependencyError +from dioptra.sdk.utilities.decorators import require_package + +LOGGER: BoundLogger = structlog.stdlib.get_logger() + + +try: + import tensorflow as tf + +except ImportError: # pragma: nocover + LOGGER.warn( + "Unable to import one or more optional packages, functionality may be reduced", + package="tensorflow", + ) + + +@pyplugs.register +@require_package("tensorflow", exc_type=TensorflowDependencyError) +def init_tensorflow(seed: int) -> None: + """Initializes Tensorflow to ensure reproducibility. + + This task plugin **must** be run before any other features from Tensorflow are used + to ensure reproducibility. + + Args: + seed: The seed to use for Tensorflow's random number generator. + """ + tf.random.set_seed(seed) diff --git a/examples/task-plugins/dioptra_custom/fgm_mnist_demo/data_tensorflow.py b/examples/task-plugins/dioptra_custom/fgm_mnist_demo/data_tensorflow.py new file mode 100644 index 000000000..8b16d6804 --- /dev/null +++ b/examples/task-plugins/dioptra_custom/fgm_mnist_demo/data_tensorflow.py @@ -0,0 +1,130 @@ +# This Software (Dioptra) is being made available as a public service by the +# National Institute of Standards and Technology (NIST), an Agency of the United +# States Department of Commerce. This software was developed in part by employees of +# NIST and in part by NIST contractors. Copyright in portions of this software that +# were developed by NIST contractors has been licensed or assigned to NIST. Pursuant +# to Title 17 United States Code Section 105, works of NIST employees are not +# subject to copyright protection in the United States. 
However, NIST may hold +# international copyright in software created by its employees and domestic +# copyright (or licensing rights) in portions of software that were assigned or +# licensed to NIST. To the extent that NIST holds copyright in this software, it is +# being made available under the Creative Commons Attribution 4.0 International +# license (CC BY 4.0). The disclaimers of the CC BY 4.0 license apply to all parts +# of the software developed or licensed by NIST. +# +# ACCESS THE FULL CC BY 4.0 LICENSE HERE: +# https://creativecommons.org/licenses/by/4.0/legalcode +"""A task plugin module for preparing Tensorflow-specific dataset iterators. + +.. |flow_from_directory| replace:: :py:meth:`tensorflow.keras.preprocessing.image\\ + .ImageDataGenerator.flow_from_directory` +.. |directory_iterator| replace:: :py:class:`~tensorflow.keras.preprocessing.image\\ + .DirectoryIterator` +""" + +from __future__ import annotations + +from typing import Optional, Tuple + +import structlog +from structlog.stdlib import BoundLogger + +from dioptra import pyplugs +from dioptra.sdk.exceptions import TensorflowDependencyError +from dioptra.sdk.utilities.decorators import require_package + +LOGGER: BoundLogger = structlog.stdlib.get_logger() + +try: + from tensorflow.keras.preprocessing.image import ( + DirectoryIterator, + ImageDataGenerator, + ) + +except ImportError: # pragma: nocover + LOGGER.warn( + "Unable to import one or more optional packages, functionality may be reduced", + package="tensorflow", + ) + + +@pyplugs.register +@require_package("tensorflow", exc_type=TensorflowDependencyError) +def create_image_dataset( + data_dir: str, + subset: Optional[str], + image_size: Tuple[int, int, int], + seed: int, + rescale: float = 1.0 / 255, + validation_split: Optional[float] = 0.2, + batch_size: int = 32, + label_mode: str = "categorical", + shuffle: bool = True, +) -> DirectoryIterator: + """Yields an iterator for generating batches of real-time augmented image data. 
+ + Args: + data_dir: The directory containing the image dataset. + subset: The subset of data (`"training"` or `"validation"`) to use if + `validation_split` is not `None`. If `None`, then `validation_split` must + also be `None`. + image_size: A tuple of integers `(height, width, channels)` used to preprocess + the images so that they all have the same dimensions and number of color + channels. `channels=3` means RGB color images and `channels=1` means + grayscale images. Images with different dimensions will be resized. If + `channels=1`, color images will be converted into grayscale. + seed: Sets the random seed used for shuffling and transformations. + rescale: The rescaling factor for the pixel vectors. If `None` or `0`, no + rescaling is applied, otherwise multiply the data by the value provided + (after applying all other transformations). The default is `1.0 / 255`. + validation_split: The fraction of the data to set aside for validation. If not + `None`, the value given here must be between `0` and `1`. If `None`, then + there is no validation set. The default is `0.2`. + batch_size: The size of the batch on which adversarial samples are generated. + The default is `32`. + label_mode: Determines how the label arrays for the dataset will be returned. + The available choices are: `"categorical"`, `"binary"`, `"sparse"`, + `"input"`, `None`. For information on the meaning of each choice, see + the documentation for |flow_from_directory|. The default is `"categorical"`. + + Returns: + A :py:class:`~tensorflow.keras.preprocessing.image.DirectoryIterator` object. 
+ + See Also: + - |flow_from_directory| + - :py:class:`~tensorflow.keras.preprocessing.image.DirectoryIterator` + """ + color_mode: str = ( + "rgb" if image_size[2] == 3 else "rgba" if image_size[2] == 4 else "grayscale" + ) + target_size: Tuple[int, int] = image_size[:2] + + data_generator: ImageDataGenerator = ImageDataGenerator( + rescale=rescale, + validation_split=validation_split, + ) + + return data_generator.flow_from_directory( + directory=data_dir, + target_size=target_size, + color_mode=color_mode, + class_mode=label_mode, + batch_size=batch_size, + seed=seed, + subset=subset, + shuffle=shuffle + ) + + +@pyplugs.register +@require_package("tensorflow", exc_type=TensorflowDependencyError) +def get_n_classes_from_directory_iterator(ds: DirectoryIterator) -> int: + """Returns the number of unique labels found by the |directory_iterator|. + + Args: + ds: A |directory_iterator| object. + + Returns: + The number of unique labels in the dataset. + """ + return len(ds.class_indices) diff --git a/examples/task-plugins/dioptra_custom/fgm_mnist_demo/estimators_keras_classifiers.py b/examples/task-plugins/dioptra_custom/fgm_mnist_demo/estimators_keras_classifiers.py new file mode 100644 index 000000000..f5ef72548 --- /dev/null +++ b/examples/task-plugins/dioptra_custom/fgm_mnist_demo/estimators_keras_classifiers.py @@ -0,0 +1,231 @@ +# This Software (Dioptra) is being made available as a public service by the +# National Institute of Standards and Technology (NIST), an Agency of the United +# States Department of Commerce. This software was developed in part by employees of +# NIST and in part by NIST contractors. Copyright in portions of this software that +# were developed by NIST contractors has been licensed or assigned to NIST. Pursuant +# to Title 17 United States Code Section 105, works of NIST employees are not +# subject to copyright protection in the United States. 
However, NIST may hold +# international copyright in software created by its employees and domestic +# copyright (or licensing rights) in portions of software that were assigned or +# licensed to NIST. To the extent that NIST holds copyright in this software, it is +# being made available under the Creative Commons Attribution 4.0 International +# license (CC BY 4.0). The disclaimers of the CC BY 4.0 license apply to all parts +# of the software developed or licensed by NIST. +# +# ACCESS THE FULL CC BY 4.0 LICENSE HERE: +# https://creativecommons.org/licenses/by/4.0/legalcode +"""Neural network image classifiers implemented in Tensorflow/Keras.""" + +from __future__ import annotations + +from types import FunctionType +from typing import Callable, Dict, List, Tuple, Union + +import structlog +from structlog.stdlib import BoundLogger + +from dioptra import pyplugs +from dioptra.sdk.exceptions import TensorflowDependencyError +from dioptra.sdk.utilities.decorators import require_package + +LOGGER: BoundLogger = structlog.stdlib.get_logger() + +try: + from tensorflow.keras.layers import ( + BatchNormalization, + Conv2D, + Dense, + Dropout, + Flatten, + MaxPooling2D, + ) + from tensorflow.keras.metrics import Metric + from tensorflow.keras.models import Sequential + from tensorflow.keras.optimizers import Optimizer + +except ImportError: # pragma: nocover + LOGGER.warn( + "Unable to import one or more optional packages, functionality may be reduced", + package="tensorflow", + ) + + +@pyplugs.register +@require_package("tensorflow", exc_type=TensorflowDependencyError) +def init_classifier( + model_architecture: str, + optimizer: Optimizer, + metrics: List[Union[Metric, FunctionType]], + input_shape: Tuple[int, int, int], + n_classes: int, + loss: str = "categorical_crossentropy", +) -> Sequential: + """Initializes an untrained neural network image classifier for Tensorflow/Keras. 
+ + The `model_architecture` argument is used to select a neural network architecture + from the architecture registry. The string passed to `model_architecture` must match + one of the following, + + - `"shallow_net"` - A shallow neural network architecture. + - `"le_net"` - The LeNet-5 convolutional neural network architecture. + - `"alex_net"` - The AlexNet convolutional neural network architecture. + + Args: + model_architecture: The neural network architecture to use. + optimizer: A Keras :py:class:`~tf.keras.optimizers.Optimizer` providing an + algorithm to use to train the estimator, such as + :py:class:`~tf.keras.optimizers.SGD` and + :py:class:`~tf.keras.optimizers.Adam`. + metrics: A list of metrics to be evaluated by the model during training and + testing. + input_shape: A shape tuple of integers, not including the batch size, specifying + the dimensions of the image data. The shape tuple for all classifiers in the + architecture registry follows the convention `(height, width, channels)`. + n_classes: The number of target labels in the dataset. + loss: A string specifying the loss function to be minimized during training. The + string must match the name of one of the loss functions in the + :py:mod:`tf.keras.losses` module. The default is + `"categorical_crossentropy"`. + + Returns: + A compiled :py:class:`~tf.keras.Sequential` object. + + See Also: + - :py:mod:`tf.keras.losses` + - :py:mod:`tf.keras.optimizers` + - :py:class:`tf.keras.Sequential` + """ + classifier: Sequential = KERAS_CLASSIFIERS_REGISTRY[model_architecture]( + input_shape, + n_classes, + ) + classifier.compile(loss=loss, optimizer=optimizer, metrics=metrics) + + return classifier + + +def shallow_net(input_shape: Tuple[int, int, int], n_classes: int) -> Sequential: + """Builds an untrained shallow neural network architecture for Tensorflow/Keras. + + Args: + input_shape: A shape tuple of integers, not including the batch size, specifying + the dimensions of the image data. 
The shape tuple for all classifiers in the + architecture registry follows the convention `(height, width, channels)`. + n_classes: The number of target labels in the dataset. + + Returns: + A :py:class:`~tf.keras.Sequential` object. + + See Also: + - :py:class:`tf.keras.Sequential` + """ + model = Sequential() + + # Flatten inputs + model.add(Flatten(input_shape=input_shape)) + + # single hidden layer: + model.add(Dense(32, activation="sigmoid")) + + # output layer: + model.add(Dense(n_classes, activation="softmax")) + + return model + + +def le_net(input_shape: Tuple[int, int, int], n_classes: int) -> Sequential: + """Builds an untrained LeNet-5 neural network architecture for Tensorflow/Keras. + + Args: + input_shape: A shape tuple of integers, not including the batch size, specifying + the dimensions of the image data. The shape tuple for all classifiers in the + architecture registry follows the convention `(height, width, channels)`. + n_classes: The number of target labels in the dataset. + + Returns: + A :py:class:`~tf.keras.Sequential` object. + + See Also: + - :py:class:`tf.keras.Sequential` + """ + model = Sequential() + + # first convolutional layer: + model.add( + Conv2D(32, kernel_size=(3, 3), activation="relu", input_shape=input_shape) + ) + + # second conv layer, with pooling and dropout: + model.add(Conv2D(64, kernel_size=(3, 3), activation="relu")) + model.add(MaxPooling2D(pool_size=(2, 2))) + model.add(Dropout(0.25)) + model.add(Flatten()) + + # dense hidden layer, with dropout: + model.add(Dense(128, activation="relu")) + model.add(Dropout(0.5)) + + # output layer: + model.add(Dense(n_classes, activation="softmax")) + + return model + + +def alex_net(input_shape: Tuple[int, int, int], n_classes: int) -> Sequential: + """Builds an untrained AlexNet neural network architecture for Tensorflow/Keras. + + Args: + input_shape: A shape tuple of integers, not including the batch size, specifying + the dimensions of the image data. 
The shape tuple for all classifiers in the + architecture registry follows the convention `(height, width, channels)`. + n_classes: The number of target labels in the dataset. + + Returns: + A :py:class:`~tf.keras.Sequential` object. + + See Also: + - :py:class:`tf.keras.Sequential` + """ + model = Sequential() + + # first conv-pool block: + model.add( + Conv2D( + 96, + kernel_size=(11, 11), + strides=(4, 4), + activation="relu", + input_shape=input_shape, + ) + ) + model.add(MaxPooling2D(pool_size=(3, 3), strides=(2, 2))) + model.add(BatchNormalization()) + + # second conv-pool block: + model.add(Conv2D(256, kernel_size=(5, 5), activation="relu")) + model.add(MaxPooling2D(pool_size=(3, 3), strides=(2, 2))) + model.add(BatchNormalization()) + + # third conv-pool block: + model.add(Conv2D(256, kernel_size=(3, 3), activation="relu")) + model.add(Conv2D(384, kernel_size=(3, 3), activation="relu")) + model.add(Conv2D(384, kernel_size=(3, 3), activation="relu")) + model.add(MaxPooling2D(pool_size=(3, 3), strides=(2, 2))) + model.add(BatchNormalization()) + + # dense layers: + model.add(Flatten()) + model.add(Dense(4096, activation="tanh")) + model.add(Dropout(0.5)) + model.add(Dense(4096, activation="tanh")) + model.add(Dropout(0.5)) + + # output layer: + model.add(Dense(n_classes, activation="softmax")) + + return model + + +KERAS_CLASSIFIERS_REGISTRY: Dict[ + str, Callable[[Tuple[int, int, int], int], Sequential] +] = dict(shallow_net=shallow_net, le_net=le_net, alex_net=alex_net) diff --git a/examples/task-plugins/dioptra_custom/fgm_mnist_demo/estimators_methods.py b/examples/task-plugins/dioptra_custom/fgm_mnist_demo/estimators_methods.py new file mode 100644 index 000000000..28396c530 --- /dev/null +++ b/examples/task-plugins/dioptra_custom/fgm_mnist_demo/estimators_methods.py @@ -0,0 +1,122 @@ +# This Software (Dioptra) is being made available as a public service by the +# National Institute of Standards and Technology (NIST), an Agency of the United +# States 
Department of Commerce. This software was developed in part by employees of +# NIST and in part by NIST contractors. Copyright in portions of this software that +# were developed by NIST contractors has been licensed or assigned to NIST. Pursuant +# to Title 17 United States Code Section 105, works of NIST employees are not +# subject to copyright protection in the United States. However, NIST may hold +# international copyright in software created by its employees and domestic +# copyright (or licensing rights) in portions of software that were assigned or +# licensed to NIST. To the extent that NIST holds copyright in this software, it is +# being made available under the Creative Commons Attribution 4.0 International +# license (CC BY 4.0). The disclaimers of the CC BY 4.0 license apply to all parts +# of the software developed or licensed by NIST. +# +# ACCESS THE FULL CC BY 4.0 LICENSE HERE: +# https://creativecommons.org/licenses/by/4.0/legalcode +from __future__ import annotations + +import datetime +from typing import Any, Dict, Optional + +import mlflow +import structlog +from structlog.stdlib import BoundLogger + +from dioptra import pyplugs +from dioptra.sdk.generics import estimator_predict, fit_estimator + +LOGGER: BoundLogger = structlog.stdlib.get_logger() + + +@pyplugs.register +def fit( + estimator: Any, + x: Any = None, + y: Any = None, + fit_kwargs: Optional[Dict[str, Any]] = None, +) -> Any: + """Fits the estimator to the given data. + + This task plugin wraps :py:func:`~dioptra.sdk.generics.fit_estimator`, which is a + generic function that uses multiple argument dispatch to handle the estimator + fitting method for different machine learning libraries. The modules attached to the + advertised plugin entry point `dioptra.generics.fit_estimator` are used to build the + function dispatch registry at runtime. 
For more information on the supported fitting + methods and `fit_kwargs` arguments, please refer to the documentation of the + registered dispatch functions. + + Args: + estimator: The model to be trained. + x: The input data to be used for training. + y: The target data to be used for training. + fit_kwargs: An optional dictionary of keyword arguments to pass to the + dispatched function. + + Returns: + The object returned by the estimator's fitting function. For further details on + the type of object this method can return, see the documentation for the + registered dispatch functions. + + See Also: + - :py:func:`dioptra.sdk.generics.fit_estimator` + """ + fit_kwargs = fit_kwargs or {} + time_start: datetime.datetime = datetime.datetime.now() + + LOGGER.info( + "Begin estimator fit", + timestamp=time_start.isoformat(), + ) + + estimator_fit_result: Any = fit_estimator(estimator, x, y, **fit_kwargs) + + time_end: datetime.datetime = datetime.datetime.now() + + total_seconds: float = (time_end - time_start).total_seconds() + total_minutes: float = total_seconds / 60 + + mlflow.log_metric("training_time_in_minutes", total_minutes) + LOGGER.info( + "Estimator fit complete", + timestamp=time_end.isoformat(), + total_minutes=total_minutes, + ) + + return estimator_fit_result + + +@pyplugs.register +def predict( + estimator: Any, + x: Any = None, + predict_kwargs: Optional[Dict[str, Any]] = None, +) -> Any: + """Uses the estimator to make predictions on the given input data. + + This task plugin wraps :py:func:`~dioptra.sdk.generics.estimator_predict`, which is + a generic function that uses multiple argument dispatch to handle estimator + prediction methods for different machine learning libraries. The modules attached to + the advertised plugin entry point `dioptra.generics.estimator_predict` are used to + build the function dispatch registry at runtime. 
For more information on the + supported prediction methods and `predict_kwargs` arguments, refer to the + documentation of the registered dispatch functions. + + Args: + estimator: A trained model to be used to generate predictions. + x: The input data for which to generate predictions. + predict_kwargs: An optional dictionary of keyword arguments to pass to the + dispatched function. + + Returns: + The object returned by the estimator's predict function. For further details on + the type of object this method can return, see the documentation for the + registered dispatch functions. + + See Also: + - :py:func:`dioptra.sdk.generics.estimator_predict` + """ + predict_kwargs = predict_kwargs or {} + prediction: Any = estimator_predict(estimator, x, **predict_kwargs) + + return prediction diff --git a/examples/task-plugins/dioptra_custom/fgm_mnist_demo/import_keras.py b/examples/task-plugins/dioptra_custom/fgm_mnist_demo/import_keras.py new file mode 100644 index 000000000..b5d03b51c --- /dev/null +++ b/examples/task-plugins/dioptra_custom/fgm_mnist_demo/import_keras.py @@ -0,0 +1,65 @@ +# This Software (Dioptra) is being made available as a public service by the +# National Institute of Standards and Technology (NIST), an Agency of the United +# States Department of Commerce. This software was developed in part by employees of +# NIST and in part by NIST contractors. Copyright in portions of this software that +# were developed by NIST contractors has been licensed or assigned to NIST. Pursuant +# to Title 17 United States Code Section 105, works of NIST employees are not +# subject to copyright protection in the United States. However, NIST may hold +# international copyright in software created by its employees and domestic +# copyright (or licensing rights) in portions of software that were assigned or +# licensed to NIST. 
To the extent that NIST holds copyright in this software, it is +# being made available under the Creative Commons Attribution 4.0 International +# license (CC BY 4.0). The disclaimers of the CC BY 4.0 license apply to all parts +# of the software developed or licensed by NIST. +# +# ACCESS THE FULL CC BY 4.0 LICENSE HERE: +# https://creativecommons.org/licenses/by/4.0/legalcode +from __future__ import annotations + +import importlib +from types import FunctionType, ModuleType +from typing import Union + +import structlog +from structlog.stdlib import BoundLogger + +from dioptra.sdk.exceptions import TensorflowDependencyError +from dioptra.sdk.utilities.decorators import require_package + +LOGGER: BoundLogger = structlog.stdlib.get_logger() + +try: + from tensorflow.keras.callbacks import Callback + from tensorflow.keras.metrics import Metric + from tensorflow.keras.optimizers import Optimizer + +except ImportError: # pragma: nocover + LOGGER.warn( + "Unable to import one or more optional packages, functionality may be reduced", + package="tensorflow", + ) + +KERAS_CALLBACKS: str = "tensorflow.keras.callbacks" +KERAS_METRICS: str = "tensorflow.keras.metrics" +KERAS_OPTIMIZERS: str = "tensorflow.keras.optimizers" + + +@require_package("tensorflow", exc_type=TensorflowDependencyError) +def get_callback(callback_name: str) -> Callback: + keras_callbacks: ModuleType = importlib.import_module(KERAS_CALLBACKS) + callback: Callback = getattr(keras_callbacks, callback_name) + return callback + + +@require_package("tensorflow", exc_type=TensorflowDependencyError) +def get_metric(metric_name: str) -> Union[Metric, FunctionType]: + keras_metrics: ModuleType = importlib.import_module(KERAS_METRICS) + metric: Metric = getattr(keras_metrics, metric_name) + return metric + + +@require_package("tensorflow", exc_type=TensorflowDependencyError) +def get_optimizer(optimizer_name: str) -> Optimizer: + keras_optimizers: ModuleType = importlib.import_module(KERAS_OPTIMIZERS) + 
optimizer: Optimizer = getattr(keras_optimizers, optimizer_name) + return optimizer diff --git a/examples/task-plugins/dioptra_custom/fgm_mnist_demo/mlflow.py b/examples/task-plugins/dioptra_custom/fgm_mnist_demo/mlflow.py new file mode 100644 index 000000000..8546dff8c --- /dev/null +++ b/examples/task-plugins/dioptra_custom/fgm_mnist_demo/mlflow.py @@ -0,0 +1,103 @@ +# This Software (Dioptra) is being made available as a public service by the +# National Institute of Standards and Technology (NIST), an Agency of the United +# States Department of Commerce. This software was developed in part by employees of +# NIST and in part by NIST contractors. Copyright in portions of this software that +# were developed by NIST contractors has been licensed or assigned to NIST. Pursuant +# to Title 17 United States Code Section 105, works of NIST employees are not +# subject to copyright protection in the United States. However, NIST may hold +# international copyright in software created by its employees and domestic +# copyright (or licensing rights) in portions of software that were assigned or +# licensed to NIST. To the extent that NIST holds copyright in this software, it is +# being made available under the Creative Commons Attribution 4.0 International +# license (CC BY 4.0). The disclaimers of the CC BY 4.0 license apply to all parts +# of the software developed or licensed by NIST. 
+# +# ACCESS THE FULL CC BY 4.0 LICENSE HERE: +# https://creativecommons.org/licenses/by/4.0/legalcode +"""A task plugin module for using the MLFlow model registry.""" + +from __future__ import annotations + +from pathlib import Path +from typing import Optional + +import mlflow +import os +import structlog +from mlflow.entities.model_registry import ModelVersion +from mlflow.tracking import MlflowClient +from structlog.stdlib import BoundLogger + +from dioptra import pyplugs +from .artifacts_restapi import upload_model_to_restapi +LOGGER: BoundLogger = structlog.stdlib.get_logger() + + +@pyplugs.register +def add_model_to_registry(name: str, model_dir: str) -> Optional[ModelVersion]: + """Registers a trained model logged during the current run to the MLFlow registry. + + Args: + active_run: The :py:class:`mlflow.ActiveRun` object managing the current run's + state. + name: The registration name to use for the model. + model_dir: The relative artifact directory where MLFlow logged the model trained + during the current run. + + Returns: + A :py:class:`~mlflow.entities.model_registry.ModelVersion` object created by the + backend. 
+ """ + job_id = os.environ['__JOB_ID'] + if not name.strip(): + return None + + active_run = mlflow.active_run() + + run_id: str = active_run.info.run_id + artifact_uri: str = active_run.info.artifact_uri + source: str = f"{artifact_uri}/{model_dir}" + + registered_models = [x.name for x in MlflowClient().search_registered_models()] + + if name not in registered_models: + LOGGER.info("create registered model", name=name) + MlflowClient().create_registered_model(name=name) + + LOGGER.info("create model version", name=name, source=source, run_id=run_id) + model_version: ModelVersion = MlflowClient().create_model_version( + name=name, source=source, run_id=run_id + ) + upload_model_to_restapi(name, source, job_id) + + return model_version + + +@pyplugs.register +def get_experiment_name() -> str: + """Gets the name of the experiment for the current run. + + Args: + active_run: The :py:class:`mlflow.ActiveRun` object managing the current run's + state. + + Returns: + The name of the experiment. + """ + active_run = mlflow.active_run() + + experiment_name: str = ( + MlflowClient().get_experiment(active_run.info.experiment_id).name + ) + LOGGER.info( + "Obtained experiment name of active run", experiment_name=experiment_name + ) + + return experiment_name + + +@pyplugs.register +def prepend_cwd(path: str) -> Path: + ret = Path.cwd() / path + return ret + diff --git a/examples/task-plugins/dioptra_custom/fgm_mnist_demo/plugins.py b/examples/task-plugins/dioptra_custom/fgm_mnist_demo/plugins.py index e69de29bb..3b1215385 100644 --- a/examples/task-plugins/dioptra_custom/fgm_mnist_demo/plugins.py +++ b/examples/task-plugins/dioptra_custom/fgm_mnist_demo/plugins.py @@ -0,0 +1,137 @@ +# This Software (Dioptra) is being made available as a public service by the +# National Institute of Standards and Technology (NIST), an Agency of the United +# States Department of Commerce. This software was developed in part by employees of +# NIST and in part by NIST contractors. 
Copyright in portions of this software that +# were developed by NIST contractors has been licensed or assigned to NIST. Pursuant +# to Title 17 United States Code Section 105, works of NIST employees are not +# subject to copyright protection in the United States. However, NIST may hold +# international copyright in software created by its employees and domestic +# copyright (or licensing rights) in portions of software that were assigned or +# licensed to NIST. To the extent that NIST holds copyright in this software, it is +# being made available under the Creative Commons Attribution 4.0 International +# license (CC BY 4.0). The disclaimers of the CC BY 4.0 license apply to all parts +# of the software developed or licensed by NIST. +# +# ACCESS THE FULL CC BY 4.0 LICENSE HERE: +# https://creativecommons.org/licenses/by/4.0/legalcode +from __future__ import annotations + +from pathlib import Path +from typing import Callable, Dict, List, Optional, Tuple, Union, Any + +import mlflow +import numpy as np +import pandas as pd +import scipy.stats +import structlog +from structlog.stdlib import BoundLogger + +from dioptra import pyplugs +from .tensorflow import get_optimizer, get_model_callbacks, get_performance_metrics, evaluate_metrics_tensorflow +from .estimators_keras_classifiers import init_classifier +from .registry_art import load_wrapped_tensorflow_keras_classifier +from .random_rng import init_rng +from .random_sample import draw_random_integer +from .backend_configs_tensorflow import init_tensorflow +from .tracking_mlflow import log_parameters, log_tensorflow_keras_estimator, log_metrics +from .data_tensorflow import get_n_classes_from_directory_iterator, create_image_dataset +from .estimators_methods import fit +from .mlflow import add_model_to_registry + + +LOGGER: BoundLogger = structlog.stdlib.get_logger() + +@pyplugs.register +def load_dataset( + ep_seed: int = 10145783023, + data_dir: str = "/dioptra/data/Mnist/testing", + subset: Optional[str] = 
"testing", + image_size: Tuple[int, int, int] = [28, 28, 1], + rescale: float = 1.0 / 255, + validation_split: Optional[float] = 0.2, + batch_size: int = 32, + label_mode: str = "categorical", + shuffle: bool = False +) -> DirectoryIterator: + seed, rng = init_rng(ep_seed) + global_seed = draw_random_integer(rng) + dataset_seed = draw_random_integer(rng) + init_tensorflow(global_seed) + if (subset == "training"): + log_parameters( + {'entry_point_seed': ep_seed, + 'tensorflow_global_seed':global_seed, + 'dataset_seed':dataset_seed}) + dataset = create_image_dataset( + data_dir=data_dir, + subset=subset, + image_size=image_size, + seed=dataset_seed, + rescale=rescale, + validation_split=validation_split, + batch_size=batch_size, + label_mode=label_mode, + shuffle=shuffle + ) + return dataset + +@pyplugs.register +def get_model( + dataset: DirectoryIterator = None, + model_architecture: str = "le_net", + input_shape: Tuple[int, int, int] = [28, 28, 1], + loss: str = "categorical_crossentropy", + learning_rate: float = 0.001, + optimizer: str = "Adam", + metrics_list: List[Dict[str, Any]] = None, + uri: str | None = None, + imagenet_preprocessing: bool = False, + classifier_kwargs: Optional[Dict[str, Any]] = None +): + + if uri is None: + # create a model + n_classes = get_n_classes_from_directory_iterator(dataset) + optim = get_optimizer(optimizer, learning_rate) + perf_metrics = get_performance_metrics(metrics_list) + classifier = init_classifier(model_architecture, optim, perf_metrics, input_shape, n_classes, loss) + else: + # load a model + classifier = load_wrapped_tensorflow_keras_classifier(uri, imagenet_preprocessing, classifier_kwargs) + return classifier + +@pyplugs.register +def train( + model_name: str, + estimator: Any, + x: Any = None, + y: Any = None, + callbacks_list: List[Dict[str, Any]] = None, + fit_kwargs: Optional[Dict[str, Any]] = None +): + fit_kwargs = {} if fit_kwargs is None else fit_kwargs + callbacks = get_model_callbacks(callbacks_list) + 
fit_kwargs['callbacks'] = callbacks + trained_model = fit(estimator=estimator, x=x, y=y, fit_kwargs=fit_kwargs) + log_tensorflow_keras_estimator(estimator, "model") + add_model_to_registry(model_name, "model") + +@pyplugs.register +def attack(): + pass + +@pyplugs.register +def compute_metrics( + classifier: Any, + dataset: Any +): + metrics = evaluate_metrics_tensorflow(classifier, dataset) + log_metrics(metrics) + +@pyplugs.register +def augment_data(): + pass + +@pyplugs.register +def predict(): + pass diff --git a/examples/task-plugins/dioptra_custom/fgm_mnist_demo/random_rng.py b/examples/task-plugins/dioptra_custom/fgm_mnist_demo/random_rng.py new file mode 100644 index 000000000..d10b2bd60 --- /dev/null +++ b/examples/task-plugins/dioptra_custom/fgm_mnist_demo/random_rng.py @@ -0,0 +1,56 @@ +# This Software (Dioptra) is being made available as a public service by the +# National Institute of Standards and Technology (NIST), an Agency of the United +# States Department of Commerce. This software was developed in part by employees of +# NIST and in part by NIST contractors. Copyright in portions of this software that +# were developed by NIST contractors has been licensed or assigned to NIST. Pursuant +# to Title 17 United States Code Section 105, works of NIST employees are not +# subject to copyright protection in the United States. However, NIST may hold +# international copyright in software created by its employees and domestic +# copyright (or licensing rights) in portions of software that were assigned or +# licensed to NIST. To the extent that NIST holds copyright in this software, it is +# being made available under the Creative Commons Attribution 4.0 International +# license (CC BY 4.0). The disclaimers of the CC BY 4.0 license apply to all parts +# of the software developed or licensed by NIST. 
+# +# ACCESS THE FULL CC BY 4.0 LICENSE HERE: +# https://creativecommons.org/licenses/by/4.0/legalcode +"""A task plugin module for managing random number generators.""" + +from __future__ import annotations + +from typing import Tuple + +import numpy as np +import structlog +from numpy.random._generator import Generator as RNGenerator +from structlog.stdlib import BoundLogger + +from dioptra import pyplugs + +LOGGER: BoundLogger = structlog.stdlib.get_logger() + + +@pyplugs.register +@pyplugs.task_nout(2) +def init_rng(seed: int = -1) -> Tuple[int, RNGenerator]: + """Constructs a new random number generator. + + Args: + seed: A seed to initialize the random number generator. If the value is less + than zero, then the seed is generated by pulling fresh, unpredictable + entropy from the OS. The default is `-1`. + + Returns: + A tuple containing the seed and the initialized random number generator. If a + `seed < 0` was passed as an argument, then the seed generated by the OS will be + returned. + + See Also: + - :py:func:`numpy.random.default_rng` + """ + rng = np.random.default_rng(seed if seed >= 0 else None) + + if seed < 0: + seed = rng.bit_generator._seed_seq.entropy # type: ignore[attr-defined] + + return int(seed), rng diff --git a/examples/task-plugins/dioptra_custom/fgm_mnist_demo/random_sample.py b/examples/task-plugins/dioptra_custom/fgm_mnist_demo/random_sample.py new file mode 100644 index 000000000..33c13d5d5 --- /dev/null +++ b/examples/task-plugins/dioptra_custom/fgm_mnist_demo/random_sample.py @@ -0,0 +1,89 @@ +# This Software (Dioptra) is being made available as a public service by the +# National Institute of Standards and Technology (NIST), an Agency of the United +# States Department of Commerce. This software was developed in part by employees of +# NIST and in part by NIST contractors. Copyright in portions of this software that +# were developed by NIST contractors has been licensed or assigned to NIST. 
Pursuant +# to Title 17 United States Code Section 105, works of NIST employees are not +# subject to copyright protection in the United States. However, NIST may hold +# international copyright in software created by its employees and domestic +# copyright (or licensing rights) in portions of software that were assigned or +# licensed to NIST. To the extent that NIST holds copyright in this software, it is +# being made available under the Creative Commons Attribution 4.0 International +# license (CC BY 4.0). The disclaimers of the CC BY 4.0 license apply to all parts +# of the software developed or licensed by NIST. +# +# ACCESS THE FULL CC BY 4.0 LICENSE HERE: +# https://creativecommons.org/licenses/by/4.0/legalcode +"""A task plugin module for drawing random samples.""" + +from __future__ import annotations + +from typing import Optional, Tuple, Union + +import numpy as np +import structlog +from numpy.random._generator import Generator as RNGenerator +from structlog.stdlib import BoundLogger + +from dioptra import pyplugs + +LOGGER: BoundLogger = structlog.stdlib.get_logger() + + +@pyplugs.register +def draw_random_integer(rng: RNGenerator, low: int = 0, high: int = 2**31 - 1) -> int: + """Returns a random integer from `low` (inclusive) to `high` (exclusive). + + The integer is sampled from a uniform distribution. + + Args: + rng: A random number generator returned by :py:func:`~.rng.init_rng`. + low: Lowest (signed) integers to be drawn from the distribution (unless + `high=None`, in which case this parameter is `0` and this value is used for + `high`). + high: If not `None`, one above the largest (signed) integer to be drawn from the + distribution (see above for behavior if `high=None`) + + Returns: + A random integer. 
+ + See Also: + - :py:meth:`numpy.random.Generator.integers` + """ + result: int = int(rng.integers(low=low, high=high)) + + return result + + +@pyplugs.register +def draw_random_integers( + rng: RNGenerator, + low: int = 0, + high: int = 2**31 - 1, + size: Optional[Union[int, Tuple[int, ...]]] = None, +) -> np.ndarray: + """Returns random integers from `low` (inclusive) to `high` (exclusive). + + The integers are sampled from a uniform distribution. + + Args: + rng: A random number generator returned by :py:func:`~.rng.init_rng`. + low: Lowest (signed) integers to be drawn from the distribution (unless + `high=None`, in which case this parameter is `0` and this value is used for + `high`). + high: If not `None`, one above the largest (signed) integer to be drawn from the + distribution (see above for behavior if `high=None`). + size: The output shape of array. If the given shape is, e.g., `(m, n, k)`, then + `m * n * k` samples are drawn. If `None`, a single value is returned. The + default is `None`. + + Returns: + A `size`-shaped array of random integers. + + See Also: + - :py:meth:`numpy.random.Generator.integers` + """ + size = size or 1 + result: np.ndarray = rng.integers(low=low, high=high, size=size) + + return result diff --git a/examples/task-plugins/dioptra_custom/fgm_mnist_demo/registry_art.py b/examples/task-plugins/dioptra_custom/fgm_mnist_demo/registry_art.py new file mode 100644 index 000000000..7286cf002 --- /dev/null +++ b/examples/task-plugins/dioptra_custom/fgm_mnist_demo/registry_art.py @@ -0,0 +1,107 @@ +# This Software (Dioptra) is being made available as a public service by the +# National Institute of Standards and Technology (NIST), an Agency of the United +# States Department of Commerce. This software was developed in part by employees of +# NIST and in part by NIST contractors. Copyright in portions of this software that +# were developed by NIST contractors has been licensed or assigned to NIST. 
Pursuant +# to Title 17 United States Code Section 105, works of NIST employees are not +# subject to copyright protection in the United States. However, NIST may hold +# international copyright in software created by its employees and domestic +# copyright (or licensing rights) in portions of software that were assigned or +# licensed to NIST. To the extent that NIST holds copyright in this software, it is +# being made available under the Creative Commons Attribution 4.0 International +# license (CC BY 4.0). The disclaimers of the CC BY 4.0 license apply to all parts +# of the software developed or licensed by NIST. +# +# ACCESS THE FULL CC BY 4.0 LICENSE HERE: +# https://creativecommons.org/licenses/by/4.0/legalcode +"""A task plugin module for interfacing the |ART| with the MLFlow model registry. + +.. |ART| replace:: `Adversarial Robustness Toolbox\ + `__ +""" + +from __future__ import annotations + +from typing import Any, Dict, Optional + +import numpy as np +import structlog +from structlog.stdlib import BoundLogger + +from dioptra import pyplugs +from dioptra.sdk.exceptions import ARTDependencyError, TensorflowDependencyError +from dioptra.sdk.utilities.decorators import require_package + +from .registry_mlflow import load_tensorflow_keras_classifier + +LOGGER: BoundLogger = structlog.stdlib.get_logger() + +try: + from art.estimators.classification import TensorFlowV2Classifier + +except ImportError: # pragma: nocover + LOGGER.warn( + "Unable to import one or more optional packages, functionality may be reduced", + package="art", + ) + + +try: + from tensorflow.keras import losses + from tensorflow.keras.models import Sequential + +except ImportError: # pragma: nocover + LOGGER.warn( + "Unable to import one or more optional packages, functionality may be reduced", + package="tensorflow", + ) + + +@pyplugs.register +@require_package("art", exc_type=ARTDependencyError) +@require_package("tensorflow", exc_type=TensorflowDependencyError) +def 
load_wrapped_tensorflow_keras_classifier( + artifact_uri: str, + imagenet_preprocessing: bool = False, + classifier_kwargs: Optional[Dict[str, Any]] = None, +) -> TensorFlowV2Classifier: + """Loads and wraps a registered Keras classifier for compatibility with the |ART|. + + Args: + name: The name of the registered model in the MLFlow model registry. + version: The version number of the registered model in the MLFlow registry. + classifier_kwargs: A dictionary mapping argument names to values which will + be passed to the TensorFlowV2Classifier constructor. + Returns: + A trained :py:class:`~art.estimators.classification.TensorFlowV2Classifier` + object. + + See Also: + - :py:class:`art.estimators.classification.TensorFlowV2Classifier` + - :py:func:`.mlflow.load_tensorflow_keras_classifier` + """ + classifier_kwargs = classifier_kwargs or {} + keras_classifier: Sequential = load_tensorflow_keras_classifier( + uri=artifact_uri + ) + nb_classes = keras_classifier.output_shape[1] + input_shape = keras_classifier.input_shape + loss_object = losses.get(keras_classifier.loss) + preprocessing = ( + (np.array([103.939, 116.779, 123.680]), np.array([1.0, 1.0, 1.0])) + if imagenet_preprocessing + else None + ) + wrapped_keras_classifier: TensorFlowV2Classifier = TensorFlowV2Classifier( + model=keras_classifier, + nb_classes=nb_classes, + input_shape=input_shape, + loss_object=loss_object, + preprocessing=preprocessing, + **classifier_kwargs, + ) + LOGGER.info( + "Wrap Keras classifier for compatibility with Adversarial Robustness Toolbox" + ) + + return wrapped_keras_classifier diff --git a/examples/task-plugins/dioptra_custom/fgm_mnist_demo/registry_mlflow.py b/examples/task-plugins/dioptra_custom/fgm_mnist_demo/registry_mlflow.py new file mode 100644 index 000000000..23d8519aa --- /dev/null +++ b/examples/task-plugins/dioptra_custom/fgm_mnist_demo/registry_mlflow.py @@ -0,0 +1,120 @@ +# This Software (Dioptra) is being made available as a public service by the +# National 
Institute of Standards and Technology (NIST), an Agency of the United +# States Department of Commerce. This software was developed in part by employees of +# NIST and in part by NIST contractors. Copyright in portions of this software that +# were developed by NIST contractors has been licensed or assigned to NIST. Pursuant +# to Title 17 United States Code Section 105, works of NIST employees are not +# subject to copyright protection in the United States. However, NIST may hold +# international copyright in software created by its employees and domestic +# copyright (or licensing rights) in portions of software that were assigned or +# licensed to NIST. To the extent that NIST holds copyright in this software, it is +# being made available under the Creative Commons Attribution 4.0 International +# license (CC BY 4.0). The disclaimers of the CC BY 4.0 license apply to all parts +# of the software developed or licensed by NIST. +# +# ACCESS THE FULL CC BY 4.0 LICENSE HERE: +# https://creativecommons.org/licenses/by/4.0/legalcode +"""A task plugin module for using the MLFlow model registry.""" + +from __future__ import annotations + +from typing import Optional + +import mlflow +import structlog +from mlflow.entities import Run as MlflowRun +from mlflow.entities.model_registry import ModelVersion +from mlflow.tracking import MlflowClient +from structlog.stdlib import BoundLogger + +from dioptra import pyplugs +from dioptra.sdk.exceptions import TensorflowDependencyError +from dioptra.sdk.utilities.decorators import require_package + +LOGGER: BoundLogger = structlog.stdlib.get_logger() + +try: + from tensorflow.keras.models import Sequential + +except ImportError: # pragma: nocover + LOGGER.warn( + "Unable to import one or more optional packages, functionality may be reduced", + package="tensorflow", + ) + + +@pyplugs.register +def add_model_to_registry( + active_run: MlflowRun, name: str, model_dir: str +) -> Optional[ModelVersion]: + """Registers a trained model 
logged during the current run to the MLFlow registry. + + Args: + active_run: The :py:class:`mlflow.ActiveRun` object managing the current run's + state. + name: The registration name to use for the model. + model_dir: The relative artifact directory where MLFlow logged the model trained + during the current run. + + Returns: + A :py:class:`~mlflow.entities.model_registry.ModelVersion` object created by the + backend. + """ + if not name.strip(): + return None + + run_id: str = active_run.info.run_id + artifact_uri: str = active_run.info.artifact_uri + source: str = f"{artifact_uri}/{model_dir}" + + registered_models = [x.name for x in MlflowClient().search_registered_models()] + + if name not in registered_models: + LOGGER.info("create registered model", name=name) + MlflowClient().create_registered_model(name=name) + + LOGGER.info("create model version", name=name, source=source, run_id=run_id) + model_version: ModelVersion = MlflowClient().create_model_version( + name=name, source=source, run_id=run_id + ) + + return model_version + + +@pyplugs.register +def get_experiment_name(active_run: MlflowRun) -> str: + """Gets the name of the experiment for the current run. + + Args: + active_run: The :py:class:`mlflow.ActiveRun` object managing the current run's + state. + + Returns: + The name of the experiment. + """ + experiment_name: str = ( + MlflowClient().get_experiment(active_run.info.experiment_id).name + ) + LOGGER.info( + "Obtained experiment name of active run", experiment_name=experiment_name + ) + + return experiment_name + + +@pyplugs.register +@require_package("tensorflow", exc_type=TensorflowDependencyError) +def load_tensorflow_keras_classifier(uri: str) -> Sequential: + """Loads a registered Keras classifier. + + Args: + name: The name of the registered model in the MLFlow model registry. + version: The version number of the registered model in the MLFlow registry. + + Returns: + A trained :py:class:`tf.keras.Sequential` object. 
+ """ + LOGGER.info("Load Keras classifier from model registry", uri=uri) + + return mlflow.keras.load_model(model_uri=uri) + \ No newline at end of file diff --git a/examples/task-plugins/dioptra_custom/fgm_mnist_demo/tensorflow.py b/examples/task-plugins/dioptra_custom/fgm_mnist_demo/tensorflow.py new file mode 100644 index 000000000..1d640e2c1 --- /dev/null +++ b/examples/task-plugins/dioptra_custom/fgm_mnist_demo/tensorflow.py @@ -0,0 +1,112 @@ +# This Software (Dioptra) is being made available as a public service by the +# National Institute of Standards and Technology (NIST), an Agency of the United +# States Department of Commerce. This software was developed in part by employees of +# NIST and in part by NIST contractors. Copyright in portions of this software that +# were developed by NIST contractors has been licensed or assigned to NIST. Pursuant +# to Title 17 United States Code Section 105, works of NIST employees are not +# subject to copyright protection in the United States. However, NIST may hold +# international copyright in software created by its employees and domestic +# copyright (or licensing rights) in portions of software that were assigned or +# licensed to NIST. To the extent that NIST holds copyright in this software, it is +# being made available under the Creative Commons Attribution 4.0 International +# license (CC BY 4.0). The disclaimers of the CC BY 4.0 license apply to all parts +# of the software developed or licensed by NIST. +# +# ACCESS THE FULL CC BY 4.0 LICENSE HERE: +# https://creativecommons.org/licenses/by/4.0/legalcode +from __future__ import annotations + +from types import FunctionType +from typing import Any, Dict, List, Union + +import structlog +from structlog.stdlib import BoundLogger + +from dioptra import pyplugs +from dioptra.sdk.exceptions import TensorflowDependencyError +from dioptra.sdk.utilities.decorators import require_package + +from . 
import import_keras + +LOGGER: BoundLogger = structlog.stdlib.get_logger() + +try: + from tensorflow.keras.callbacks import Callback + from tensorflow.keras.metrics import Metric + from tensorflow.keras.optimizers import Optimizer + +except ImportError: # pragma: nocover + LOGGER.warn( + "Unable to import one or more optional packages, functionality may be reduced", + package="tensorflow", + ) + + +@pyplugs.register +@require_package("tensorflow", exc_type=TensorflowDependencyError) +def evaluate_metrics_tensorflow(classifier, dataset) -> Dict[str, float]: + result = classifier.evaluate(dataset, verbose=0) + return dict(zip(classifier.metrics_names, result)) + + +@pyplugs.register +@require_package("tensorflow", exc_type=TensorflowDependencyError) +def get_optimizer(optimizer: str, learning_rate: float) -> Optimizer: + return import_keras.get_optimizer(optimizer)(learning_rate) + + +@pyplugs.register +@require_package("tensorflow", exc_type=TensorflowDependencyError) +def get_model_callbacks(callbacks_list: List[Dict[str, Any]]) -> List[Callback]: + return [ + import_keras.get_callback(callback["name"])(**callback.get("parameters", {})) + for callback in callbacks_list + ] + + +@pyplugs.register +@require_package("tensorflow", exc_type=TensorflowDependencyError) +def get_performance_metrics( + metrics_list: List[Dict[str, Any]] +) -> List[Union[Metric, FunctionType]]: + performance_metrics: List[Metric] = [] + + for metric in metrics_list: + new_metric: Union[Metric, FunctionType] = import_keras.get_metric( + metric["name"] + ) + performance_metrics.append( + new_metric(**metric.get("parameters")) + if not isinstance(new_metric, FunctionType) and metric.get("parameters") + else new_metric + ) + + return performance_metrics + +@pyplugs.register +def process_int_list(arg: str): + lst = arg.replace('[','').replace(']', '').replace(' ','') + lst = list(map(lambda x: int(x), lst.split(','))) + return lst + +@pyplugs.register +def process_float_list(arg: str): + lst = 
arg.replace('[','').replace(']', '').replace(' ','') + lst = list(map(lambda x: float(x), lst.split(','))) + return lst + +@pyplugs.register +def process_float(arg: str): + return float(arg) + +@pyplugs.register +def process_int(arg: str): + return int(arg) + +@pyplugs.register +def process_bool(arg: str): + return bool(arg) + +@pyplugs.register +def get_none(arg: str): + return None \ No newline at end of file diff --git a/examples/task-plugins/dioptra_custom/fgm_mnist_demo/tracking_mlflow.py b/examples/task-plugins/dioptra_custom/fgm_mnist_demo/tracking_mlflow.py new file mode 100644 index 000000000..f7f878cd5 --- /dev/null +++ b/examples/task-plugins/dioptra_custom/fgm_mnist_demo/tracking_mlflow.py @@ -0,0 +1,99 @@ +# This Software (Dioptra) is being made available as a public service by the +# National Institute of Standards and Technology (NIST), an Agency of the United +# States Department of Commerce. This software was developed in part by employees of +# NIST and in part by NIST contractors. Copyright in portions of this software that +# were developed by NIST contractors has been licensed or assigned to NIST. Pursuant +# to Title 17 United States Code Section 105, works of NIST employees are not +# subject to copyright protection in the United States. However, NIST may hold +# international copyright in software created by its employees and domestic +# copyright (or licensing rights) in portions of software that were assigned or +# licensed to NIST. To the extent that NIST holds copyright in this software, it is +# being made available under the Creative Commons Attribution 4.0 International +# license (CC BY 4.0). The disclaimers of the CC BY 4.0 license apply to all parts +# of the software developed or licensed by NIST. 
+# +# ACCESS THE FULL CC BY 4.0 LICENSE HERE: +# https://creativecommons.org/licenses/by/4.0/legalcode +"""A task plugin module for using the MLFlow Tracking service.""" + +from __future__ import annotations + +from typing import Dict + +import mlflow +import structlog +from structlog.stdlib import BoundLogger + +from dioptra import pyplugs +from dioptra.sdk.exceptions import TensorflowDependencyError +from dioptra.sdk.utilities.decorators import require_package + +LOGGER: BoundLogger = structlog.stdlib.get_logger() + +try: + from tensorflow.keras.models import Sequential + +except ImportError: # pragma: nocover + LOGGER.warn( + "Unable to import one or more optional packages, functionality may be reduced", + package="tensorflow", + ) + + +@pyplugs.register +def log_metrics(metrics: Dict[str, float]) -> None: + """Logs metrics to the MLFlow Tracking service for the current run. + + Args: + metrics: A dictionary with the metrics to be logged. The keys are the metric + names and the values are the metric values. + + See Also: + - :py:func:`mlflow.log_metric` + """ + for metric_name, metric_value in metrics.items(): + mlflow.log_metric(key=metric_name, value=metric_value) + LOGGER.info( + "Log metric to MLFlow Tracking server", + metric_name=metric_name, + metric_value=metric_value, + ) + + +@pyplugs.register +def log_parameters(parameters: Dict[str, float]) -> None: + """Logs parameters to the MLFlow Tracking service for the current run. + + Parameters can only be set once per run. + + Args: + parameters: A dictionary with the parameters to be logged. The keys are the + parameter names and the values are the parameter values. 
+ + See Also: + - :py:func:`mlflow.log_param` + """ + for parameter_name, parameter_value in parameters.items(): + mlflow.log_param(key=parameter_name, value=parameter_value) + LOGGER.info( + "Log parameter to MLFlow Tracking server", + parameter_name=parameter_name, + parameter_value=parameter_value, + ) + + +@pyplugs.register +@require_package("tensorflow", exc_type=TensorflowDependencyError) +def log_tensorflow_keras_estimator(estimator: Sequential, model_dir: str) -> None: + """Logs a Keras estimator trained during the current run to the MLFlow registry. + + Args: + estimator: A trained Keras estimator. + model_dir: The relative artifact directory where MLFlow should save the + model. + """ + mlflow.keras.log_model(model=estimator, artifact_path=model_dir) + LOGGER.info( + "Tensorflow Keras model logged to tracking server", + model_dir=model_dir, + )