-
Notifications
You must be signed in to change notification settings - Fork 14
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Added quick start example to the README
- Loading branch information
1 parent
2c325ab
commit 1411337
Showing
6 changed files
with
495 additions
and
2 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Binary file added
BIN
+34.6 KB
...s/training_and_testing/images/thumb/sphx_glr_plot_using_external_data_thumb.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
97 changes: 97 additions & 0 deletions
97
docs/auto_examples/training_and_testing/plot_using_external_data.ipynb
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,97 @@ | ||
{ | ||
"cells": [ | ||
{ | ||
"cell_type": "markdown", | ||
"metadata": {}, | ||
"source": [ | ||
"\n# Using External Test Data\n\nLet's learn how to use external test data with Fusilli!\nSome guidance can also be found in the `Data Loading <data-loading>` section of the documentation.\n\nThe extra step that we need to take is to provide the paths to the test data files to the functions that create evaluation figures: :class:`~fusilli.eval.RealsVsPreds.from_new_data`, :class:`~fusilli.eval.ConfusionMatrix.from_new_data`, :class:`~fusilli.eval.ModelComparison.from_new_data`.\n\n<div class=\"alert alert-info\"><h4>Note</h4><p>It is not possible to use external test data with graph-based fusion models.</p></div>\n\n\nWe'll rush through the first few steps of the training and testing process, as they are covered in more detail in the other example notebooks.\n" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": { | ||
"collapsed": false | ||
}, | ||
"outputs": [], | ||
"source": [ | ||
"import matplotlib.pyplot as plt\nfrom tqdm.auto import tqdm\nimport os\n\nfrom docs.examples import generate_sklearn_simulated_data\nfrom fusilli.data import prepare_fusion_data\nfrom fusilli.eval import RealsVsPreds, ModelComparison\nfrom fusilli.train import train_and_save_models\nfrom fusilli.utils.model_chooser import import_chosen_fusion_models\n\n# sphinx_gallery_thumbnail_number = -1\n\n\nmodel_conditions = {\n \"class_name\": [\"ConcatTabularData\"],\n}\n\nfusion_models = import_chosen_fusion_models(model_conditions)\n\n# Regression task\nprediction_task = \"regression\"\n\n# Set the batch size\nbatch_size = 48\n\n# Setting output directories\noutput_paths = {\n \"losses\": \"loss_logs/external_data\",\n \"checkpoints\": \"checkpoints/external_data\",\n \"figures\": \"figures/external_data\",\n}\n\nfor dir in output_paths.values():\n os.makedirs(dir, exist_ok=True)\n\n# Clearing the loss logs directory (only for the example notebooks)\nfor dir in os.listdir(output_paths[\"losses\"]):\n # remove files\n for file in os.listdir(os.path.join(output_paths[\"losses\"], dir)):\n os.remove(os.path.join(output_paths[\"losses\"], dir, file))\n # remove dir\n os.rmdir(os.path.join(output_paths[\"losses\"], dir))\n\ntabular1_path, tabular2_path = generate_sklearn_simulated_data(prediction_task,\n num_samples=500,\n num_tab1_features=10,\n num_tab2_features=20)\n\nexternal_tabular1_path, external_tabular2_path = generate_sklearn_simulated_data(prediction_task,\n num_samples=100,\n num_tab1_features=10,\n num_tab2_features=20,\n external=True)\ndata_paths = {\n \"tabular1\": tabular1_path,\n \"tabular2\": tabular2_path,\n \"image\": \"\",\n}\n\nexternal_data_paths = {\n \"tabular1\": external_tabular1_path,\n \"tabular2\": external_tabular2_path,\n \"image\": \"\",\n}\n\nfusion_model = fusion_models[0]\n\nprint(\"Method name:\", fusion_model.method_name)\nprint(\"Modality type:\", fusion_model.modality_type)\nprint(\"Fusion type:\", fusion_model.fusion_type)\n\n# Create 
the data module\ndm = prepare_fusion_data(prediction_task=prediction_task,\n fusion_model=fusion_model,\n data_paths=data_paths,\n output_paths=output_paths,\n batch_size=batch_size, )\n\n# train and test\ntrained_model = train_and_save_models(\n data_module=dm,\n fusion_model=fusion_model,\n enable_checkpointing=True,\n show_loss_plot=True,\n)" | ||
] | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"metadata": {}, | ||
"source": [ | ||
"## Evaluating with validation data\nWe'll start by evaluating the model with the validation data.\n\n" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": { | ||
"collapsed": false | ||
}, | ||
"outputs": [], | ||
"source": [ | ||
"reals_preds_validation = RealsVsPreds.from_final_val_data(trained_model)\nplt.show()" | ||
] | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"metadata": {}, | ||
"source": [ | ||
"## Evaluating with external data\nNow we'll evaluate the model with the external data.\n\n" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": { | ||
"collapsed": false | ||
}, | ||
"outputs": [], | ||
"source": [ | ||
"reals_preds_external = RealsVsPreds.from_new_data(trained_model,\n output_paths=output_paths,\n test_data_paths=external_data_paths)\nplt.show()" | ||
] | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"metadata": {}, | ||
"source": [ | ||
"Removing checkpoint files\n\n" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": { | ||
"collapsed": false | ||
}, | ||
"outputs": [], | ||
"source": [ | ||
"for dir in os.listdir(output_paths[\"checkpoints\"]):\n # remove files\n os.remove(os.path.join(output_paths[\"checkpoints\"], dir))" | ||
] | ||
} | ||
], | ||
"metadata": { | ||
"kernelspec": { | ||
"display_name": "Python 3", | ||
"language": "python", | ||
"name": "python3" | ||
}, | ||
"language_info": { | ||
"codemirror_mode": { | ||
"name": "ipython", | ||
"version": 3 | ||
}, | ||
"file_extension": ".py", | ||
"mimetype": "text/x-python", | ||
"name": "python", | ||
"nbconvert_exporter": "python", | ||
"pygments_lexer": "ipython3", | ||
"version": "3.9.16" | ||
} | ||
}, | ||
"nbformat": 4, | ||
"nbformat_minor": 0 | ||
} |
128 changes: 128 additions & 0 deletions
128
docs/auto_examples/training_and_testing/plot_using_external_data.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,128 @@ | ||
""" | ||
Using External Test Data | ||
======================================================================== | ||
Let's learn how to use external test data with Fusilli! | ||
Some guidance can also be found in the :ref:`Data Loading <data-loading>` section of the documentation. | ||
The extra step that we need to take is to provide the paths to the test data files to the functions that create evaluation figures: :meth:`~fusilli.eval.RealsVsPreds.from_new_data`, :meth:`~fusilli.eval.ConfusionMatrix.from_new_data`, :meth:`~fusilli.eval.ModelComparison.from_new_data`. | ||
.. note:: | ||
It is not possible to use external test data with graph-based fusion models. | ||
We'll rush through the first few steps of the training and testing process, as they are covered in more detail in the other example notebooks. | ||
""" | ||
|
||
import matplotlib.pyplot as plt | ||
from tqdm.auto import tqdm | ||
import os | ||
|
||
from docs.examples import generate_sklearn_simulated_data | ||
from fusilli.data import prepare_fusion_data | ||
from fusilli.eval import RealsVsPreds, ModelComparison | ||
from fusilli.train import train_and_save_models | ||
from fusilli.utils.model_chooser import import_chosen_fusion_models | ||
|
||
# sphinx_gallery_thumbnail_number = -1 | ||
|
||
|
||
# Conditions handed to import_chosen_fusion_models; here a single class name.
model_conditions = dict(class_name=["ConcatTabularData"])

fusion_models = import_chosen_fusion_models(model_conditions)
|
||
# Regression task
prediction_task = "regression"

# Set the batch size
batch_size = 48

# Setting output directories (one sub-directory per kind of output)
output_paths = dict(
    losses="loss_logs/external_data",
    checkpoints="checkpoints/external_data",
    figures="figures/external_data",
)
|
||
# Make sure every output directory exists before anything is written to it.
for output_dir in output_paths.values():
    os.makedirs(output_dir, exist_ok=True)

# Clearing the loss logs directory (only for the example notebooks)
losses_dir = output_paths["losses"]
for entry in os.listdir(losses_dir):
    entry_path = os.path.join(losses_dir, entry)
    if os.path.isdir(entry_path):
        # remove the files inside, then the now-empty dir
        for log_file in os.listdir(entry_path):
            os.remove(os.path.join(entry_path, log_file))
        os.rmdir(entry_path)
    else:
        # A plain file sitting directly in the losses directory: calling
        # os.listdir() on it would raise, so delete it as a file instead.
        os.remove(entry_path)
|
||
# Simulated data used for training/validation: 500 samples, two tabular sources.
tabular1_path, tabular2_path = generate_sklearn_simulated_data(
    prediction_task,
    num_samples=500,
    num_tab1_features=10,
    num_tab2_features=20,
)

# Simulated external test data: 100 samples with the same feature counts.
external_tabular1_path, external_tabular2_path = generate_sklearn_simulated_data(
    prediction_task,
    num_samples=100,
    num_tab1_features=10,
    num_tab2_features=20,
    external=True,
)

# NOTE(review): the empty "image" entry presumably means "no image data" - confirm.
data_paths = dict(
    tabular1=tabular1_path,
    tabular2=tabular2_path,
    image="",
)

external_data_paths = dict(
    tabular1=external_tabular1_path,
    tabular2=external_tabular2_path,
    image="",
)
|
||
# Only one class name was requested, so take the first imported model.
fusion_model = fusion_models[0]

print("Method name:", fusion_model.method_name)
print("Modality type:", fusion_model.modality_type)
print("Fusion type:", fusion_model.fusion_type)

# Create the data module
dm = prepare_fusion_data(
    prediction_task=prediction_task,
    fusion_model=fusion_model,
    data_paths=data_paths,
    output_paths=output_paths,
    batch_size=batch_size,
)

# train and test
training_options = dict(
    data_module=dm,
    fusion_model=fusion_model,
    enable_checkpointing=True,
    show_loss_plot=True,
)
trained_model = train_and_save_models(**training_options)
|
||
# %% | ||
# Evaluating with validation data | ||
# ----------------------------------------------- | ||
# We'll start by evaluating the model with the validation data. | ||
|
||
# Evaluation figure built from the trained model's final validation data.
reals_preds_validation = RealsVsPreds.from_final_val_data(
    trained_model,
)
plt.show()
|
||
# %% | ||
# Evaluating with external data | ||
# ---------------------------------------------- | ||
# Now we'll evaluate the model with the external data. | ||
|
||
# Same kind of figure, but built from the external test files given in
# external_data_paths rather than the validation data.
reals_preds_external = RealsVsPreds.from_new_data(
    trained_model,
    output_paths=output_paths,
    test_data_paths=external_data_paths,
)
plt.show()
|
||
# %% | ||
# Removing checkpoint files | ||
|
||
checkpoints_dir = output_paths["checkpoints"]
for checkpoint_name in os.listdir(checkpoints_dir):
    checkpoint_path = os.path.join(checkpoints_dir, checkpoint_name)
    # remove files only: os.remove() raises when given a directory, so any
    # sub-directory is left in place instead of aborting the cleanup.
    if os.path.isdir(checkpoint_path):
        continue
    os.remove(checkpoint_path)
Oops, something went wrong.