Fixing various errors in the structured_data files (#2013)
* fixing errors in movielens_recommendations_transformers.py

* fixing errors in wide_deep_cross_networks.py

* fixing data download errors in feature_space_advanced.py

* generating .ipynb and .md files for movielens_recommendations_transformers.py

* generating .ipynb and .md files for wide_deep_cross_networks.py

* generating .ipynb and .md files for feature_space_advanced.py
Humbulani1234 authored on Jan 9, 2025 · parent 22554c6 · commit 17ce1f3
Showing 12 changed files with 45,902 additions and 559 deletions.
6 changes: 3 additions & 3 deletions examples/structured_data/feature_space_advanced.py
@@ -2,7 +2,7 @@
Title: FeatureSpace advanced use cases
Author: [Dimitre Oliveira](https://www.linkedin.com/in/dimitre-oliveira-7a1a0113a/)
Date created: 2023/07/01
-Last modified: 2023/07/01
+Last modified: 2025/01/03
Description: How to use FeatureSpace for advanced preprocessing use cases.
Accelerator: None
"""
@@ -90,7 +90,7 @@

data_url = "https://archive.ics.uci.edu/static/public/222/bank+marketing.zip"
data_zipped_path = keras.utils.get_file("bank_marketing.zip", data_url, extract=True)
-keras_datasets_path = Path(data_zipped_path).parents[0]
+keras_datasets_path = Path(data_zipped_path)
with ZipFile(f"{keras_datasets_path}/bank-additional.zip", "r") as zip:
# Extract files
zip.extractall(path=keras_datasets_path)
@@ -538,7 +538,7 @@ def example_feature_space(dataset, feature_space, feature_names):
"""

model.fit(
-    preprocessed_train_ds, validation_data=preprocessed_valid_ds, epochs=20, verbose=2
+    preprocessed_train_ds, validation_data=preprocessed_valid_ds, epochs=10, verbose=2
)

"""
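Why the `.parents[0]` lookup was dropped: this fix is consistent with recent Keras 3 behavior where `keras.utils.get_file(..., extract=True)` returns the path of the extracted directory rather than the downloaded archive, so taking the parent directory now points one level too high. A minimal sketch of the corrected download step, under that assumption:

```python
# Sketch of the corrected download logic, assuming (as this diff suggests) that
# keras.utils.get_file(..., extract=True) returns the extraction directory
# rather than the archive path, which made the old .parents[0] lookup wrong.
from pathlib import Path
from zipfile import ZipFile

import keras

data_url = "https://archive.ics.uci.edu/static/public/222/bank+marketing.zip"
data_zipped_path = keras.utils.get_file("bank_marketing.zip", data_url, extract=True)
keras_datasets_path = Path(data_zipped_path)  # already the extracted folder

# The UCI archive nests a second zip that still needs manual extraction.
with ZipFile(keras_datasets_path / "bank-additional.zip", "r") as inner_zip:
    inner_zip.extractall(path=keras_datasets_path)
```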
[Three more changed files could not be rendered in the diff view.]
61 changes: 31 additions & 30 deletions examples/structured_data/ipynb/feature_space_advanced.ipynb
@@ -10,7 +10,7 @@
"\n",
"**Author:** [Dimitre Oliveira](https://www.linkedin.com/in/dimitre-oliveira-7a1a0113a/)<br>\n",
"**Date created:** 2023/07/01<br>\n",
"**Last modified:** 2023/07/01<br>\n",
"**Last modified:** 2025/01/03<br>\n",
"**Description:** How to use FeatureSpace for advanced preprocessing use cases."
]
},
@@ -94,7 +94,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 0,
"metadata": {
"colab_type": "code"
},
@@ -125,15 +125,15 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 0,
"metadata": {
"colab_type": "code"
},
"outputs": [],
"source": [
"data_url = \"https://archive.ics.uci.edu/static/public/222/bank+marketing.zip\"\n",
"data_zipped_path = keras.utils.get_file(\"bank_marketing.zip\", data_url, extract=True)\n",
"keras_datasets_path = Path(data_zipped_path).parents[0]\n",
"keras_datasets_path = Path(data_zipped_path)\n",
"with ZipFile(f\"{keras_datasets_path}/bank-additional.zip\", \"r\") as zip:\n",
" # Extract files\n",
" zip.extractall(path=keras_datasets_path)\n",
@@ -157,7 +157,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 0,
"metadata": {
"colab_type": "code"
},
@@ -183,7 +183,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 0,
"metadata": {
"colab_type": "code"
},
@@ -215,7 +215,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 0,
"metadata": {
"colab_type": "code"
},
@@ -248,7 +248,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 0,
"metadata": {
"colab_type": "code"
},
@@ -291,7 +291,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 0,
"metadata": {
"colab_type": "code"
},
@@ -335,7 +335,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 0,
"metadata": {
"colab_type": "code"
},
@@ -352,7 +352,8 @@
" print(f\"Input: {[{k:v.numpy()} for k, v in inputs.items()]}\")\n",
" print(\n",
" f\"Preprocessed output: {[{k:v.numpy()} for k, v in preprocessed_x.items()]}\"\n",
" )\n"
" )\n",
""
]
},
{
@@ -380,7 +381,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 0,
"metadata": {
"colab_type": "code"
},
@@ -406,7 +407,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 0,
"metadata": {
"colab_type": "code"
},
@@ -436,7 +437,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 0,
"metadata": {
"colab_type": "code"
},
@@ -476,7 +477,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 0,
"metadata": {
"colab_type": "code"
},
@@ -514,7 +515,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 0,
"metadata": {
"colab_type": "code"
},
@@ -563,7 +564,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 0,
"metadata": {
"colab_type": "code"
},
@@ -612,7 +613,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 0,
"metadata": {
"colab_type": "code"
},
@@ -650,7 +651,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 0,
"metadata": {
"colab_type": "code"
},
@@ -722,7 +723,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 0,
"metadata": {
"colab_type": "code"
},
@@ -748,7 +749,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 0,
"metadata": {
"colab_type": "code"
},
@@ -778,7 +779,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 0,
"metadata": {
"colab_type": "code"
},
@@ -806,7 +807,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 0,
"metadata": {
"colab_type": "code"
},
@@ -836,7 +837,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 0,
"metadata": {
"colab_type": "code"
},
@@ -858,7 +859,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 0,
"metadata": {
"colab_type": "code"
},
@@ -886,14 +887,14 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 0,
"metadata": {
"colab_type": "code"
},
"outputs": [],
"source": [
"model.fit(\n",
" preprocessed_train_ds, validation_data=preprocessed_valid_ds, epochs=20, verbose=2\n",
" preprocessed_train_ds, validation_data=preprocessed_valid_ds, epochs=10, verbose=2\n",
")"
]
},
@@ -924,7 +925,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 0,
"metadata": {
"colab_type": "code"
},
@@ -947,7 +948,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 0,
"metadata": {
"colab_type": "code"
},
@@ -1026,4 +1027,4 @@
},
"nbformat": 4,
"nbformat_minor": 0
-}
\ No newline at end of file
+}
examples/structured_data/ipynb/movielens_recommendations_transformers.ipynb
@@ -10,7 +10,7 @@
"\n",
"**Author:** [Khalid Salama](https://www.linkedin.com/in/khalid-salama-24403144/)<br>\n",
"**Date created:** 2020/12/30<br>\n",
"**Last modified:** 2020/12/30<br>\n",
"**Last modified:** 2025/01/03<br>\n",
"**Description:** Rating rate prediction using the Behavior Sequence Transformer (BST) model on the Movielens."
]
},
@@ -429,7 +429,7 @@
"outputs": [],
"source": [
"\n",
"def get_dataset_from_csv(csv_file_path, shuffle=False, batch_size=128):\n",
"def get_dataset_from_csv(csv_file_path, batch_size, shuffle=True):\n",
" def process(features):\n",
" movie_ids_string = features[\"sequence_movie_ids\"]\n",
" sequence_movie_ids = tf.strings.split(movie_ids_string, \",\").to_tensor()\n",
@@ -447,7 +447,7 @@
" target = sequence_ratings[:, -1]\n",
" features[\"sequence_ratings\"] = sequence_ratings[:, :-1]\n",
"\n",
" return features, target\n",
" return dict(features), target\n",
"\n",
" dataset = tf.data.experimental.make_csv_dataset(\n",
" csv_file_path,\n",
@@ -759,10 +759,10 @@
")\n",
"\n",
"# Read the training data.\n",
"train_dataset = get_dataset_from_csv(\"train_data.csv\", shuffle=True, batch_size=265)\n",
"train_dataset = get_dataset_from_csv(\"train_data.csv\", batch_size=265, shuffle=True)\n",
"\n",
"# Fit the model with the training data.\n",
"model.fit(train_dataset, epochs=5)\n",
"model.fit(train_dataset, epochs=2)\n",
"\n",
"# Read the test data.\n",
"test_dataset = get_dataset_from_csv(\"test_data.csv\", batch_size=265)\n",
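Beyond the regenerated cell metadata, the substantive change in this notebook is the input pipeline: `make_csv_dataset` yields `OrderedDict` feature batches, which Keras 3 models no longer accept as inputs, so `process` now casts them to a plain `dict` (and the function signature gains a required `batch_size` with `shuffle=True` by default). A runnable sketch of the pattern, with the sequence-splitting logic abbreviated and the CSV options assumed rather than taken from the diff:

```python
# Sketch of the corrected BST input pipeline. The dict(features) cast and the
# (csv_file_path, batch_size, shuffle=True) signature come from the diff; the
# CSV options below are illustrative assumptions.
import tensorflow as tf

def get_dataset_from_csv(csv_file_path, batch_size, shuffle=True):
    def process(features):
        # Split the "sequence_ratings" string column; the last rating is the target.
        sequence_ratings = tf.strings.to_number(
            tf.strings.split(features["sequence_ratings"], ","), tf.float32
        ).to_tensor()
        target = sequence_ratings[:, -1]
        features["sequence_ratings"] = sequence_ratings[:, :-1]
        # Keras 3 rejects OrderedDict inputs; cast to a plain dict.
        return dict(features), target

    return tf.data.experimental.make_csv_dataset(
        csv_file_path,
        batch_size=batch_size,
        num_epochs=1,
        header=True,
        shuffle=shuffle,
    ).map(process)
```

With this in place, the updated call site in the diff, `get_dataset_from_csv("train_data.csv", batch_size=265, shuffle=True)`, reads naturally.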
11 changes: 8 additions & 3 deletions examples/structured_data/ipynb/wide_deep_cross_networks.ipynb
@@ -10,7 +10,7 @@
"\n",
"**Author:** [Khalid Salama](https://www.linkedin.com/in/khalid-salama-24403144/)<br>\n",
"**Date created:** 2020/12/31<br>\n",
"**Last modified:** 2021/05/05<br>\n",
"**Last modified:** 2025/01/03<br>\n",
"**Description:** Using Wide & Deep and Deep & Cross networks for structured data classification."
]
},
@@ -296,6 +296,11 @@
},
"outputs": [],
"source": [
"\n",
"# To convert the datasets elements to from OrderedDict to Dictionary\n",
"def process(features, target):\n",
" return dict(features), target\n",
"\n",
"\n",
"def get_dataset_from_csv(csv_file_path, batch_size, shuffle=False):\n",
" dataset = tf_data.experimental.make_csv_dataset(\n",
@@ -307,7 +312,7 @@
" num_epochs=1,\n",
" header=True,\n",
" shuffle=shuffle,\n",
" )\n",
" ).map(process)\n",
" return dataset.cache()\n",
""
]
@@ -333,7 +338,7 @@
"learning_rate = 0.001\n",
"dropout_rate = 0.1\n",
"batch_size = 265\n",
"num_epochs = 50\n",
"num_epochs = 1\n",
"\n",
"hidden_units = [32, 32]\n",
"\n",
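Here the same OrderedDict-to-dict fix is applied one level up: because this example passes a label column to `make_csv_dataset`, the dataset yields `(features, target)` pairs, so a standalone two-argument `process` function is mapped over it. A sketch under the same Keras 3 assumption; the label column name below is a placeholder, not taken from the diff:

```python
# Same OrderedDict -> dict fix, but for a labeled dataset: make_csv_dataset
# with label_name yields (features, target) pairs, so process takes two args.
import tensorflow as tf

def process(features, target):
    # Cast the OrderedDict of feature columns to a plain dict for Keras 3.
    return dict(features), target

def get_dataset_from_csv(csv_file_path, batch_size, shuffle=False):
    dataset = tf.data.experimental.make_csv_dataset(
        csv_file_path,
        batch_size=batch_size,
        label_name="target",  # placeholder; the example defines its own label column
        num_epochs=1,
        header=True,
        shuffle=shuffle,
    ).map(process)
    return dataset.cache()
```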