Fixing various errors in the structured_data files (#2013)
* fixing errors in movielens_recommendations_transformers.py

* fixing errors in wide_deep_cross_networks.py

* fixing data download errors in feature_space_advanced.py

* generating .ipynb and .md files for movielens_recommendations_transformers.py

* generating .ipynb and .md files for wide_deep_cross_networks.py

* generating .ipynb and .md files for feature_space_advanced.py
Humbulani1234 authored on Jan 9, 2025 · parent 22554c6 · commit 17ce1f3
Showing 12 changed files with 45,902 additions and 559 deletions.
6 changes: 3 additions & 3 deletions examples/structured_data/feature_space_advanced.py
@@ -2,7 +2,7 @@
Title: FeatureSpace advanced use cases
Author: [Dimitre Oliveira](https://www.linkedin.com/in/dimitre-oliveira-7a1a0113a/)
Date created: 2023/07/01
-Last modified: 2023/07/01
+Last modified: 2025/01/03
Description: How to use FeatureSpace for advanced preprocessing use cases.
Accelerator: None
"""
@@ -90,7 +90,7 @@

data_url = "https://archive.ics.uci.edu/static/public/222/bank+marketing.zip"
data_zipped_path = keras.utils.get_file("bank_marketing.zip", data_url, extract=True)
-keras_datasets_path = Path(data_zipped_path).parents[0]
+keras_datasets_path = Path(data_zipped_path)
with ZipFile(f"{keras_datasets_path}/bank-additional.zip", "r") as zip:
# Extract files
zip.extractall(path=keras_datasets_path)
@@ -538,7 +538,7 @@ def example_feature_space(dataset, feature_space, feature_names):
"""

model.fit(
-    preprocessed_train_ds, validation_data=preprocessed_valid_ds, epochs=20, verbose=2
+    preprocessed_train_ds, validation_data=preprocessed_valid_ds, epochs=10, verbose=2
)

"""
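Why the `.parents[0]` lookup was dropped: this fix is consistent with recent Keras 3 behavior where `keras.utils.get_file(..., extract=True)` returns the path of the extracted directory rather than the downloaded archive, so taking the parent directory now points one level too high. A minimal sketch of the corrected download step, under that assumption:

```python
# Sketch of the corrected download logic, assuming (as this diff suggests) that
# keras.utils.get_file(..., extract=True) returns the extraction directory
# rather than the archive path, which made the old .parents[0] lookup wrong.
from pathlib import Path
from zipfile import ZipFile

import keras

data_url = "https://archive.ics.uci.edu/static/public/222/bank+marketing.zip"
data_zipped_path = keras.utils.get_file("bank_marketing.zip", data_url, extract=True)
keras_datasets_path = Path(data_zipped_path)  # already the extracted folder

# The UCI archive nests a second zip that still needs manual extraction.
with ZipFile(keras_datasets_path / "bank-additional.zip", "r") as inner_zip:
    inner_zip.extractall(path=keras_datasets_path)
```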
[Three more changed files could not be rendered in the diff view.]
61 changes: 31 additions & 30 deletions examples/structured_data/ipynb/feature_space_advanced.ipynb
@@ -10,7 +10,7 @@
"\n",
"**Author:** [Dimitre Oliveira](https://www.linkedin.com/in/dimitre-oliveira-7a1a0113a/)<br>\n",
"**Date created:** 2023/07/01<br>\n",
"**Last modified:** 2023/07/01<br>\n",
"**Last modified:** 2025/01/03<br>\n",
"**Description:** How to use FeatureSpace for advanced preprocessing use cases."
]
},
@@ -94,7 +94,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 0,
"metadata": {
"colab_type": "code"
},
@@ -125,15 +125,15 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 0,
"metadata": {
"colab_type": "code"
},
"outputs": [],
"source": [
"data_url = \"https://archive.ics.uci.edu/static/public/222/bank+marketing.zip\"\n",
"data_zipped_path = keras.utils.get_file(\"bank_marketing.zip\", data_url, extract=True)\n",
"keras_datasets_path = Path(data_zipped_path).parents[0]\n",
"keras_datasets_path = Path(data_zipped_path)\n",
"with ZipFile(f\"{keras_datasets_path}/bank-additional.zip\", \"r\") as zip:\n",
" # Extract files\n",
" zip.extractall(path=keras_datasets_path)\n",
@@ -157,7 +157,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 0,
"metadata": {
"colab_type": "code"
},
@@ -183,7 +183,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 0,
"metadata": {
"colab_type": "code"
},
@@ -215,7 +215,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 0,
"metadata": {
"colab_type": "code"
},
@@ -248,7 +248,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 0,
"metadata": {
"colab_type": "code"
},
@@ -291,7 +291,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 0,
"metadata": {
"colab_type": "code"
},
@@ -335,7 +335,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 0,
"metadata": {
"colab_type": "code"
},
@@ -352,7 +352,8 @@
" print(f\"Input: {[{k:v.numpy()} for k, v in inputs.items()]}\")\n",
" print(\n",
" f\"Preprocessed output: {[{k:v.numpy()} for k, v in preprocessed_x.items()]}\"\n",
" )\n"
" )\n",
""
]
},
{
@@ -380,7 +381,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 0,
"metadata": {
"colab_type": "code"
},
@@ -406,7 +407,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 0,
"metadata": {
"colab_type": "code"
},
@@ -436,7 +437,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 0,
"metadata": {
"colab_type": "code"
},
@@ -476,7 +477,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 0,
"metadata": {
"colab_type": "code"
},
@@ -514,7 +515,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 0,
"metadata": {
"colab_type": "code"
},
@@ -563,7 +564,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 0,
"metadata": {
"colab_type": "code"
},
@@ -612,7 +613,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 0,
"metadata": {
"colab_type": "code"
},
@@ -650,7 +651,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 0,
"metadata": {
"colab_type": "code"
},
@@ -722,7 +723,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 0,
"metadata": {
"colab_type": "code"
},
@@ -748,7 +749,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 0,
"metadata": {
"colab_type": "code"
},
@@ -778,7 +779,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 0,
"metadata": {
"colab_type": "code"
},
@@ -806,7 +807,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 0,
"metadata": {
"colab_type": "code"
},
@@ -836,7 +837,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 0,
"metadata": {
"colab_type": "code"
},
@@ -858,7 +859,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 0,
"metadata": {
"colab_type": "code"
},
@@ -886,14 +887,14 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 0,
"metadata": {
"colab_type": "code"
},
"outputs": [],
"source": [
"model.fit(\n",
" preprocessed_train_ds, validation_data=preprocessed_valid_ds, epochs=20, verbose=2\n",
" preprocessed_train_ds, validation_data=preprocessed_valid_ds, epochs=10, verbose=2\n",
")"
]
},
@@ -924,7 +925,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 0,
"metadata": {
"colab_type": "code"
},
@@ -947,7 +948,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 0,
"metadata": {
"colab_type": "code"
},
@@ -1026,4 +1027,4 @@
},
"nbformat": 4,
"nbformat_minor": 0
-}
\ No newline at end of file
+}
examples/structured_data/ipynb/movielens_recommendations_transformers.ipynb
@@ -10,7 +10,7 @@
"\n",
"**Author:** [Khalid Salama](https://www.linkedin.com/in/khalid-salama-24403144/)<br>\n",
"**Date created:** 2020/12/30<br>\n",
"**Last modified:** 2020/12/30<br>\n",
"**Last modified:** 2025/01/03<br>\n",
"**Description:** Rating rate prediction using the Behavior Sequence Transformer (BST) model on the Movielens."
]
},
@@ -429,7 +429,7 @@
"outputs": [],
"source": [
"\n",
"def get_dataset_from_csv(csv_file_path, shuffle=False, batch_size=128):\n",
"def get_dataset_from_csv(csv_file_path, batch_size, shuffle=True):\n",
" def process(features):\n",
" movie_ids_string = features[\"sequence_movie_ids\"]\n",
" sequence_movie_ids = tf.strings.split(movie_ids_string, \",\").to_tensor()\n",
@@ -447,7 +447,7 @@
" target = sequence_ratings[:, -1]\n",
" features[\"sequence_ratings\"] = sequence_ratings[:, :-1]\n",
"\n",
" return features, target\n",
" return dict(features), target\n",
"\n",
" dataset = tf.data.experimental.make_csv_dataset(\n",
" csv_file_path,\n",
@@ -759,10 +759,10 @@
")\n",
"\n",
"# Read the training data.\n",
"train_dataset = get_dataset_from_csv(\"train_data.csv\", shuffle=True, batch_size=265)\n",
"train_dataset = get_dataset_from_csv(\"train_data.csv\", batch_size=265, shuffle=True)\n",
"\n",
"# Fit the model with the training data.\n",
"model.fit(train_dataset, epochs=5)\n",
"model.fit(train_dataset, epochs=2)\n",
"\n",
"# Read the test data.\n",
"test_dataset = get_dataset_from_csv(\"test_data.csv\", batch_size=265)\n",
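Beyond the regenerated cell metadata, the substantive change in this notebook is the input pipeline: `make_csv_dataset` yields `OrderedDict` feature batches, which Keras 3 models no longer accept as inputs, so `process` now casts them to a plain `dict` (and the function signature gains a required `batch_size` with `shuffle=True` by default). A runnable sketch of the pattern, with the sequence-splitting logic abbreviated and the CSV options assumed rather than taken from the diff:

```python
# Sketch of the corrected BST input pipeline. The dict(features) cast and the
# (csv_file_path, batch_size, shuffle=True) signature come from the diff; the
# CSV options below are illustrative assumptions.
import tensorflow as tf

def get_dataset_from_csv(csv_file_path, batch_size, shuffle=True):
    def process(features):
        # Split the "sequence_ratings" string column; the last rating is the target.
        sequence_ratings = tf.strings.to_number(
            tf.strings.split(features["sequence_ratings"], ","), tf.float32
        ).to_tensor()
        target = sequence_ratings[:, -1]
        features["sequence_ratings"] = sequence_ratings[:, :-1]
        # Keras 3 rejects OrderedDict inputs; cast to a plain dict.
        return dict(features), target

    return tf.data.experimental.make_csv_dataset(
        csv_file_path,
        batch_size=batch_size,
        num_epochs=1,
        header=True,
        shuffle=shuffle,
    ).map(process)
```

With this in place, the updated call site in the diff, `get_dataset_from_csv("train_data.csv", batch_size=265, shuffle=True)`, reads naturally.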
11 changes: 8 additions & 3 deletions examples/structured_data/ipynb/wide_deep_cross_networks.ipynb
@@ -10,7 +10,7 @@
"\n",
"**Author:** [Khalid Salama](https://www.linkedin.com/in/khalid-salama-24403144/)<br>\n",
"**Date created:** 2020/12/31<br>\n",
"**Last modified:** 2021/05/05<br>\n",
"**Last modified:** 2025/01/03<br>\n",
"**Description:** Using Wide & Deep and Deep & Cross networks for structured data classification."
]
},
@@ -296,6 +296,11 @@
},
"outputs": [],
"source": [
"\n",
"# To convert the datasets elements to from OrderedDict to Dictionary\n",
"def process(features, target):\n",
" return dict(features), target\n",
"\n",
"\n",
"def get_dataset_from_csv(csv_file_path, batch_size, shuffle=False):\n",
" dataset = tf_data.experimental.make_csv_dataset(\n",
@@ -307,7 +312,7 @@
" num_epochs=1,\n",
" header=True,\n",
" shuffle=shuffle,\n",
" )\n",
" ).map(process)\n",
" return dataset.cache()\n",
""
]
@@ -333,7 +338,7 @@
"learning_rate = 0.001\n",
"dropout_rate = 0.1\n",
"batch_size = 265\n",
"num_epochs = 50\n",
"num_epochs = 1\n",
"\n",
"hidden_units = [32, 32]\n",
"\n",
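Here the same OrderedDict-to-dict fix is applied one level up: because this example passes a label column to `make_csv_dataset`, the dataset yields `(features, target)` pairs, so a standalone two-argument `process` function is mapped over it. A sketch under the same Keras 3 assumption; the label column name below is a placeholder, not taken from the diff:

```python
# Same OrderedDict -> dict fix, but for a labeled dataset: make_csv_dataset
# with label_name yields (features, target) pairs, so process takes two args.
import tensorflow as tf

def process(features, target):
    # Cast the OrderedDict of feature columns to a plain dict for Keras 3.
    return dict(features), target

def get_dataset_from_csv(csv_file_path, batch_size, shuffle=False):
    dataset = tf.data.experimental.make_csv_dataset(
        csv_file_path,
        batch_size=batch_size,
        label_name="target",  # placeholder; the example defines its own label column
        num_epochs=1,
        header=True,
        shuffle=shuffle,
    ).map(process)
    return dataset.cache()
```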