Remove HugeCTR examples

NVIDIA-Merlin · May 9, 2023 · e4e5396 · e4e5396
1 parent 92dc8cb
commit e4e5396
Showing 5 changed files with 44 additions and 1,181 deletions.
diff --git a/examples/scaling-criteo/01-Download-Convert.ipynb b/examples/scaling-criteo/01-Download-Convert.ipynb
@@ -2,7 +2,7 @@
  "cells": [
   {
    "cell_type": "code",
-   "execution_count": 1,
+   "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -33,7 +33,7 @@
     "\n",
     "# Scaling Criteo: Download and Convert\n",
     "\n",
-    "This notebook is created using the latest stable [merlin-hugectr](https://catalog.ngc.nvidia.com/orgs/nvidia/teams/merlin/containers/merlin-hugectr/tags), [merlin-tensorflow](https://catalog.ngc.nvidia.com/orgs/nvidia/teams/merlin/containers/merlin-tensorflow/tags), or [merlin-pytorch](https://catalog.ngc.nvidia.com/orgs/nvidia/teams/merlin/containers/merlin-pytorch/tags) container. \n",
+    "This notebook was created using the latest stable [merlin-tensorflow](https://catalog.ngc.nvidia.com/orgs/nvidia/teams/merlin/containers/merlin-tensorflow/tags) or [merlin-pytorch](https://catalog.ngc.nvidia.com/orgs/nvidia/teams/merlin/containers/merlin-pytorch/tags) container.\n",
     "\n",
     "## Criteo 1TB Click Logs dataset\n",
     "\n",
@@ -51,7 +51,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 2,
+   "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -74,7 +74,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 3,
+   "execution_count": null,
    "metadata": {},
    "outputs": [
     {
@@ -151,7 +151,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 4,
+   "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -177,7 +177,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 5,
+   "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -196,7 +196,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 6,
+   "execution_count": null,
    "metadata": {},
    "outputs": [
     {
@@ -227,7 +227,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 7,
+   "execution_count": null,
    "metadata": {},
    "outputs": [
     {
@@ -277,7 +277,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 8,
+   "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -297,28 +297,9 @@
  ],
  "metadata": {
   "kernelspec": {
-   "display_name": "Python 3 (ipykernel)",
+   "display_name": "python3",
    "language": "python",
    "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.8.10"
-  },
-  "merlin": {
-   "containers": [
-    "nvcr.io/nvidia/merlin/merlin-hugectr:latest",
-    "nvcr.io/nvidia/merlin/merlin-tensorflow:latest",
-    "nvcr.io/nvidia/merlin/merlin-pytorch:latest"
-   ]
   }
  },
  "nbformat": 4,

diff --git a/examples/scaling-criteo/02-ETL-with-NVTabular.ipynb b/examples/scaling-criteo/02-ETL-with-NVTabular.ipynb
@@ -2,12 +2,8 @@
  "cells": [
   {
    "cell_type": "code",
-   "execution_count": 1,
-   "metadata": {
-    "jupyter": {
-     "outputs_hidden": false
-    }
-   },
+   "execution_count": null,
+   "metadata": {},
    "outputs": [],
    "source": [
     "# Copyright 2021 NVIDIA Corporation. All Rights Reserved.\n",
@@ -37,7 +33,7 @@
     "\n",
     "# Scaling Criteo: ETL with NVTabular\n",
     "\n",
-    "This notebook is created using the latest stable [merlin-hugectr](https://catalog.ngc.nvidia.com/orgs/nvidia/teams/merlin/containers/merlin-hugectr/tags), [merlin-tensorflow](https://catalog.ngc.nvidia.com/orgs/nvidia/teams/merlin/containers/merlin-tensorflow/tags), or [merlin-pytorch](https://catalog.ngc.nvidia.com/orgs/nvidia/teams/merlin/containers/merlin-pytorch/tags) container.\n",
+    "This notebook was created using the latest stable [merlin-tensorflow](https://catalog.ngc.nvidia.com/orgs/nvidia/teams/merlin/containers/merlin-tensorflow/tags) or [merlin-pytorch](https://catalog.ngc.nvidia.com/orgs/nvidia/teams/merlin/containers/merlin-pytorch/tags) container.\n",
     "\n",
     "## Overview\n",
     "\n",
@@ -80,12 +76,8 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 2,
-   "metadata": {
-    "jupyter": {
-     "outputs_hidden": false
-    }
-   },
+   "execution_count": null,
+   "metadata": {},
    "outputs": [],
    "source": [
     "# Standard Libraries\n",
@@ -122,19 +114,14 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 5,
-   "metadata": {
-    "jupyter": {
-     "outputs_hidden": false
-    }
-   },
+   "execution_count": null,
+   "metadata": {},
    "outputs": [],
    "source": [
     "# define some information about where to get our data\n",
     "BASE_DIR = os.environ.get(\"BASE_DIR\", \"/raid/data/criteo\")\n",
     "INPUT_DATA_DIR = os.environ.get(\"INPUT_DATA_DIR\", BASE_DIR + \"/converted/criteo\")\n",
     "OUTPUT_DATA_DIR = os.environ.get(\"OUTPUT_DATA_DIR\", BASE_DIR + \"/test_dask/output\")\n",
-    "USE_HUGECTR = bool(os.environ.get(\"USE_HUGECTR\", \"\"))\n",
     "stats_path = os.path.join(OUTPUT_DATA_DIR, \"test_dask/stats\")\n",
     "dask_workdir = os.path.join(OUTPUT_DATA_DIR, \"test_dask/workdir\")\n",
     "\n",
@@ -163,7 +150,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 7,
+   "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -179,12 +166,8 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 8,
-   "metadata": {
-    "jupyter": {
-     "outputs_hidden": false
-    }
-   },
+   "execution_count": null,
+   "metadata": {},
    "outputs": [
     {
      "name": "stdout",
@@ -216,12 +199,8 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 9,
-   "metadata": {
-    "jupyter": {
-     "outputs_hidden": false
-    }
-   },
+   "execution_count": null,
+   "metadata": {},
    "outputs": [
     {
      "name": "stderr",
@@ -465,7 +444,7 @@
        "<Client: 'tcp://127.0.0.1:44059' processes=2 threads=2, memory=100.00 GiB>"
       ]
      },
-     "execution_count": 9,
+     "execution_count": null,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -537,12 +516,8 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 10,
-   "metadata": {
-    "jupyter": {
-     "outputs_hidden": false
-    }
-   },
+   "execution_count": null,
+   "metadata": {},
    "outputs": [],
    "source": [
     "# define our dataset schema\n",
@@ -568,24 +543,19 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "We need to enforce the required HugeCTR data types, so we set them in a dictionary and give as an argument when creating our dataset. The dictionary defines the output datatypes of our datasets."
+    "Optionally, we can define the output data types of our datasets in a dictionary that maps each column name to its dtype."
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 11,
-   "metadata": {
-    "jupyter": {
-     "outputs_hidden": false
-    }
-   },
+   "execution_count": null,
+   "metadata": {},
    "outputs": [],
    "source": [
     "dict_dtypes = {}\n",
     "\n",
-    "# The environment variable USE_HUGECTR defines, if we want to use the output for HugeCTR or another framework\n",
     "for col in CATEGORICAL_COLUMNS:\n",
-    "    dict_dtypes[col] = np.int64 if USE_HUGECTR else np.int32\n",
+    "    dict_dtypes[col] = np.int32\n",
     "\n",
     "for col in CONTINUOUS_COLUMNS:\n",
     "    dict_dtypes[col] = np.float32\n",
@@ -603,12 +573,8 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 13,
-   "metadata": {
-    "jupyter": {
-     "outputs_hidden": false
-    }
-   },
+   "execution_count": null,
+   "metadata": {},
    "outputs": [],
    "source": [
     "train_dataset = nvt.Dataset(train_paths, engine=\"parquet\", part_size=part_size)\n",
@@ -624,12 +590,8 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 14,
-   "metadata": {
-    "jupyter": {
-     "outputs_hidden": false
-    }
-   },
+   "execution_count": null,
+   "metadata": {},
    "outputs": [],
    "source": [
     "output_train_dir = os.path.join(OUTPUT_DATA_DIR, \"train/\")\n",
@@ -647,7 +609,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 15,
+   "execution_count": null,
    "metadata": {},
    "outputs": [
     {
@@ -661,10 +623,10 @@
     {
      "data": {
       "text/plain": [
-       "<nvtabular.workflow.workflow.Workflow at 0x7fdacec4fdc0>"
+       "<nvtabular.workflow.workflow.Workflow>"
       ]
      },
-     "execution_count": 15,
+     "execution_count": null,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -676,12 +638,8 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 16,
-   "metadata": {
-    "jupyter": {
-     "outputs_hidden": false
-    }
-   },
+   "execution_count": null,
+   "metadata": {},
    "outputs": [
     {
      "name": "stdout",
@@ -708,12 +666,8 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 17,
-   "metadata": {
-    "jupyter": {
-     "outputs_hidden": false
-    }
-   },
+   "execution_count": null,
+   "metadata": {},
    "outputs": [
     {
      "name": "stdout",
@@ -745,7 +699,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 18,
+   "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -754,35 +708,11 @@
   }
  ],
  "metadata": {
-  "file_extension": ".py",
   "kernelspec": {
-   "display_name": "Python 3 (ipykernel)",
+   "display_name": "python3",
    "language": "python",
    "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.8.10"
-  },
-  "merlin": {
-   "containers": [
-    "nvcr.io/nvidia/merlin/merlin-hugectr:latest",
-    "nvcr.io/nvidia/merlin/merlin-tensorflow:latest",
-    "nvcr.io/nvidia/merlin/merlin-pytorch:latest"
-   ]
-  },
-  "mimetype": "text/x-python",
-  "npconvert_exporter": "python",
-  "pygments_lexer": "ipython3",
-  "version": 3
+  }
  },
  "nbformat": 4,
  "nbformat_minor": 4

diff --git a/examples/scaling-criteo/03-Training-with-HugeCTR.ipynb b/examples/scaling-criteo/03-Training-with-HugeCTR.ipynb
diff --git a/examples/scaling-criteo/04-Triton-Inference-with-HugeCTR.ipynb b/examples/scaling-criteo/04-Triton-Inference-with-HugeCTR.ipynb
diff --git a/examples/scaling-criteo/README.md b/examples/scaling-criteo/README.md
@@ -6,8 +6,7 @@ We demonstrate how to scale NVTabular, as well as:
 
 - Use multiple GPUs and nodes with NVTabular for feature engineering.
 - Train recommender system models with the Merlin Models for TensorFlow.
-- Train recommender system models with HugeCTR using multiple GPUs.
-- Inference with the Triton Inference Server and Merlin Models for TensorFlow or HugeCTR.
+- Run inference with the Triton Inference Server and Merlin Models for TensorFlow.
 
 Our recommendation is to use our latest stable [Merlin containers](https://catalog.ngc.nvidia.com/containers?filters=&orderBy=dateModifiedDESC&query=merlin) for the examples. Each notebook provides the required container.  
 
@@ -18,9 +17,3 @@ Training and Deployment with **TensorFlow**:
 - [Feature Engineering with NVTabular](02-ETL-with-NVTabular.ipynb)
 - [Training with TensorFlow](03-Training-with-Merlin-Models-TensorFlow.ipynb)
 - [Deploy the TensorFlow Model with Triton Inference Server](04-Triton-Inference-with-Merlin-Models-TensorFlow.ipynb)
-
-Training and Deployment with **HugeCTR**:
-- [Download and Convert](01-Download-Convert.ipynb)
-- [Feature Engineering with NVTabular](02-ETL-with-NVTabular.ipynb)
-- [Training with HugeCTR](03-Training-with-HugeCTR.ipynb)
-- [Deploy the HugeCTR Model with Triton Inference Server](04-Triton-Inference-with-HugeCTR.ipynb)