diff --git a/applications/rag/example_notebooks/rag-kaggle-ray-sql-interactive.ipynb b/applications/rag/example_notebooks/rag-kaggle-ray-sql-interactive.ipynb index 750bd9749..2b80e437e 100644 --- a/applications/rag/example_notebooks/rag-kaggle-ray-sql-interactive.ipynb +++ b/applications/rag/example_notebooks/rag-kaggle-ray-sql-interactive.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "markdown", - "id": "304bba57-cdb4-42d5-a4b7-2494b6cfa4ff", + "id": "5574f366-58e9-408b-aea4-1bf5b3351e4c", "metadata": {}, "source": [ "# RAG-on-GKE Application\n", @@ -12,38 +12,19 @@ "\n", "## Setup Kaggle Credentials\n", "\n", - "First we will setup your Kaggle credentials. Replace the following with your own settings from the Kaggle web page. Navigate to https://www.kaggle.com/settings/account and generate an API token to be used to setup the env variable. See https://www.kaggle.com/docs/api#authentication how to create one." + "First we will set up your Kaggle credentials and use the Kaggle CLI to download the Netflix shows dataset to the GCS bucket. Replace the following with your own settings from the Kaggle web page. Navigate to https://www.kaggle.com/settings/account and generate an API token to be used to set up the environment variables. See https://www.kaggle.com/docs/api#authentication for how to create one." 
] }, { "cell_type": "code", "execution_count": null, - "id": "00b1aff4", - "metadata": {}, - "outputs": [], - "source": [ - "KAGGLE_USERNAME = \"\"\n", - "KAGGLE_KEY = \"\"" - ] - }, - { - "cell_type": "markdown", - "id": "a81ab34e-a0ad-4340-8d04-45e9ce4c7416", - "metadata": {}, - "source": [ - "Now we will use the Kaggle CLI to download our data to the mounted GCS bucket:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "1e26faef-9e2e-4793-b8af-0e18470b482d", + "id": "ffee2bec-804f-4e22-9ba0-8b1db5a5d7ec", "metadata": {}, "outputs": [], "source": [ "import os\n", - "os.environ['KAGGLE_USERNAME'] = KAGGLE_USERNAME\n", - "os.environ['KAGGLE_KEY'] = KAGGLE_KEY\n", + "os.environ['KAGGLE_USERNAME'] = \"\"\n", + "os.environ['KAGGLE_KEY'] = \"\"\n", "\n", "# Download the zip file to local storage and then extract the desired contents directly to the GKE GCS CSI mounted bucket. The bucket is mounted at the \"/persist-data\" path in the jupyter pod.\n", "!kaggle datasets download -d shivamb/netflix-shows -p ~/data --force\n",