diff --git a/examples/dask/machine-learning-grid-search.ipynb b/examples/dask/machine-learning-grid-search.ipynb index 9e912f6f..bb58d2a2 100644 --- a/examples/dask/machine-learning-grid-search.ipynb +++ b/examples/dask/machine-learning-grid-search.ipynb @@ -110,7 +110,7 @@ }, "outputs": [], "source": [ - "taxi = pd.read_parquet(\"https://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2020-05.parquet\")" + "taxi = pd.read_parquet(\"s3://saturn-public-data/nyc-taxi/data/yellow_tripdata_2019-01.parquet\")" ] }, { @@ -308,7 +308,7 @@ "import dask.dataframe as dd\n", "\n", "taxi_dd = dd.read_parquet(\n", - " \"s3://nyc-tlc/trip data/yellow_tripdata_2020-05.parquet\",\n", + " \"s3://saturn-public-data/nyc-taxi/data/yellow_tripdata_2019-01.parquet\",\n", " storage_options={\"anon\": True},\n", " assume_missing=True,\n", ")" diff --git a/examples/dask/special-topics-rolling-average.ipynb b/examples/dask/special-topics-rolling-average.ipynb index 2bec801b..1332afb0 100644 --- a/examples/dask/special-topics-rolling-average.ipynb +++ b/examples/dask/special-topics-rolling-average.ipynb @@ -73,7 +73,7 @@ "import dask.dataframe as dd\n", "\n", "taxi = dd.read_parquet(\n", - " \"s3://nyc-tlc/trip data/yellow_tripdata_2019-01.parquet\",\n", + " \"s3://saturn-public-data/nyc-taxi/data/yellow_tripdata_2019-01.parquet\",\n", " storage_options={\"anon\": True},\n", ").sample(frac=0.1, replace=False)" ] diff --git a/examples/load-data/load-data-s3.ipynb b/examples/load-data/load-data-s3.ipynb index 2d13ace3..d59de26b 100644 --- a/examples/load-data/load-data-s3.ipynb +++ b/examples/load-data/load-data-s3.ipynb @@ -126,7 +126,7 @@ "source": [ "import pandas as pd\n", "\n", - "file = \"nyc-tlc/trip data/yellow_tripdata_2019-01.parquet\"\n", + "file = \"saturn-public-data/nyc-taxi/data/yellow_tripdata_2019-01.parquet\"\n", "with s3.open(file, mode=\"rb\") as f:\n", " df = pd.read_parquet(f)" ] @@ -149,7 +149,7 @@ "source": [ "import dask.dataframe as dd\n", "\n", - "file = 
\"nyc-tlc/trip data/yellow_tripdata_2019-01.parquet\"\n", + "file = \"saturn-public-data/nyc-taxi/data/yellow_tripdata_2019-01.parquet\"\n", "with s3.open(file, mode=\"rb\") as f:\n", " df = dd.read_parquet(f)" ] @@ -168,7 +168,7 @@ "metadata": {}, "outputs": [], "source": [ - "files = s3.glob(\"s3://nyc-tlc/trip data/yellow_tripdata_2019-*.parquet\")\n", + "files = s3.glob(\"s3://saturn-public-data/nyc-taxi/data/yellow_tripdata_2019-*.parquet\")\n", "taxi = dd.read_parquet(\n", " files,\n", " storage_options={\"anon\": False},\n", diff --git a/examples/prefect/03-prefect-resource-manager.ipynb b/examples/prefect/03-prefect-resource-manager.ipynb index c6f75790..3520acbb 100644 --- a/examples/prefect/03-prefect-resource-manager.ipynb +++ b/examples/prefect/03-prefect-resource-manager.ipynb @@ -158,9 +158,7 @@ "source": [ "@task\n", "def read():\n", - " taxi = dd.read_parquet(\n", - " \"https://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2019-01.parquet\"\n", - " )\n", + " taxi = dd.read_parquet(\"s3://saturn-public-data/nyc-taxi/data/yellow_tripdata_2019-01.parquet\")\n", " df2 = taxi[taxi.passenger_count > 1]\n", " df3 = df2.groupby(\"VendorID\").passenger_count.std()\n", " return df3" @@ -269,7 +267,7 @@ ], "metadata": { "kernelspec": { - "display_name": "saturn (Python 3)", + "display_name": "Python 3.10.2 64-bit", "language": "python", "name": "python3" }, @@ -283,7 +281,12 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.9" + "version": "3.10.2" + }, + "vscode": { + "interpreter": { + "hash": "b7848b2fbd737d4d16c30c2a265d9cb43a8b0508277d828bf32f61f61a6b4e46" + } } }, "nbformat": 4, diff --git a/examples/rapids-comparison/comparison.ipynb b/examples/rapids-comparison/comparison.ipynb index cbc14d69..84940af9 100644 --- a/examples/rapids-comparison/comparison.ipynb +++ b/examples/rapids-comparison/comparison.ipynb @@ -64,7 +64,7 @@ }, "outputs": [], "source": [ - "!curl 
https://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2019-01.parquet > data.parquet" + "!curl https://saturn-public-data.s3.us-east-2.amazonaws.com/nyc-taxi/data/yellow_tripdata_2019-01.parquet > data.parquet" ] }, { @@ -279,7 +279,7 @@ "source": [ "with timing(\"GPU + Dask: Random Forest (12x the data)\"):\n", " taxi_dask = dask_cudf.read_parquet(\n", - " \"s3://nyc-tlc/trip data/yellow_tripdata_2019-*.parquet\",\n", + " \"s3://saturn-public-data/nyc-taxi/data/yellow_tripdata_2019-*.parquet\",\n", " storage_options={\"anon\": True},\n", " assume_missing=True,\n", " )\n", diff --git a/examples/rapids/01-rapids-single-gpu.ipynb b/examples/rapids/01-rapids-single-gpu.ipynb index afe5b1ef..6b78263c 100644 --- a/examples/rapids/01-rapids-single-gpu.ipynb +++ b/examples/rapids/01-rapids-single-gpu.ipynb @@ -81,9 +81,7 @@ "metadata": {}, "outputs": [], "source": [ - "taxi = cudf.read_parquet(\n", - " \"https://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2019-01.parquet\"\n", - ")" + "taxi = cudf.read_parquet(\"s3://saturn-public-data/nyc-taxi/data/yellow_tripdata_2019-01.parquet\")" ] }, { @@ -281,7 +279,7 @@ "outputs": [], "source": [ "taxi_test = cudf.read_parquet(\n", - " \"https://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2019-02.parquet\"\n", + " \"s3://saturn-public-data/nyc-taxi/data/yellow_tripdata_2019-02.parquet\"\n", ")" ] }, @@ -373,7 +371,7 @@ ], "metadata": { "kernelspec": { - "display_name": "saturn (Python 3)", + "display_name": "Python 3.10.2 64-bit", "language": "python", "name": "python3" }, @@ -387,7 +385,12 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.7.10" + "version": "3.10.2" + }, + "vscode": { + "interpreter": { + "hash": "b7848b2fbd737d4d16c30c2a265d9cb43a8b0508277d828bf32f61f61a6b4e46" + } } }, "nbformat": 4, diff --git a/examples/rapids/02-rapids-gpu-cluster.ipynb b/examples/rapids/02-rapids-gpu-cluster.ipynb index c179a575..fe9817a3 100644 --- 
a/examples/rapids/02-rapids-gpu-cluster.ipynb +++ b/examples/rapids/02-rapids-gpu-cluster.ipynb @@ -135,7 +135,7 @@ "source": [ "taxi = (\n", " dask_cudf.read_parquet(\n", - " \"s3://nyc-tlc/trip data/yellow_tripdata_2019-01.parquet\",\n", + " \"s3://saturn-public-data/nyc-taxi/data/yellow_tripdata_2019-01.parquet\",\n", " storage_options={\"anon\": True},\n", " assume_missing=True,\n", " )\n", @@ -337,7 +337,7 @@ "outputs": [], "source": [ "taxi_test = dask_cudf.read_parquet(\n", - " \"s3://nyc-tlc/trip data/yellow_tripdata_2019-02.parquet\",\n", + " \"s3://saturn-public-data/nyc-taxi/data/yellow_tripdata_2019-02.parquet\",\n", " storage_options={\"anon\": True},\n", " assume_missing=True,\n", ").persist()\n", @@ -423,18 +423,16 @@ "\n", "By only changing a few lines of code, we went from training on a single GPU to a training on a GPU cluster! Wow! \n", "\n", - "Feel free to play around with parameters and the volume of data. You could, for instance, read in and train on all of 2019's taxi data (`yellow_tripdata_2019-*.csv`). *Make sure you test on a different test set!*\n", + "Feel free to play around with parameters and the volume of data. You could, for instance, read in and train on all of 2019's taxi data (`yellow_tripdata_2019-*.parquet`). *Make sure you test on a different test set!*\n", "\n", "Take a look at our other [examples](https://saturncloud.io/docs/examples/) for more resources on running models on single and multiple GPUs!" 
] } ], "metadata": { - "interpreter": { - "hash": "0c30809920022c12dc34b6aa5982c47acf3f18a4dd3ede4f803889865384c7fa" - }, "kernelspec": { - "display_name": "Python 3.8.8 64-bit ('base': conda)", + "display_name": "Python 3.10.2 64-bit", + "language": "python", "name": "python3" }, "language_info": { @@ -447,7 +445,12 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.8.8" + "version": "3.10.2" + }, + "vscode": { + "interpreter": { + "hash": "b7848b2fbd737d4d16c30c2a265d9cb43a8b0508277d828bf32f61f61a6b4e46" + } } }, "nbformat": 4,