From c7e682cac5f107e7a748ff1293878104f08f4900 Mon Sep 17 00:00:00 2001
From: Emma Turetsky
Date: Fri, 25 Oct 2024 09:16:28 -0500
Subject: [PATCH] Uploading tutorials

---
 examples/intake/intake-test.py             |  30 +++
 examples/pytorch/BasePelicanPytorch.ipynb  | 292 +++++++++++++++++++++
 examples/pytorch/pytorch_with_pelicanfs.py |  12 +
 examples/xarray/XArrayWithFSSpec.ipynb     | 263 +++++++++++++++++++
 4 files changed, 597 insertions(+)
 create mode 100644 examples/intake/intake-test.py
 create mode 100644 examples/pytorch/BasePelicanPytorch.ipynb
 create mode 100644 examples/pytorch/pytorch_with_pelicanfs.py
 create mode 100644 examples/xarray/XArrayWithFSSpec.ipynb

diff --git a/examples/intake/intake-test.py b/examples/intake/intake-test.py
new file mode 100644
index 0000000..c5a6baa
--- /dev/null
+++ b/examples/intake/intake-test.py
@@ -0,0 +1,30 @@
+import warnings
+
+warnings.filterwarnings("ignore")
+
+import intake
+import numpy as np
+import pandas as pd
+import xarray as xr
+#import hvplot.pandas, hvplot.xarray
+#import holoviews as hv
+from distributed import LocalCluster, Client
+from ncar_jobqueue import NCARCluster
+#hv.extension('bokeh')
+
+
+if __name__ == '__main__':
+
+    # If not running on NCAR HPC, use the LocalCluster instead
+    #cluster = LocalCluster()
+    cluster = NCARCluster()
+    cluster.scale(10)
+
+    client = Client(cluster)
+
+    # Open the Pelican-backed intake-esm catalog
+    catalog = intake.open_esm_datastore(
+        'file://examples/intake/resources/pelican-test-intake.json'
+    )
+
+    # Subset the catalog and load the matching data as xarray datasets
+    catalog_subset = catalog.search(variable='FLNS', frequency='monthly')
+    dsets = catalog_subset.to_dataset_dict()
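+
+    # Hedged sketch (not part of the original tutorial): the keys of the
+    # returned dict depend on the catalog's aggregation settings, so treat
+    # this access pattern as illustrative only.
+    for name, ds in dsets.items():
+        print(name, ds['FLNS'].dims)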
diff --git a/examples/pytorch/BasePelicanPytorch.ipynb b/examples/pytorch/BasePelicanPytorch.ipynb
new file mode 100644
index 0000000..4a8dfcb
--- /dev/null
+++ b/examples/pytorch/BasePelicanPytorch.ipynb
@@ -0,0 +1,292 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Training and Evaluating a Model with PyTorch, FSSpec, and Remote CSV Data\n",
+    "\n",
+    "This notebook demonstrates how to train a simple neural network in PyTorch on data read from remote CSV files over HTTPS using `fsspec`. The example builds data pipelines for both the training and test datasets and evaluates the model's accuracy on the test set.\n",
+    "\n",
+    "## Install Dependencies\n",
+    "\n",
+    "```python\n",
+    "!pip install torch fsspec pandas torchdata\n",
+    "```"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Import Libraries"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import torch\n",
+    "import torch.nn as nn\n",
+    "import torch.optim as optim\n",
+    "import pandas as pd\n",
+    "import fsspec\n",
+    "from torch.utils.data import Dataset, DataLoader\n",
+    "import numpy as np"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Define the Neural Network"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Define a simple feedforward neural network for the example."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 14,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "class SimpleNN(nn.Module):\n",
+    "    def __init__(self):\n",
+    "        super(SimpleNN, self).__init__()\n",
+    "        self.fc1 = nn.Linear(784, 50)  # 28x28 input pixels -> 50 hidden units\n",
+    "        self.fc2 = nn.Linear(50, 10)   # 10 output classes for Fashion-MNIST\n",
+    "\n",
+    "    def forward(self, x):\n",
+    "        x = torch.relu(self.fc1(x))\n",
+    "        x = self.fc2(x)\n",
+    "        return x\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Define the Custom Dataset"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Create a custom dataset class for PyTorch."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 15,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "class CSVDataset(Dataset):\n",
+    "    def __init__(self, data):\n",
+    "        self.data = data\n",
+    "\n",
+    "    def __len__(self):\n",
+    "        return len(self.data)\n",
+    "\n",
+    "    def __getitem__(self, index):\n",
+    "        return self.data[index]"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Define Functions to Read and Process Remote CSV Data"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "These functions use fsspec to read and process the data.\n",
+    "\n",
+    "Note that this notebook does not use the fsspec handling functions built into `torchdata.datapipes`, because that package is being deprecated."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 16,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def read_csv_from_url(file_url):\n",
+    "    # Create a filesystem object for the OSDF protocol (registered by PelicanFS)\n",
+    "    fs = fsspec.filesystem('osdf')\n",
+    "    # Open the remote file\n",
+    "    with fs.open(file_url, 'r') as f:\n",
+    "        # Read the file into a pandas DataFrame\n",
+    "        df = pd.read_csv(f, index_col=False)\n",
+    "    return df\n",
+    "\n",
+    "def dataframe_to_dataset(df):\n",
+    "    features = df.iloc[:, :-1].values.astype(np.float32)  # Assumes the last column is the target\n",
+    "    targets = df.iloc[:, -1].values.astype(np.int64)\n",
+    "    dataset = [(torch.tensor(feature), torch.tensor(target)) for feature, target in zip(features, targets)]\n",
+    "    return dataset\n"
+   ]
+  },
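+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "As a quick sanity check, you can talk to the `osdf` filesystem directly before building any pipelines. This cell is a sketch: it assumes PelicanFS is installed (which registers the `osdf` protocol with fsspec) and that the public namespace used below is reachable."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Sketch: list the namespace holding the CSVs via the standard fsspec API.\n",
+    "fs = fsspec.filesystem('osdf')\n",
+    "print(fs.ls('/chtc/PUBLIC/hzhao292/'))"
+   ]
+  },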
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Prepare the Data"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Get the data remotely from Pelican using fsspec with the 'osdf' protocol. (Note that the OSDF protocol is a specific configuration of PelicanFS with the discovery URL already set.)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 17,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Define remote file URLs\n",
+    "train_csv_url = '/chtc/PUBLIC/hzhao292/fashion-mnist_train.csv'\n",
+    "test_csv_url = '/chtc/PUBLIC/hzhao292/fashion-mnist_test.csv'\n",
+    "\n",
+    "# Read and convert data\n",
+    "train_df = read_csv_from_url(train_csv_url)\n",
+    "test_df = read_csv_from_url(test_csv_url)\n",
+    "train_data = dataframe_to_dataset(train_df)\n",
+    "test_data = dataframe_to_dataset(test_df)\n",
+    "\n",
+    "# Create DataLoaders\n",
+    "train_dataset = CSVDataset(train_data)\n",
+    "test_dataset = CSVDataset(test_data)\n",
+    "train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True)\n",
+    "test_loader = DataLoader(test_dataset, batch_size=16, shuffle=False)\n"
+   ]
+  },
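+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "The CSVs store raw pixel intensities in the 0-255 range, and SGD is usually more stable on scaled inputs. This optional cell is a sketch that rescales the datasets built above and rebuilds the DataLoaders; it assumes the feature/label split from `dataframe_to_dataset`."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Optional sketch: rescale features to [0, 1] for more stable training.\n",
+    "train_data = [(x / 255.0, y) for x, y in train_data]\n",
+    "test_data = [(x / 255.0, y) for x, y in test_data]\n",
+    "\n",
+    "# Rebuild the DataLoaders on the rescaled data\n",
+    "train_loader = DataLoader(CSVDataset(train_data), batch_size=16, shuffle=True)\n",
+    "test_loader = DataLoader(CSVDataset(test_data), batch_size=16, shuffle=False)"
+   ]
+  },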
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Train the Model"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Train our example model using the data from Pelican."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Instantiate model, loss function, and optimizer\n",
+    "model = SimpleNN()\n",
+    "criterion = nn.CrossEntropyLoss()\n",
+    "optimizer = optim.SGD(model.parameters(), lr=0.01)\n",
+    "\n",
+    "# Training loop\n",
+    "epochs = 5\n",
+    "for epoch in range(epochs):\n",
+    "    model.train()\n",
+    "    running_loss = 0.0\n",
+    "    for batch_X, batch_y in train_loader:\n",
+    "        optimizer.zero_grad()\n",
+    "        outputs = model(batch_X)\n",
+    "        loss = criterion(outputs, batch_y)\n",
+    "        loss.backward()\n",
+    "        optimizer.step()\n",
+    "        running_loss += loss.item() * batch_X.size(0)\n",
+    "\n",
+    "    epoch_loss = running_loss / len(train_loader.dataset)\n",
+    "    print(f'Epoch [{epoch+1}/{epochs}], Loss: {epoch_loss:.4f}')\n"
+   ]
+  },
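+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "If you want to keep the trained weights, a standard PyTorch checkpoint is a one-liner. This is a sketch, not part of the original tutorial; the filename is arbitrary."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Sketch: persist the trained weights locally (the path is arbitrary).\n",
+    "torch.save(model.state_dict(), 'simple_nn_fashion_mnist.pt')"
+   ]
+  },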
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Evaluate the Model"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Evaluate the accuracy of the model on the test set."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "model.eval()\n",
+    "correct = 0\n",
+    "total = 0\n",
+    "with torch.no_grad():\n",
+    "    for batch_X, batch_y in test_loader:\n",
+    "        outputs = model(batch_X)\n",
+    "        _, predicted = torch.max(outputs, 1)\n",
+    "        total += batch_y.size(0)\n",
+    "        correct += (predicted == batch_y).sum().item()\n",
+    "\n",
+    "accuracy = correct / total\n",
+    "print(f'Accuracy on test data: {accuracy:.4f}')"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": ".venv",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.9.6"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}

diff --git a/examples/pytorch/pytorch_with_pelicanfs.py b/examples/pytorch/pytorch_with_pelicanfs.py
new file mode 100644
index 0000000..c358aa5
--- /dev/null
+++ b/examples/pytorch/pytorch_with_pelicanfs.py
@@ -0,0 +1,12 @@
+import torch
+
+# Workaround for a torch/torchdata version mismatch: expose the DILL_AVAILABLE
+# flag where torchdata's datapipes still expect to find it.
+torch.utils.data.datapipes.utils.common.DILL_AVAILABLE = torch.utils._import_utils.dill_available()
+from torchdata.datapipes.iter import IterableWrapper
+
+
+if __name__ == '__main__':
+    # List the files in the OSDF namespace via fsspec
+    dp = IterableWrapper(["osdf:///chtc/PUBLIC/eturetsky/data/faces/"]).list_files_by_fsspec()
+    print(list(dp))
+
+    # Open each file in the namespace and print its path and stream handle
+    dp = IterableWrapper(["osdf:///chtc/PUBLIC/eturetsky/data/faces/"]).open_files_by_fsspec()
+    for path, filestream in dp:
+        print(path, filestream)
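+
+    # Hedged sketch (not in the original example): read a few bytes from one
+    # stream to confirm the transfer works. Assumes the files are binary.
+    dp = IterableWrapper(["osdf:///chtc/PUBLIC/eturetsky/data/faces/"]).open_files_by_fsspec(mode="rb")
+    for path, filestream in dp:
+        print(path, filestream.read(16))
+        break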
diff --git a/examples/xarray/XArrayWithFSSpec.ipynb b/examples/xarray/XArrayWithFSSpec.ipynb
new file mode 100644
index 0000000..1f30357
--- /dev/null
+++ b/examples/xarray/XArrayWithFSSpec.ipynb
@@ -0,0 +1,263 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# HRRR 2m Temperature Analysis\n",
+    "\n",
+    "This notebook demonstrates how to load, process, and visualize 2-meter temperature data from HRRR using Xarray, Cartopy, and Matplotlib.\n",
+    "\n",
+    "## Setup\n",
+    "\n",
+    "We'll start by importing the necessary libraries."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import xarray as xr\n",
+    "import numpy as np\n",
+    "import matplotlib.pyplot as plt\n",
+    "import cartopy.crs as ccrs\n",
+    "import cartopy.feature as cfeature\n",
+    "import metpy.calc as mpcalc\n",
+    "from metpy.units import units\n",
+    "from pelicanfs.core import OSDFFileSystem"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Define File Paths\n",
+    "\n",
+    "Define the date, hour, variable, and level for the HRRR data."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Set the date, hour, variable, and level for the HRRR data\n",
+    "date = '20211016'\n",
+    "hour = '21'\n",
+    "var = 'TMP'\n",
+    "level = '2m_above_ground'\n",
+    "\n",
+    "# Construct paths for the Zarr stores; in the hrrrzarr layout the data\n",
+    "# array lives in a nested group, so the repeated {level} is intentional\n",
+    "namespace_file1 = f'/chtc/PUBLIC/eturetsky/hrrrzarr/sfc/{date}/{date}_{hour}z_anl.zarr/{level}/{var}/{level}/'\n",
+    "namespace_file2 = f'/chtc/PUBLIC/eturetsky/hrrrzarr/sfc/{date}/{date}_{hour}z_anl.zarr/{level}/{var}/'\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Open Zarr Datasets\n",
+    "\n",
+    "Use `OSDFFileSystem` to open the Zarr datasets and read them with Xarray."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Initialize the OSDFFileSystem\n",
+    "xfs = OSDFFileSystem()\n",
+    "\n",
+    "# Get mappers for the Zarr datasets\n",
+    "file1 = xfs.get_mapper(namespace_file1)\n",
+    "file2 = xfs.get_mapper(namespace_file2)\n",
+    "\n",
+    "# Open the datasets\n",
+    "ds = xr.open_mfdataset([file1, file2], engine='zarr')\n",
+    "\n",
+    "# Display the dataset\n",
+    "ds"
+   ]
+  },
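+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "`open_mfdataset` returns a lazily loaded, Dask-backed dataset, so no data moves over the network until something is computed or plotted. As an optional sketch, you can inspect the chunking before doing any work:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Sketch: the TMP variable is Dask-backed; inspect its chunk layout.\n",
+    "print(ds.TMP.chunks)"
+   ]
+  },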
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Initialize the OSDFFileSystem\n", + "xfs = OSDFFileSystem()\n", + "\n", + "# Get mappers for the Zarr datasets\n", + "file1 = xfs.get_mapper(namespace_file1)\n", + "file2 = xfs.get_mapper(namespace_file2)\n", + "\n", + "# Open the datasets\n", + "ds = xr.open_mfdataset([file1, file2], engine='zarr')\n", + "\n", + "# Display the dataset\n", + "ds" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Define Projection and Coordinate System\n", + "\n", + "Set up the projection and coordinate system for plotting." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Define coordinates for projection\n", + "lon1 = -97.5\n", + "lat1 = 38.5\n", + "slat = 38.5\n", + "\n", + "# Define the Lambert Conformal projection\n", + "projData = ccrs.LambertConformal(\n", + " central_longitude=lon1,\n", + " central_latitude=lat1,\n", + " standard_parallels=[slat, slat],\n", + " globe=ccrs.Globe(\n", + " semimajor_axis=6371229,\n", + " semiminor_axis=6371229\n", + " )\n", + ")\n", + "\n", + "# Display dataset coordinates\n", + "ds.coords" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Extract and Convert Data\n", + "\n", + "Extract temperature data and convert its units" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Extract temperature data\n", + "airTemp = ds.TMP\n", + "\n", + "# Display the temperature data\n", + "airTemp\n", + "\n", + "# Convert temperature units to Celsius\n", + "airTemp = airTemp.metpy.convert_units('degC')\n", + "\n", + "# Display the converted temperature data\n", + "airTemp\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Plot Temperature Data\n", + "\n", + "Create a plot of the temperature data." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Extract projection coordinates\n", + "x = airTemp.projection_x_coordinate\n", + "y = airTemp.projection_y_coordinate\n", + "\n", + "# Plot temperature data\n", + "airTemp.plot(figsize=(11, 8.5))\n", + "\n", + "# Compute minimum and maximum temperatures\n", + "minTemp = airTemp.min().compute()\n", + "maxTemp = airTemp.max().compute()\n", + "\n", + "# Display minimum and maximum temperature values\n", + "minTemp.values, maxTemp.values\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Define Contour Levels and Plot\n", + "\n", + "Set up contour levels and plot the temperature data." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Define contour levels\n", + "fint = np.arange(np.floor(minTemp.values), np.ceil(maxTemp.values) + 2, 2)\n", + "\n", + "# Define plot bounds and resolution\n", + "latN = 50.4\n", + "latS = 24.25\n", + "lonW = -123.8\n", + "lonE = -71.2\n", + "res = '50m'\n", + "\n", + "# Create a figure and axis with projection\n", + "fig = plt.figure(figsize=(18, 12))\n", + "ax = plt.subplot(1, 1, 1, projection=projData)\n", + "ax.set_extent([lonW, lonE, latS, latN], crs=ccrs.PlateCarree())\n", + "ax.add_feature(cfeature.COASTLINE.with_scale(res))\n", + "ax.add_feature(cfeature.STATES.with_scale(res))\n", + "\n", + "# Add the title\n", + "tl1 = 'HRRR 2m temperature ($^\\circ$C)'\n", + "tl2 = f'Analysis valid at: {hour}00 UTC {date}'\n", + "plt.title(f'{tl1}\\n{tl2}', fontsize=16)\n", + "\n", + "# Contour fill\n", + "CF = ax.contourf(x, y, airTemp, levels=fint, cmap=plt.get_cmap('coolwarm'))\n", + "\n", + "# Make a colorbar for the ContourSet returned by the contourf call\n", + "cbar = fig.colorbar(CF, shrink=0.5)\n", + "cbar.set_label(r'2m Temperature ($^\\circ$C)', size='large')\n", + "\n", + "# Show the plot\n", + "plt.show()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": ".venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.6" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +}