From c7e682cac5f107e7a748ff1293878104f08f4900 Mon Sep 17 00:00:00 2001
From: Emma Turetsky
Date: Fri, 25 Oct 2024 09:16:28 -0500
Subject: [PATCH] Uploading tutorials

---
 examples/intake/intake-test.py             |  30 +++
 examples/pytorch/BasePelicanPytorch.ipynb  | 292 +++++++++++++++++++++
 examples/pytorch/pytorch_with_pelicanfs.py |  12 +
 examples/xarray/XArrayWithFSSpec.ipynb     | 263 +++++++++++++++++++
 4 files changed, 597 insertions(+)
 create mode 100644 examples/intake/intake-test.py
 create mode 100644 examples/pytorch/BasePelicanPytorch.ipynb
 create mode 100644 examples/pytorch/pytorch_with_pelicanfs.py
 create mode 100644 examples/xarray/XArrayWithFSSpec.ipynb

diff --git a/examples/intake/intake-test.py b/examples/intake/intake-test.py
new file mode 100644
index 0000000..c5a6baa
--- /dev/null
+++ b/examples/intake/intake-test.py
@@ -0,0 +1,30 @@
+import warnings
+
+warnings.filterwarnings("ignore")
+
+import intake
+import numpy as np
+import pandas as pd
+import xarray as xr
+#import hvplot.pandas, hvplot.xarray
+#import holoviews as hv
+from distributed import LocalCluster, Client
+from ncar_jobqueue import NCARCluster
+#hv.extension('bokeh')
+
+
+if __name__ == '__main__':
+
+    # If not running on NCAR HPC, use the LocalCluster instead
+    #cluster = LocalCluster()
+    cluster = NCARCluster()
+    cluster.scale(10)
+
+    client = Client(cluster)
+
+    # Open the Pelican-backed intake-esm catalog
+    catalog = intake.open_esm_datastore(
+        'file://examples/intake/resources/pelican-test-intake.json'
+    )
+
+    # Subset the catalog and load the matching data as xarray datasets
+    catalog_subset = catalog.search(variable='FLNS', frequency='monthly')
+    dsets = catalog_subset.to_dataset_dict()
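+
+    # Hedged sketch (not part of the original tutorial): the keys of the
+    # returned dict depend on the catalog's aggregation settings, so treat
+    # this access pattern as illustrative only.
+    for name, ds in dsets.items():
+        print(name, ds['FLNS'].dims)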
diff --git a/examples/pytorch/BasePelicanPytorch.ipynb b/examples/pytorch/BasePelicanPytorch.ipynb
new file mode 100644
index 0000000..4a8dfcb
--- /dev/null
+++ b/examples/pytorch/BasePelicanPytorch.ipynb
@@ -0,0 +1,292 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Training and Evaluating a Model with PyTorch, FSSpec, and Remote CSV Data\n",
+    "\n",
+    "This notebook demonstrates how to train a simple neural network in PyTorch on data read from remote CSV files over HTTPS using `fsspec`. The example builds data pipelines for both the training and test datasets and evaluates the model's accuracy on the test set.\n",
+    "\n",
+    "## Install Dependencies\n",
+    "\n",
+    "```python\n",
+    "!pip install torch fsspec pandas torchdata\n",
+    "```"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Import Libraries"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import torch\n",
+    "import torch.nn as nn\n",
+    "import torch.optim as optim\n",
+    "import pandas as pd\n",
+    "import fsspec\n",
+    "from torch.utils.data import Dataset, DataLoader\n",
+    "import numpy as np"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Define the Neural Network"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Define a simple feedforward neural network for the example."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 14,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "class SimpleNN(nn.Module):\n",
+    "    def __init__(self):\n",
+    "        super(SimpleNN, self).__init__()\n",
+    "        self.fc1 = nn.Linear(784, 50)  # 28x28 input pixels -> 50 hidden units\n",
+    "        self.fc2 = nn.Linear(50, 10)   # 10 output classes for Fashion-MNIST\n",
+    "\n",
+    "    def forward(self, x):\n",
+    "        x = torch.relu(self.fc1(x))\n",
+    "        x = self.fc2(x)\n",
+    "        return x\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Define the Custom Dataset"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Create a custom dataset class for PyTorch."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 15,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "class CSVDataset(Dataset):\n",
+    "    def __init__(self, data):\n",
+    "        self.data = data\n",
+    "\n",
+    "    def __len__(self):\n",
+    "        return len(self.data)\n",
+    "\n",
+    "    def __getitem__(self, index):\n",
+    "        return self.data[index]"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Define Functions to Read and Process Remote CSV Data"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "These functions use fsspec to read and process the data.\n",
+    "\n",
+    "Note that this notebook does not use the fsspec handling functions built into `torchdata.datapipes`, because that package is being deprecated."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 16,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def read_csv_from_url(file_url):\n",
+    "    # Create a filesystem object for the OSDF protocol (registered by PelicanFS)\n",
+    "    fs = fsspec.filesystem('osdf')\n",
+    "    # Open the remote file\n",
+    "    with fs.open(file_url, 'r') as f:\n",
+    "        # Read the file into a pandas DataFrame\n",
+    "        df = pd.read_csv(f, index_col=False)\n",
+    "    return df\n",
+    "\n",
+    "def dataframe_to_dataset(df):\n",
+    "    features = df.iloc[:, :-1].values.astype(np.float32)  # Assumes the last column is the target\n",
+    "    targets = df.iloc[:, -1].values.astype(np.int64)\n",
+    "    dataset = [(torch.tensor(feature), torch.tensor(target)) for feature, target in zip(features, targets)]\n",
+    "    return dataset\n"
+   ]
+  },
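+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "As a quick sanity check, you can talk to the `osdf` filesystem directly before building any pipelines. This cell is a sketch: it assumes PelicanFS is installed (which registers the `osdf` protocol with fsspec) and that the public namespace used below is reachable."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Sketch: list the namespace holding the CSVs via the standard fsspec API.\n",
+    "fs = fsspec.filesystem('osdf')\n",
+    "print(fs.ls('/chtc/PUBLIC/hzhao292/'))"
+   ]
+  },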
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Prepare the Data"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Get the data remotely from Pelican using fsspec with the 'osdf' protocol. (Note that the OSDF protocol is a specific configuration of PelicanFS with the discovery URL already set.)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 17,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Define remote file URLs\n",
+    "train_csv_url = '/chtc/PUBLIC/hzhao292/fashion-mnist_train.csv'\n",
+    "test_csv_url = '/chtc/PUBLIC/hzhao292/fashion-mnist_test.csv'\n",
+    "\n",
+    "# Read and convert data\n",
+    "train_df = read_csv_from_url(train_csv_url)\n",
+    "test_df = read_csv_from_url(test_csv_url)\n",
+    "train_data = dataframe_to_dataset(train_df)\n",
+    "test_data = dataframe_to_dataset(test_df)\n",
+    "\n",
+    "# Create DataLoaders\n",
+    "train_dataset = CSVDataset(train_data)\n",
+    "test_dataset = CSVDataset(test_data)\n",
+    "train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True)\n",
+    "test_loader = DataLoader(test_dataset, batch_size=16, shuffle=False)\n"
+   ]
+  },
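+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "The CSVs store raw pixel intensities in the 0-255 range, and SGD is usually more stable on scaled inputs. This optional cell is a sketch that rescales the datasets built above and rebuilds the DataLoaders; it assumes the feature/label split from `dataframe_to_dataset`."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Optional sketch: rescale features to [0, 1] for more stable training.\n",
+    "train_data = [(x / 255.0, y) for x, y in train_data]\n",
+    "test_data = [(x / 255.0, y) for x, y in test_data]\n",
+    "\n",
+    "# Rebuild the DataLoaders on the rescaled data\n",
+    "train_loader = DataLoader(CSVDataset(train_data), batch_size=16, shuffle=True)\n",
+    "test_loader = DataLoader(CSVDataset(test_data), batch_size=16, shuffle=False)"
+   ]
+  },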
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Train the Model"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Train our example model using the data from Pelican."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Instantiate model, loss function, and optimizer\n",
+    "model = SimpleNN()\n",
+    "criterion = nn.CrossEntropyLoss()\n",
+    "optimizer = optim.SGD(model.parameters(), lr=0.01)\n",
+    "\n",
+    "# Training loop\n",
+    "epochs = 5\n",
+    "for epoch in range(epochs):\n",
+    "    model.train()\n",
+    "    running_loss = 0.0\n",
+    "    for batch_X, batch_y in train_loader:\n",
+    "        optimizer.zero_grad()\n",
+    "        outputs = model(batch_X)\n",
+    "        loss = criterion(outputs, batch_y)\n",
+    "        loss.backward()\n",
+    "        optimizer.step()\n",
+    "        running_loss += loss.item() * batch_X.size(0)\n",
+    "\n",
+    "    epoch_loss = running_loss / len(train_loader.dataset)\n",
+    "    print(f'Epoch [{epoch+1}/{epochs}], Loss: {epoch_loss:.4f}')\n"
+   ]
+  },
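+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "If you want to keep the trained weights, a standard PyTorch checkpoint is a one-liner. This is a sketch, not part of the original tutorial; the filename is arbitrary."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Sketch: persist the trained weights locally (the path is arbitrary).\n",
+    "torch.save(model.state_dict(), 'simple_nn_fashion_mnist.pt')"
+   ]
+  },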
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Evaluate the Model"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Evaluate the accuracy of the model on the test set."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "model.eval()\n",
+    "correct = 0\n",
+    "total = 0\n",
+    "with torch.no_grad():\n",
+    "    for batch_X, batch_y in test_loader:\n",
+    "        outputs = model(batch_X)\n",
+    "        _, predicted = torch.max(outputs, 1)\n",
+    "        total += batch_y.size(0)\n",
+    "        correct += (predicted == batch_y).sum().item()\n",
+    "\n",
+    "accuracy = correct / total\n",
+    "print(f'Accuracy on test data: {accuracy:.4f}')"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": ".venv",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.9.6"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}

diff --git a/examples/pytorch/pytorch_with_pelicanfs.py b/examples/pytorch/pytorch_with_pelicanfs.py
new file mode 100644
index 0000000..c358aa5
--- /dev/null
+++ b/examples/pytorch/pytorch_with_pelicanfs.py
@@ -0,0 +1,12 @@
+import torch
+
+# Workaround for a torch/torchdata version mismatch: expose the DILL_AVAILABLE
+# flag where torchdata's datapipes still expect to find it.
+torch.utils.data.datapipes.utils.common.DILL_AVAILABLE = torch.utils._import_utils.dill_available()
+from torchdata.datapipes.iter import IterableWrapper
+
+
+if __name__ == '__main__':
+    # List the files in the OSDF namespace via fsspec
+    dp = IterableWrapper(["osdf:///chtc/PUBLIC/eturetsky/data/faces/"]).list_files_by_fsspec()
+    print(list(dp))
+
+    # Open each file in the namespace and print its path and stream handle
+    dp = IterableWrapper(["osdf:///chtc/PUBLIC/eturetsky/data/faces/"]).open_files_by_fsspec()
+    for path, filestream in dp:
+        print(path, filestream)
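+
+    # Hedged sketch (not in the original example): read a few bytes from one
+    # stream to confirm the transfer works. Assumes the files are binary.
+    dp = IterableWrapper(["osdf:///chtc/PUBLIC/eturetsky/data/faces/"]).open_files_by_fsspec(mode="rb")
+    for path, filestream in dp:
+        print(path, filestream.read(16))
+        break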
diff --git a/examples/xarray/XArrayWithFSSpec.ipynb b/examples/xarray/XArrayWithFSSpec.ipynb
new file mode 100644
index 0000000..1f30357
--- /dev/null
+++ b/examples/xarray/XArrayWithFSSpec.ipynb
@@ -0,0 +1,263 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# HRRR 2m Temperature Analysis\n",
+    "\n",
+    "This notebook demonstrates how to load, process, and visualize 2-meter temperature data from HRRR using Xarray, Cartopy, and Matplotlib.\n",
+    "\n",
+    "## Setup\n",
+    "\n",
+    "We'll start by importing the necessary libraries."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import xarray as xr\n",
+    "import numpy as np\n",
+    "import matplotlib.pyplot as plt\n",
+    "import cartopy.crs as ccrs\n",
+    "import cartopy.feature as cfeature\n",
+    "import metpy.calc as mpcalc\n",
+    "from metpy.units import units\n",
+    "from pelicanfs.core import OSDFFileSystem"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Define File Paths\n",
+    "\n",
+    "Define the date, hour, variable, and level for the HRRR data."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Set the date, hour, variable, and level for the HRRR data\n",
+    "date = '20211016'\n",
+    "hour = '21'\n",
+    "var = 'TMP'\n",
+    "level = '2m_above_ground'\n",
+    "\n",
+    "# Construct paths for the Zarr stores; in the hrrrzarr layout the data\n",
+    "# array lives in a nested group, so the repeated {level} is intentional\n",
+    "namespace_file1 = f'/chtc/PUBLIC/eturetsky/hrrrzarr/sfc/{date}/{date}_{hour}z_anl.zarr/{level}/{var}/{level}/'\n",
+    "namespace_file2 = f'/chtc/PUBLIC/eturetsky/hrrrzarr/sfc/{date}/{date}_{hour}z_anl.zarr/{level}/{var}/'\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Open Zarr Datasets\n",
+    "\n",
+    "Use `OSDFFileSystem` to open the Zarr datasets and read them with Xarray."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Initialize the OSDFFileSystem\n",
+    "xfs = OSDFFileSystem()\n",
+    "\n",
+    "# Get mappers for the Zarr datasets\n",
+    "file1 = xfs.get_mapper(namespace_file1)\n",
+    "file2 = xfs.get_mapper(namespace_file2)\n",
+    "\n",
+    "# Open the datasets\n",
+    "ds = xr.open_mfdataset([file1, file2], engine='zarr')\n",
+    "\n",
+    "# Display the dataset\n",
+    "ds"
+   ]
+  },
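+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "`open_mfdataset` returns a lazily loaded, Dask-backed dataset, so no data moves over the network until something is computed or plotted. As an optional sketch, you can inspect the chunking before doing any work:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Sketch: the TMP variable is Dask-backed; inspect its chunk layout.\n",
+    "print(ds.TMP.chunks)"
+   ]
+  },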
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Initialize the OSDFFileSystem\n", + "xfs = OSDFFileSystem()\n", + "\n", + "# Get mappers for the Zarr datasets\n", + "file1 = xfs.get_mapper(namespace_file1)\n", + "file2 = xfs.get_mapper(namespace_file2)\n", + "\n", + "# Open the datasets\n", + "ds = xr.open_mfdataset([file1, file2], engine='zarr')\n", + "\n", + "# Display the dataset\n", + "ds" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Define Projection and Coordinate System\n", + "\n", + "Set up the projection and coordinate system for plotting." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Define coordinates for projection\n", + "lon1 = -97.5\n", + "lat1 = 38.5\n", + "slat = 38.5\n", + "\n", + "# Define the Lambert Conformal projection\n", + "projData = ccrs.LambertConformal(\n", + " central_longitude=lon1,\n", + " central_latitude=lat1,\n", + " standard_parallels=[slat, slat],\n", + " globe=ccrs.Globe(\n", + " semimajor_axis=6371229,\n", + " semiminor_axis=6371229\n", + " )\n", + ")\n", + "\n", + "# Display dataset coordinates\n", + "ds.coords" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Extract and Convert Data\n", + "\n", + "Extract temperature data and convert its units" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Extract temperature data\n", + "airTemp = ds.TMP\n", + "\n", + "# Display the temperature data\n", + "airTemp\n", + "\n", + "# Convert temperature units to Celsius\n", + "airTemp = airTemp.metpy.convert_units('degC')\n", + "\n", + "# Display the converted temperature data\n", + "airTemp\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Plot Temperature Data\n", + "\n", + "Create a plot of the temperature data." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Extract projection coordinates\n", + "x = airTemp.projection_x_coordinate\n", + "y = airTemp.projection_y_coordinate\n", + "\n", + "# Plot temperature data\n", + "airTemp.plot(figsize=(11, 8.5))\n", + "\n", + "# Compute minimum and maximum temperatures\n", + "minTemp = airTemp.min().compute()\n", + "maxTemp = airTemp.max().compute()\n", + "\n", + "# Display minimum and maximum temperature values\n", + "minTemp.values, maxTemp.values\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Define Contour Levels and Plot\n", + "\n", + "Set up contour levels and plot the temperature data." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Define contour levels\n", + "fint = np.arange(np.floor(minTemp.values), np.ceil(maxTemp.values) + 2, 2)\n", + "\n", + "# Define plot bounds and resolution\n", + "latN = 50.4\n", + "latS = 24.25\n", + "lonW = -123.8\n", + "lonE = -71.2\n", + "res = '50m'\n", + "\n", + "# Create a figure and axis with projection\n", + "fig = plt.figure(figsize=(18, 12))\n", + "ax = plt.subplot(1, 1, 1, projection=projData)\n", + "ax.set_extent([lonW, lonE, latS, latN], crs=ccrs.PlateCarree())\n", + "ax.add_feature(cfeature.COASTLINE.with_scale(res))\n", + "ax.add_feature(cfeature.STATES.with_scale(res))\n", + "\n", + "# Add the title\n", + "tl1 = 'HRRR 2m temperature ($^\\circ$C)'\n", + "tl2 = f'Analysis valid at: {hour}00 UTC {date}'\n", + "plt.title(f'{tl1}\\n{tl2}', fontsize=16)\n", + "\n", + "# Contour fill\n", + "CF = ax.contourf(x, y, airTemp, levels=fint, cmap=plt.get_cmap('coolwarm'))\n", + "\n", + "# Make a colorbar for the ContourSet returned by the contourf call\n", + "cbar = fig.colorbar(CF, shrink=0.5)\n", + "cbar.set_label(r'2m Temperature ($^\\circ$C)', size='large')\n", + "\n", + "# Show the plot\n", + "plt.show()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": ".venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.6" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +}