Update Notebooks

ecmwf · Jan 21, 2025 · b017437 · b017437
1 parent 85d86fc
commit b017437
Show file tree

Hide file tree

Showing 3 changed files with 1,547 additions and 162 deletions.
diff --git a/TODO.md b/TODO.md
@@ -78,8 +78,8 @@ is consistent and as-expected, please specify a format.
 
 
 ## Longer term:
+[ ] Add a file backed DataChunk type to support Sensor.community data
 [ ] Strip out the concept of metadata entirely and make it all data?
-
 [ ] Swap out the config parsing to use pydantic
 [ ] Swap out the command line arguments and config parsing to use conflator
 

diff --git a/notebooks/api_tests/Ingestion_Summary.ipynb b/notebooks/api_tests/Ingestion_Summary.ipynb
@@ -0,0 +1,190 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "id": "ff92dbf6-23da-48a5-881b-a3805983d1c5",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import requests\n",
+    "from datetime import datetime, timedelta, UTC\n",
+    "import pandas as pd\n",
+    "from io import BytesIO\n",
+    "import json\n",
+    "from collections import defaultdict\n",
+    "\n",
+    "session = requests.Session()\n",
+    "url = \"http://ionbeam-ichange.ecmwf-ichange.f.ewcloud.host/api/v1/\"\n",
+    "# url = \"http://136.156.130.47/api/v1/\"\n",
+    "url = \"http://localhost:50310/api/v1/\"\n",
+    "# url = \"http://localhost:5002/api/v1/\"\n",
+    "\n",
+    "def api_get(path, *args, **kwargs):\n",
+    "    r = session.get(url + path, stream=True, *args, **kwargs)\n",
+    "    if not r.ok:\n",
+    "        print(f\"API Error\")\n",
+    "        print(json.dumps(r.json(), indent = 4))\n",
+    "    return r\n",
+    "\n",
+    "def sort_by_platform(stations) -> dict[str, list[dict]]:\n",
+    "    by_platform = defaultdict(list)\n",
+    "    for s in stations:\n",
+    "        by_platform[s[\"platform\"]].append(s)\n",
+    "    return by_platform"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "id": "b763af83-dd9e-4e91-acb3-1b2aa3b15783",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# This can take a long time...\n",
+    "all_stations = api_get(\"stations\", params = {\n",
+    "}).json()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "id": "33704dbf-6a3a-46a6-8127-6e447735cdba",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "All stations\n",
+      "{'meteotracker': 18711, 'smart_citizen_kit': 4}\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "print(\"All stations\")\n",
+    "all_by_platform = sort_by_platform(all_stations)\n",
+    "print({k : len(v) for k, v in all_by_platform.items()})\n",
+    "print()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "ef296878-8b6a-405d-8e03-d16114b028e8",
+   "metadata": {},
+   "source": [
+    "## Total Meteotracker datasets"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "id": "b6f3b5c9-4574-49ca-8f5a-a84e1d987111",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Meteotracker: 18711 tracks.\n",
+      "Earliest ingested data: 2023-01-20 12:23:39.075000+00:00\n",
+      "Most recent ingested data: 2025-01-21 11:39:59+00:00\n"
+     ]
+    }
+   ],
+   "source": [
+    "dates = [datetime.fromisoformat(s['time_span'][\"end\"]) for s in all_by_platform['meteotracker']]\n",
+    "earliest, latest = min(dates), max(dates)\n",
+    "print(f\"Meteotracker: {len(all_by_platform['meteotracker'])} tracks.\")\n",
+    "print(f\"Earliest ingested data: {earliest}\")\n",
+    "print(f\"Most recent ingested data: {latest}\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "b1268fef-2d7b-4303-b39f-6aded3ac06fb",
+   "metadata": {},
+   "source": [
+    "## Total Acronet datasets"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "id": "e9b4595a-0ece-4c06-bb1f-dcea783781e6",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def get_all_granules_by_platform(platform):\n",
+    "    granules = api_get(\"list\", params = {'platform': platform}).json()\n",
+    "    dates = [datetime.fromisoformat(d[\"datetime\"]) for d in granules]\n",
+    "    # print(set(g[\"mars_request\"][\"platform\"] for g in granules))\n",
+    "    assert all(g[\"mars_request\"][\"platform\"] == platform for g in granules)\n",
+    "    return granules, dates\n",
+    "\n",
+    "acronet_granules, dates = get_all_granules_by_platform(platform = \"acronet\")\n",
+    "if dates:\n",
+    "    earliest, latest = min(dates), max(dates)\n",
+    "    \n",
+    "    print(f\"Acronet: {len(all_by_platform['acronet'])} distinct stations.\")\n",
+    "    print(f\"Earliest ingested data: {earliest.date()} Most recent ingested data: {latest.date()}\")\n",
+    "    print(f\"Ingested data: {len(acronet_granules)} acronet data granules.\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "94eb8132-2676-4906-b255-23baa8fb188f",
+   "metadata": {},
+   "source": [
+    "## Total Smart Citizen Kit Datasets"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "id": "669da433-a867-4ce7-afd6-548f36f5313e",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Smart Citizen Kit: 4 distinct stations.\n",
+      "Earliest ingested data: 2025-01-05 17:00:00+00:00 Most recent ingested data: 2025-01-21 12:00:00+00:00\n",
+      "Ingested data: 317 data granules.\n"
+     ]
+    }
+   ],
+   "source": [
+    "sck_granules, dates = get_all_granules_by_platform(platform = \"smart_citizen_kit\")\n",
+    "earliest, latest = min(dates), max(dates)\n",
+    "\n",
+    "print(f\"Smart Citizen Kit: {len(all_by_platform['smart_citizen_kit'])} distinct stations.\")\n",
+    "print(f\"Earliest ingested data: {earliest} Most recent ingested data: {latest}\")\n",
+    "print(f\"Ingested data: {len(sck_granules)} data granules.\")"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python [conda env:micromamba-ionbeam]",
+   "language": "python",
+   "name": "conda-env-micromamba-ionbeam-py"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.12.7"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}