Skip to content

Commit

Permalink
Update Notebooks
Browse files Browse the repository at this point in the history
  • Loading branch information
TomHodson committed Jan 21, 2025
1 parent 85d86fc commit b017437
Show file tree
Hide file tree
Showing 3 changed files with 1,547 additions and 162 deletions.
2 changes: 1 addition & 1 deletion TODO.md
Original file line number Diff line number Diff line change
Expand Up @@ -78,8 +78,8 @@ is consistent and as-expected, please specify a format.


## Longer term:
[ ] Add a file backed DataChunk type to support Sensor.community data
[ ] Strip out the concept of metadata entirely and make it all data?

[ ] Swap out the config parsing to use pydantic
[ ] Swap out the command line arguments and config parsing to use conflator

Expand Down
190 changes: 190 additions & 0 deletions notebooks/api_tests/Ingestion_Summary.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,190 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 4,
"id": "ff92dbf6-23da-48a5-881b-a3805983d1c5",
"metadata": {},
"outputs": [],
"source": [
"import requests\n",
"from datetime import datetime, timedelta, UTC\n",
"import pandas as pd\n",
"from io import BytesIO\n",
"import json\n",
"from collections import defaultdict\n",
"\n",
"session = requests.Session()\n",
"url = \"http://ionbeam-ichange.ecmwf-ichange.f.ewcloud.host/api/v1/\"\n",
"# url = \"http://136.156.130.47/api/v1/\"\n",
"url = \"http://localhost:50310/api/v1/\"\n",
"# url = \"http://localhost:5002/api/v1/\"\n",
"\n",
"def api_get(path, *args, **kwargs):\n",
" r = session.get(url + path, stream=True, *args, **kwargs)\n",
" if not r.ok:\n",
" print(f\"API Error\")\n",
" print(json.dumps(r.json(), indent = 4))\n",
" return r\n",
"\n",
"def sort_by_platform(stations) -> dict[str, list[dict]]:\n",
" by_platform = defaultdict(list)\n",
" for s in stations:\n",
" by_platform[s[\"platform\"]].append(s)\n",
" return by_platform"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "b763af83-dd9e-4e91-acb3-1b2aa3b15783",
"metadata": {},
"outputs": [],
"source": [
"# This can take a long time...\n",
"all_stations = api_get(\"stations\", params = {\n",
"}).json()"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "33704dbf-6a3a-46a6-8127-6e447735cdba",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"All stations\n",
"{'meteotracker': 18711, 'smart_citizen_kit': 4}\n",
"\n"
]
}
],
"source": [
"print(\"All stations\")\n",
"all_by_platform = sort_by_platform(all_stations)\n",
"print({k : len(v) for k, v in all_by_platform.items()})\n",
"print()"
]
},
{
"cell_type": "markdown",
"id": "ef296878-8b6a-405d-8e03-d16114b028e8",
"metadata": {},
"source": [
"## Total Meteotracker datasets"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "b6f3b5c9-4574-49ca-8f5a-a84e1d987111",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Meteotracker: 18711 tracks.\n",
"Earliest ingested data: 2023-01-20 12:23:39.075000+00:00\n",
"Most recent ingested data: 2025-01-21 11:39:59+00:00\n"
]
}
],
"source": [
"dates = [datetime.fromisoformat(s['time_span'][\"end\"]) for s in all_by_platform['meteotracker']]\n",
"earliest, latest = min(dates), max(dates)\n",
"print(f\"Meteotracker: {len(all_by_platform['meteotracker'])} tracks.\")\n",
"print(f\"Earliest ingested data: {earliest}\")\n",
"print(f\"Most recent ingested data: {latest}\")"
]
},
{
"cell_type": "markdown",
"id": "b1268fef-2d7b-4303-b39f-6aded3ac06fb",
"metadata": {},
"source": [
"## Total Acronet datasets"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "e9b4595a-0ece-4c06-bb1f-dcea783781e6",
"metadata": {},
"outputs": [],
"source": [
"def get_all_granules_by_platform(platform):\n",
" granules = api_get(\"list\", params = {'platform': platform}).json()\n",
" dates = [datetime.fromisoformat(d[\"datetime\"]) for d in granules]\n",
" # print(set(g[\"mars_request\"][\"platform\"] for g in granules))\n",
" assert all(g[\"mars_request\"][\"platform\"] == platform for g in granules)\n",
" return granules, dates\n",
"\n",
"acronet_granules, dates = get_all_granules_by_platform(platform = \"acronet\")\n",
"if dates:\n",
" earliest, latest = min(dates), max(dates)\n",
" \n",
" print(f\"Acronet: {len(all_by_platform['acronet'])} distinct stations.\")\n",
" print(f\"Earliest ingested data: {earliest.date()} Most recent ingested data: {latest.date()}\")\n",
" print(f\"Ingested data: {len(acronet_granules)} acronet data granules.\")"
]
},
{
"cell_type": "markdown",
"id": "94eb8132-2676-4906-b255-23baa8fb188f",
"metadata": {},
"source": [
"## Total Smart Citizen Kit Datasets"
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "669da433-a867-4ce7-afd6-548f36f5313e",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Smart Citizen Kit: 4 distinct stations.\n",
"Earliest ingested data: 2025-01-05 17:00:00+00:00 Most recent ingested data: 2025-01-21 12:00:00+00:00\n",
"Ingested data: 317 data granules.\n"
]
}
],
"source": [
"sck_granules, dates = get_all_granules_by_platform(platform = \"smart_citizen_kit\")\n",
"earliest, latest = min(dates), max(dates)\n",
"\n",
"print(f\"Smart Citizen Kit: {len(all_by_platform['smart_citizen_kit'])} distinct stations.\")\n",
"print(f\"Earliest ingested data: {earliest} Most recent ingested data: {latest}\")\n",
"print(f\"Ingested data: {len(sck_granules)} data granules.\")"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python [conda env:micromamba-ionbeam]",
"language": "python",
"name": "conda-env-micromamba-ionbeam-py"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.7"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
Loading

0 comments on commit b017437

Please sign in to comment.