Squashed commit of the following:

commit 85d86fc Author: Tom <[email protected]> Date: Tue Jan 21 11:58:53 2025 +0000 update TODOs commit 8e6b61d Author: Tom Hodson <[email protected]> Date: Tue Jan 21 11:56:51 2025 +0000 disable debug loggin commit 2a0fefd Author: Tom Hodson <[email protected]> Date: Tue Jan 21 11:11:53 2025 +0000 bit of extra logging commit 534b82a Author: Tom Hodson <[email protected]> Date: Mon Jan 20 17:05:49 2025 +0000 small fixes commit d7f8a1b Author: Tom Hodson <[email protected]> Date: Sun Jan 19 16:46:46 2025 +0000 cron stuff commit bb7ba11 Author: Tom Hodson <[email protected]> Date: Wed Dec 18 11:15:14 2024 +0000 additional mappings commit a8588ee Author: Tom Hodson <[email protected]> Date: Tue Dec 17 10:39:39 2024 +0000 update mappings commit eaae287 Author: Tom Hodson <[email protected]> Date: Mon Dec 16 16:43:28 2024 +0000 delete cron logs commit d26932b Author: Tom Hodson <[email protected]> Date: Mon Dec 16 16:43:14 2024 +0000 delete cron logs commit 1fc8414 Author: Tom Hodson <[email protected]> Date: Mon Dec 16 16:42:24 2024 +0000 add cron logs to gitignore commit e257415 Author: Tom <[email protected]> Date: Sun Jan 19 16:28:54 2025 +0000 test commit 7de879e Author: Tom <[email protected]> Date: Sun Jan 19 16:08:42 2025 +0000 nearly there! 
commit 4b3a61a Author: Tom <[email protected]> Date: Sat Jan 18 14:14:48 2025 +0000 more refactoring commit 63249de Author: Tom <[email protected]> Date: Thu Jan 16 17:23:29 2025 +0000 rewrite commit 325e97d Author: Tom <[email protected]> Date: Thu Jan 9 16:08:12 2025 +0000 lots of stuff commit 411ffc1 Author: Tom <[email protected]> Date: Thu Jan 2 18:38:24 2025 +0100 big refactor commit fea2ed2 Author: Tom <[email protected]> Date: Mon Dec 16 16:41:09 2024 +0000 start working on smart citizen kit commit 72b7ba6 Author: Tom Hodson <[email protected]> Date: Mon Dec 16 15:10:58 2024 +0000 changes commit 94cece6 Author: Tom <[email protected]> Date: Thu Dec 12 17:55:14 2024 +0000 more changes commit 6b7998b Author: Tom Hodson <[email protected]> Date: Thu Dec 12 17:56:02 2024 +0000 fixes commit 48f3235 Author: Tom <[email protected]> Date: Thu Dec 12 11:30:16 2024 +0000 Loads of changes commit 4220093 Author: Tom <[email protected]> Date: Mon Dec 9 18:58:12 2024 +0000 getting acronet working commit 89f3169 Author: Tom Hodson <[email protected]> Date: Mon Dec 9 13:51:37 2024 +0000 ichange commit b191b33 Author: Tom Hodson <[email protected]> Date: Tue Nov 12 15:17:36 2024 +0000 Change default search location of metkit language.yaml commit 9fbd0be Author: Tom <[email protected]> Date: Tue Nov 12 12:02:18 2024 +0000 Update packaging commit 49c3e5c Author: Tom <[email protected]> Date: Tue Nov 12 11:28:32 2024 +0000 Small changes commit 233889b Author: Tom <[email protected]> Date: Mon Nov 4 11:33:19 2024 +0000 add notebooks commit a7aa99b Author: Tom <[email protected]> Date: Wed Oct 9 13:51:58 2024 +0100 Refactoring commit 15b2188 Author: Tom <[email protected]> Date: Wed Oct 2 16:26:42 2024 +0100 move notebooks commit 6069a3e Merge: 2367125 8aa73a8 Author: Tom <[email protected]> Date: Wed Oct 2 11:55:44 2024 +0100 Merge pull request #4 from ecmwf/docs Update docs to support markdown commit 8aa73a8 Author: Tom <[email protected]> Date: Wed Oct 2 11:49:25 2024 +0100 
Update docs to support markdown commit 2367125 Author: Tom <[email protected]> Date: Wed Oct 2 10:36:35 2024 +0100 add docs requirements for readthe docs commit da40fcb Author: Tom <[email protected]> Date: Tue Oct 1 14:24:31 2024 +0100 bump readthedocs python build version commit f994a70 Author: Tom <[email protected]> Date: Fri Sep 27 15:02:27 2024 +0200 Strip out SQL namespace stuff commit cec2a6b Author: Tom <[email protected]> Date: Fri Sep 27 14:13:08 2024 +0200 Fix circular import commit b18f503 Author: Tom <[email protected]> Date: Fri Sep 27 14:11:49 2024 +0200 Add back an important import removed by autoformatter commit 02b740f Author: Tom <[email protected]> Date: Fri Sep 27 14:03:06 2024 +0200 Add note to update pyodc dep commit 1dc4e8e Author: Tom <[email protected]> Date: Fri Sep 27 14:02:52 2024 +0200 Fix env override bug
ecmwf · Jan 21, 2025 · 217e204 · 217e204
1 parent a5109d7
commit 217e204
Show file tree

Hide file tree

Showing 133 changed files with 45,491 additions and 10,607 deletions.
diff --git a/.gitignore b/.gitignore
@@ -13,3 +13,4 @@ examples/notebooks/
 .ipynb_checkpoints/
 *.odb
 # *.ipynb
+cron_logs*
diff --git a/readthedocs.yaml → .readthedocs.yaml b/readthedocs.yaml → .readthedocs.yaml
@@ -9,7 +9,7 @@ version: 2
 build:
   os: ubuntu-20.04
   tools:
-    python: "3.9"
+    python: "3.12"
 
 # Build documentation in the docs/ directory with Sphinx
 sphinx:

diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -0,0 +1,19 @@
+# GJBundle: top-level ecbuild bundle; declares the ECMWF projects listed below as bundle members. ####################
+cmake_minimum_required( VERSION 3.12 FATAL_ERROR )
+find_package( ecbuild 3.7 REQUIRED HINTS ${CMAKE_CURRENT_SOURCE_DIR} ${CMAKE_CURRENT_SOURCE_DIR}/../ecbuild)
+project( GJBundle VERSION 0.0.1 LANGUAGES C CXX )
+# ecbuild_set_verbose( <var> <value>... ): forwards all arguments to set() and logs the first two. Being a macro, the set() takes effect in the caller's scope.
+macro( ecbuild_set_verbose )
+    set( ${ARGV} )
+    message( STATUS "SET ${ARGV0} = ${ARGV1}" )
+endmacro()
+# Bundle members, all on their develop branch. NOTE(review): fdb5 is MANUAL rather than UPDATE, presumably so its checkout is not refreshed automatically; confirm.
+ecbuild_bundle_initialize()
+
+ecbuild_bundle( PROJECT eccodes  GIT "git@github.com:ecmwf/eccodes"  BRANCH develop  UPDATE )
+ecbuild_bundle( PROJECT eckit    GIT "git@github.com:ecmwf/eckit"    BRANCH develop  UPDATE )
+ecbuild_bundle( PROJECT odc      GIT "git@github.com:ecmwf/odc"      BRANCH develop  UPDATE )
+ecbuild_bundle( PROJECT metkit   GIT "git@github.com:ecmwf/metkit"   BRANCH develop  UPDATE )
+ecbuild_bundle( PROJECT fdb5     GIT "git@github.com:ecmwf/fdb"      BRANCH develop  MANUAL )
+
+ecbuild_bundle_finalize()
diff --git a/TODO.md b/TODO.md
@@ -0,0 +1,96 @@
+[x] Add ingestion time to ingestion chunk data structure
+[x] add "did you mean" suggestions to subclass not found error message
+[x] Add logic to back off if there has been a 429 error in the last x minutes
+[x] Get the downstream acronet working
+[x] test ingested acronet data against existing data 
+[x] Fix fdb overwriting for acronet
+
+[x] Add logic to download the most recent chunk first then try older ones 
+[x] Add ability for source to modify the time_span of a chunk to only include the data received
+[x] Add a flag that a chunk is empty but was completed correctly
+
+[x] port meteotracker to new ingestion system
+on system
+
+[x] debug hang on SimpleODCEncoder 
+[x] write class AddMeteotrackerMetadata(Parser):
+[x] Debug why meteotracker is emitting so many messages, probably not filtering correctly
+[x] Allow canonicalise to accept a list of RawVariables
+[x] modify canonicalise to add on the raw variable in this case
+[x] fix location_feature = from_wkt(self.location_feature)
+[x] figure out why the acronet data is now all nones
+[x] Rewrite SCK to give the data out in one big chunk then use the infra I made for acronet
+[x] Split out functionality to create RawVariable columns into separate action
+[x] Sort out unit conversions for SCK because columns with the same name can have different units
+[x] Deal with ownership of data. 
+    [x] Allow actions to assume they own and can mutate input data
+    [x] Modify the processor to give ownership over a message to the first consumer and a copy to all the rest.
+[x] Convert the other two sources to also generate columns of raw Variables?
+
+[x] Maybe collate acronet into one file after all?
+[x] Rewrite metadata adding to use an upsert and share more between sources
+[x] Refactor (remove) generate_metadata and tag_message
+[x] swap out sqlalchemy json serialiser for orjson which supports datetimes and numpy arrays
+[x] change JSON to JSONB in the db
+[x] same for MT 
+    [x] make datetime index
+    [x] add external station id
+    [x] use separate raw variable action
+[x] Add arguments to reingest all data but keep db
+
+[x] When errors are suppressed save them so they can be debugged later with
+```python
+from ionbeam.core.singleprocess_pipeline import load_most_recent_error
+saved_err = load_most_recent_error(config.globals)
+```
+
+[x] get downstream mt working
+[x] port SCK to new ingestion system
+    [x] Try to trigger a 429 and check for Retry-After header
+    [ ] Add logic to exponentially back off if 429s keep happening
+
+
+
+[x] Figure out this error:
+    ['chunk_date', 'chunk_time'] in
+    msg.metadata.columns but not in
+    msg.data.columns
+
+[x] Deploy download cron jobs to server
+[x] Fix rest api to work with new format
+    [x] add an endpoint to directly get station data saving the filter step?
+
+[ ] fix IonBeam/src/ionbeam/sources/meteotracker/source.py:149: UserWarning: Could not infer 
+format, so each element will be parsed individually, falling back to `dateutil`. To ensure parsing 
+is consistent and as-expected, please specify a format.
+
+  data["datetime"] = pd.to_datetime(data["datetime"])
+
+[ ] Figure out under what conditions timespans can become null/None in the sql database
+
+
+[x] Deploy download cron jobs to server
+[x] Fix rest api to work with new format
+[ ] Make it even harder to nuke the ingestion data
+[ ] Add way to wipe just station metadata for one source
+[ ] Allow parsing ingestion times as a cmd line argument
+[ ] Add a way to keep track of average time spent on each action.
+
+
+## Longer term:
+[ ] Strip out the concept of metadata entirely and make it all data?
+
+[ ] Swap out the config parsing to use pydantic
+[ ] Swap out the command line arguments and config parsing to use conflator
+
+
+[ ] Fix this code in codc 
+```
+if dtype == STRING:
+    return_arr = return_arr.astype("|S{}".format(max(8, 8 * (1 + ((max(len(s) for s in arr) - 1) // 8)))))
+```
+it doesn't support unicode bytes.
+
+[ ] Check whether pyodc actually does deduplication
+[ ] pyodc support deletion
+[ ] update pyodc to take path : typing.Union[str, bytes, os.PathLike] and call os.fspath(path) see https://peps.python.org/pep-0519/
diff --git a/changelog.md b/changelog.md
@@ -0,0 +1,9 @@
+
+Changed station object location and timespan fields from:
+'location': [44.23571, 8.331622],
+'time_span': ['2025-01-02T17:10:00Z', '2025-01-02T17:25:00Z'],
+
+to
+
+'location': {'lat': 44.23571, 'lon': 8.331622},
+'time_span': {'start': '2025-01-02T17:10:00Z', 'end': '2025-01-02T17:25:00Z'},
diff --git a/config/acronet/actions.yaml b/config/acronet/actions.yaml
@@ -0,0 +1,78 @@
+actions:
+  - class: AcronetSource
+    copy_metadata_to_columns:
+      external_station_id: id
+      station_name: name
+      lat: lat
+      lon: lon
+
+  # Compute the column mappings based on the name of each column
+  - class: ComputeColumnMappingsByName
+    mappings: !include mappings.yaml
+
+  - class: SetConstants
+    set:
+      platform: acronet
+      aggregation_type: by_time
+
+  - class: ComputeStationId
+  - class: FormatChecks
+  - class: ComputeChunkDateTime
+
+  - class: CanonicaliseColumns
+    mappings: !include mappings.yaml
+    move_to_front:
+      - platform
+      - author
+      - station_id
+      - external_station_id
+      - station_name
+      - aggregation_type
+      - chunk_date
+      - chunk_time
+      - datetime
+      - lat
+      - lon
+
+  - class: UpdateStationMetadata      
+  - class: ComputeMARSIdentifier
+    lookup:
+      class: const.rd
+      expver: const.xxxx
+      stream: const.lwda
+      aggregation_type: data.aggregation_type
+      platform: data.platform
+      date: data.chunk_date
+      time: data.chunk_time
+      version: const.1
+
+    forward_to_names: ["final_processing"]     
+
+
+  # - class: SplitOnColumnValue
+  #   column: station_id
+
+  # - class: DropNaNColumns
+  # - class: DropNaNRows
+  # - class: DropEmpty
+
+  # - class: ComputeMARSIdentifier
+  #   lookup:
+  #     class: const.rd
+  #     expver: const.xxxx
+  #     stream: const.lwda
+  #     aggregation_type: data.aggregation_type
+  #     platform: data.platform
+  #     date: data.chunk_date
+  #     internal_id: data.station_id
+  #     time: data.chunk_time
+  #     version: const.1
+
+  #   forward_to_names: ["final_processing"]
+
+
+
+
+
+
+
diff --git a/config/acronet/mappings.yaml b/config/acronet/mappings.yaml
@@ -0,0 +1,81 @@
+- name: datetime
+  key: time
+  type: datetime
+
+- name: author
+- name: platform
+- name: external_station_id
+
+- name: start_time
+  key: start
+
+- name: station_name
+
+- name: lat
+  unit: EPSG:4326
+
+- name: lon
+  unit: EPSG:4326
+
+- name: rainfall
+  key: "PLUVIOMETRO [mm]"
+  unit: "mm"
+
+- name: air_temperature_near_surface
+  key: "TERMOMETRO [°C]"
+  unit: "°C"
+
+- name: relative_humidity_near_surface
+  key: "IGROMETRO [%]"
+  unit: "%"
+
+- name: wind_direction_near_surface
+  key: "DIREZIONEVENTO [Degrees]"
+  unit: °
+
+- name: wind_speed_near_surface
+  key: "ANEMOMETRO [m/s]"
+  unit: "m/s"
+
+- name: wind_speed_near_surface
+  key: "ANEMOMETRO [KTS]"
+  unit: "knots"
+
+- name: wind_gust
+  key: "ANEMOMETRO_RAFFICA [KTS]"
+  unit: "knots"
+
+- name: air_pressure_near_surface
+  key: "BAROMETRO [hPa]"
+  unit: "hPa"
+
+- name: solar_radiation
+  key: "RADIOMETRO [W/m^2]"
+  unit: "W/m^2"
+
+- name: battery_level
+  key: "BATTERIA [V]"
+  unit: "V"
+
+- name: internal_temperature
+  key: "TERMOMETRO_INTERNA [°C]"
+  unit: "°C"
+
+- name: wind_gust_direction
+  key: "DIREZIONEVENTO_RAFFICA [Degrees]"
+  unit: °
+
+- name: wind_gust
+  key: "ANEMOMETRO_RAFFICA [m/s]"
+  unit: "m/s"
+
+- name: thermometer_min
+  key: "TERMOMETRO_MIN [°C]"
+  unit: "°C"
+- name: thermometer_max
+  key: "TERMOMETRO_MAX [°C]"
+  unit: "°C"
+
+- name: signal_strength
+  key: "SIGNAL_STRENGTH [CSQ]"
+  unit: "CSQ"
diff --git a/config/actions.yaml b/config/actions.yaml
@@ -0,0 +1,8 @@
+actions:
+  - class: IdentityAction
+    name: final_processing
+
+  - class: ParquetEncoder
+  - class: FDBWriter
+
+