Skip to content

Commit

Permalink
Squashed commit of the following:
Browse files Browse the repository at this point in the history
commit 85d86fc
Author: Tom <[email protected]>
Date:   Tue Jan 21 11:58:53 2025 +0000

    update TODOs

commit 8e6b61d
Author: Tom Hodson <[email protected]>
Date:   Tue Jan 21 11:56:51 2025 +0000

    disable debug loggin

commit 2a0fefd
Author: Tom Hodson <[email protected]>
Date:   Tue Jan 21 11:11:53 2025 +0000

    bit of extra logging

commit 534b82a
Author: Tom Hodson <[email protected]>
Date:   Mon Jan 20 17:05:49 2025 +0000

    small fixes

commit d7f8a1b
Author: Tom Hodson <[email protected]>
Date:   Sun Jan 19 16:46:46 2025 +0000

    cron stuff

commit bb7ba11
Author: Tom Hodson <[email protected]>
Date:   Wed Dec 18 11:15:14 2024 +0000

    additional mappings

commit a8588ee
Author: Tom Hodson <[email protected]>
Date:   Tue Dec 17 10:39:39 2024 +0000

    update mappings

commit eaae287
Author: Tom Hodson <[email protected]>
Date:   Mon Dec 16 16:43:28 2024 +0000

    delete cron logs

commit d26932b
Author: Tom Hodson <[email protected]>
Date:   Mon Dec 16 16:43:14 2024 +0000

    delete cron logs

commit 1fc8414
Author: Tom Hodson <[email protected]>
Date:   Mon Dec 16 16:42:24 2024 +0000

    add cron logs to gitignore

commit e257415
Author: Tom <[email protected]>
Date:   Sun Jan 19 16:28:54 2025 +0000

    test

commit 7de879e
Author: Tom <[email protected]>
Date:   Sun Jan 19 16:08:42 2025 +0000

    nearly there!

commit 4b3a61a
Author: Tom <[email protected]>
Date:   Sat Jan 18 14:14:48 2025 +0000

    more refactoring

commit 63249de
Author: Tom <[email protected]>
Date:   Thu Jan 16 17:23:29 2025 +0000

    rewrite

commit 325e97d
Author: Tom <[email protected]>
Date:   Thu Jan 9 16:08:12 2025 +0000

    lots of stuff

commit 411ffc1
Author: Tom <[email protected]>
Date:   Thu Jan 2 18:38:24 2025 +0100

    big refactor

commit fea2ed2
Author: Tom <[email protected]>
Date:   Mon Dec 16 16:41:09 2024 +0000

    start working on smart citizen kit

commit 72b7ba6
Author: Tom Hodson <[email protected]>
Date:   Mon Dec 16 15:10:58 2024 +0000

    changes

commit 94cece6
Author: Tom <[email protected]>
Date:   Thu Dec 12 17:55:14 2024 +0000

    more changes

commit 6b7998b
Author: Tom Hodson <[email protected]>
Date:   Thu Dec 12 17:56:02 2024 +0000

    fixes

commit 48f3235
Author: Tom <[email protected]>
Date:   Thu Dec 12 11:30:16 2024 +0000

    Loads of changes

commit 4220093
Author: Tom <[email protected]>
Date:   Mon Dec 9 18:58:12 2024 +0000

    getting acronet working

commit 89f3169
Author: Tom Hodson <[email protected]>
Date:   Mon Dec 9 13:51:37 2024 +0000

    ichange

commit b191b33
Author: Tom Hodson <[email protected]>
Date:   Tue Nov 12 15:17:36 2024 +0000

    Change default search location of metkit language.yaml

commit 9fbd0be
Author: Tom <[email protected]>
Date:   Tue Nov 12 12:02:18 2024 +0000

    Update packaging

commit 49c3e5c
Author: Tom <[email protected]>
Date:   Tue Nov 12 11:28:32 2024 +0000

    Small changes

commit 233889b
Author: Tom <[email protected]>
Date:   Mon Nov 4 11:33:19 2024 +0000

    add notebooks

commit a7aa99b
Author: Tom <[email protected]>
Date:   Wed Oct 9 13:51:58 2024 +0100

    Refactoring

commit 15b2188
Author: Tom <[email protected]>
Date:   Wed Oct 2 16:26:42 2024 +0100

    move notebooks

commit 6069a3e
Merge: 2367125 8aa73a8
Author: Tom <[email protected]>
Date:   Wed Oct 2 11:55:44 2024 +0100

    Merge pull request #4 from ecmwf/docs

    Update docs to support markdown

commit 8aa73a8
Author: Tom <[email protected]>
Date:   Wed Oct 2 11:49:25 2024 +0100

    Update docs to support markdown

commit 2367125
Author: Tom <[email protected]>
Date:   Wed Oct 2 10:36:35 2024 +0100

    add docs requirements for readthe docs

commit da40fcb
Author: Tom <[email protected]>
Date:   Tue Oct 1 14:24:31 2024 +0100

    bump readthedocs python build version

commit f994a70
Author: Tom <[email protected]>
Date:   Fri Sep 27 15:02:27 2024 +0200

    Strip out SQL namespace stuff

commit cec2a6b
Author: Tom <[email protected]>
Date:   Fri Sep 27 14:13:08 2024 +0200

    Fix circular import

commit b18f503
Author: Tom <[email protected]>
Date:   Fri Sep 27 14:11:49 2024 +0200

    Add back an important import removed by autoformatter

commit 02b740f
Author: Tom <[email protected]>
Date:   Fri Sep 27 14:03:06 2024 +0200

    Add note to update pyodc dep

commit 1dc4e8e
Author: Tom <[email protected]>
Date:   Fri Sep 27 14:02:52 2024 +0200

    Fix env override bug
  • Loading branch information
TomHodson committed Jan 21, 2025
1 parent a5109d7 commit 217e204
Show file tree
Hide file tree
Showing 133 changed files with 45,491 additions and 10,607 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -13,3 +13,4 @@ examples/notebooks/
.ipynb_checkpoints/
*.odb
# *.ipynb
cron_logs*
2 changes: 1 addition & 1 deletion readthedocs.yaml → .readthedocs.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ version: 2
build:
os: ubuntu-20.04
tools:
python: "3.9"
python: "3.12"

# Build documentation in the docs/ directory with Sphinx
sphinx:
Expand Down
19 changes: 19 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
########################################################################################################################
# Minimum CMake version for the bundle. Kept ahead of project() so that policy
# defaults are established before the project is declared.
cmake_minimum_required(VERSION 3.12 FATAL_ERROR)

# Locate ecbuild (version 3.7 requested). Besides the default search locations,
# look in this directory and in a sibling ../ecbuild directory.
find_package(
  ecbuild 3.7 REQUIRED
  HINTS
    ${CMAKE_CURRENT_SOURCE_DIR}
    ${CMAKE_CURRENT_SOURCE_DIR}/../ecbuild
)

# Declare the bundle project with the C and C++ languages enabled.
project(GJBundle VERSION 0.0.1 LANGUAGES C CXX)
########################################################################################################################
# ecbuild_set_verbose( <variable> <value> [<more set() arguments>...] )
#
# Forwards every argument unchanged to set(), then prints
# "SET <first argument> = <second argument>" at STATUS level.
# Defined as a macro rather than a function, so the set() call takes effect in
# the caller's scope.
# NOTE(review): only the first two arguments are echoed; any further values or
# set() keywords are applied but do not appear in the message.
macro( ecbuild_set_verbose )
# Pass the full argument list straight through to set().
set( ${ARGV} )
# Log the variable name and its (first) value.
message( STATUS "SET ${ARGV0} = ${ARGV1}" )
endmacro()
########################################################################################################################
# Bundle definition: each ecbuild_bundle() call below registers one project,
# fetched from the given git remote on the given branch. All registrations sit
# between ecbuild_bundle_initialize() and ecbuild_bundle_finalize().
ecbuild_bundle_initialize()

# NOTE(review): the remote host is written in the GitHub SSH form
# (git@github.com:<org>/<repo>) — confirm it matches the intended remote and
# that build machines have SSH access; otherwise use the https:// URLs.
ecbuild_bundle( PROJECT eccodes GIT "git@github.com:ecmwf/eccodes" BRANCH develop UPDATE )
ecbuild_bundle( PROJECT eckit   GIT "git@github.com:ecmwf/eckit"   BRANCH develop UPDATE )
ecbuild_bundle( PROJECT odc     GIT "git@github.com:ecmwf/odc"     BRANCH develop UPDATE )
ecbuild_bundle( PROJECT metkit  GIT "git@github.com:ecmwf/metkit"  BRANCH develop UPDATE )
# NOTE(review): fdb5 is the only entry using MANUAL instead of UPDATE —
# presumably deliberate; confirm against the ecbuild_bundle documentation.
ecbuild_bundle( PROJECT fdb5    GIT "git@github.com:ecmwf/fdb"     BRANCH develop MANUAL )

ecbuild_bundle_finalize()
96 changes: 96 additions & 0 deletions TODO.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
[x] Add ingestion time to ingestion chunk data structure
[x] add "did you mean" suggestions to subclass not found error message
[x] Add logic to back off if there has been a 429 error in the last x minutes
[x] Get the downstream acronet working
[x] test ingested acronet data against existing data
[x] Fix fdb overwriting for acronet

[x] Add logic to download the most recent chunk first then try older ones
[x] Add ability for source to modify the time_span of a chunk to only include the data received
[x] Add a flag that a chunk is empty but was completed correctly

[x] port meteotracker to new ingestion system
on system

[x] debug hang on SimpleODCEncoder
[x] write class AddMeteotrackerMetadata(Parser):
[x] Debug why meteotracker is emitting so many messages, probably not filtering correctly
[x] Allow canonicalise to accept a list of RawVariables
[x] modify canonicalise to add on the raw variable in this case
[x] fix location_feature = from_wkt(self.location_feature)
[x] figure out why the acronet data is now all nones
[x] Rewrite SCK to give the data out in one big chunk then use the infra I made for acronet
[x] Split out functionality to create RawVariable columns into separate action
[x] Sort out unit conversions for SCK because columns with the same name can have different units
[x] Deal with ownership of data.
[x] Allow actions to assume they own and can mutate input data
[x] Modify the processor to give ownership over a message to the first consumer and a copy to all the rest.
[x] Convert the other two sources to also generate columns of raw Variables?

[x] Maybe collate acronet into one file after all?
[x] Rewrite metadata adding to use an upsert and share more between sources
[x] Refactor (remove) generate_metadata and tag_message
[x] swap out sqlalchemy json serialiser for orjson which supports datetimes and numpy arrays
[x] change JSON to JSONB in the db
[x] same for MT
[x] make datetime index
[x] add external station id
[x] use separate raw variable action
[x] Add arguments to reingest all data but keep db

[x] When errors are suppressed save them so they can be debugged later with
```python
from ionbeam.core.singleprocess_pipeline import load_most_recent_error
saved_err = load_most_recent_error(config.globals)
```

[x] get downstream mt working
[x] port SCK to new ingestion system
[x] Try to trigger a 429 and check for Retry-After header
[ ] Add logic to exponentially back off if 429s keep happening



[x] figure out this ['chunk_date',
'chunk_time'] in
msg.metadata.columns but not in
msg.data.columns error

[x] Deploy download cron jobs to server
[x] Fix rest api to work with new format
[x] add an endpoint to directly get station data saving the filter step?

[ ] fix IonBeam/src/ionbeam/sources/meteotracker/source.py:149: UserWarning: Could not infer
format, so each element will be parsed individually, falling back to `dateutil`. To ensure parsing
is consistent and as-expected, please specify a format.

data["datetime"] = pd.to_datetime(data["datetime"])

[ ] Figure out under what conditions timespans can become null/None in the sql database


[x] Deploy download cron jobs to server
[x] Fix rest api to work with new format
[ ] Make it even harder to nuke the ingestion data
[ ] Add way to wipe just station metadata for one source
[ ] Allow parsing ingestion times as a cmd line argument
[ ] Add a way to keep track of average time spent on each action.


## Longer term:
[ ] Strip out the concept of metadata entirely and make it all data?

[ ] Swap out the config parsing to use pydantic
[ ] Swap out the command line arguments and config parsing to use conflator


[ ] Fix this code in codc
```
if dtype == STRING:
return_arr = return_arr.astype("|S{}".format(max(8, 8 * (1 + ((max(len(s) for s in arr) - 1) // 8)))))
```
it doesn't support unicode bytes.

[ ] pyodc does it actually do deduplication?
[ ] pyodc support deletion
[ ] update pyodc to take path : typing.Union[str, bytes, os.PathLike] and call os.fspath(path) see https://peps.python.org/pep-0519/
9 changes: 9 additions & 0 deletions changelog.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@

Changed station object location and timespan fields from:
'location': [44.23571, 8.331622],
'time_span': ['2025-01-02T17:10:00Z', '2025-01-02T17:25:00Z'],

to

'location': {'lat': 44.23571, 'lon': 8.331622},
'time_span': {'start': '2025-01-02T17:10:00Z', 'end': '2025-01-02T17:25:00Z'},
78 changes: 78 additions & 0 deletions config/acronet/actions.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
# Action list for the acronet source.
# NOTE(review): nesting below was reconstructed from a copy whose indentation
# had been stripped — confirm each option sits under the intended action.
actions:
  - class: AcronetSource
    # Metadata field (right) copied into the named data column (left) —
    # presumably; verify the direction against AcronetSource.
    copy_metadata_to_columns:
      external_station_id: id
      station_name: name
      lat: lat
      lon: lon

  # Compute the column mappings based on the name of each column
  - class: ComputeColumnMappingsByName
    mappings: !include mappings.yaml

  - class: SetConstants
    set:
      platform: acronet
      aggregation_type: by_time

  - class: ComputeStationId
  - class: FormatChecks
  - class: ComputeChunkDateTime

  - class: CanonicaliseColumns
    mappings: !include mappings.yaml
    move_to_front:
      - platform
      - author
      - station_id
      - external_station_id
      - station_name
      - aggregation_type
      - chunk_date
      - chunk_time
      - datetime
      - lat
      - lon

  - class: UpdateStationMetadata
  - class: ComputeMARSIdentifier
    # Each value is prefixed with "const." or "data."; presumably a literal
    # versus a value read from the named data column — TODO confirm.
    lookup:
      class: const.rd
      expver: const.xxxx
      stream: const.lwda
      aggregation_type: data.aggregation_type
      platform: data.platform
      date: data.chunk_date
      time: data.chunk_time
      version: const.1

    # NOTE(review): assumed to be an option of the action, not a lookup key.
    forward_to_names: ["final_processing"]


# - class: SplitOnColumnValue
# column: station_id

# - class: DropNaNColumns
# - class: DropNaNRows
# - class: DropEmpty

# - class: ComputeMARSIdentifier
# lookup:
# class: const.rd
# expver: const.xxxx
# stream: const.lwda
# aggregation_type: data.aggregation_type
# platform: data.platform
# date: data.chunk_date
# internal_id: data.station_id
# time: data.chunk_time
# version: const.1

# forward_to_names: ["final_processing"]







81 changes: 81 additions & 0 deletions config/acronet/mappings.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
# Column mappings for the acronet source. Each list item has a `name` and,
# where present, the `key`, `unit` and `type` that go with it.
# NOTE(review): per-item nesting was reconstructed from a copy whose
# indentation had been stripped — confirm against the mapping loader.
- name: datetime
  key: time
  type: datetime

- name: author
- name: platform
- name: external_station_id

- name: start_time
  key: start

- name: station_name

- name: lat
  unit: EPSG:4326

- name: lon
  unit: EPSG:4326

- name: rainfall
  key: "PLUVIOMETRO [mm]"
  unit: "mm"

- name: air_temperature_near_surface
  key: "TERMOMETRO [°C]"
  unit: "°C"

- name: relative_humidity_near_surface
  key: "IGROMETRO [%]"
  unit: "%"

- name: wind_direction_near_surface
  key: "DIREZIONEVENTO [Degrees]"
  unit: °

# wind_speed_near_surface appears twice: once per source key / unit (m/s and knots).
- name: wind_speed_near_surface
  key: "ANEMOMETRO [m/s]"
  unit: "m/s"

- name: wind_speed_near_surface
  key: "ANEMOMETRO [KTS]"
  unit: "knots"

# wind_gust also appears twice (knots here, m/s further down).
- name: wind_gust
  key: "ANEMOMETRO_RAFFICA [KTS]"
  unit: "knots"

- name: air_pressure_near_surface
  key: "BAROMETRO [hPa]"
  unit: "hPa"

- name: solar_radiation
  key: "RADIOMETRO [W/m^2]"
  unit: "W/m^2"

- name: battery_level
  key: "BATTERIA [V]"
  unit: "V"

- name: internal_temperature
  key: "TERMOMETRO_INTERNA [°C]"
  unit: "°C"

- name: wind_gust_direction
  key: "DIREZIONEVENTO_RAFFICA [Degrees]"
  unit: °

- name: wind_gust
  key: "ANEMOMETRO_RAFFICA [m/s]"
  unit: "m/s"

- name: thermometer_min
  key: "TERMOMETRO_MIN [°C]"
  unit: "°C"
- name: thermometer_max
  key: "TERMOMETRO_MAX [°C]"
  unit: "°C"

- name: signal_strength
  key: "SIGNAL_STRENGTH [CSQ]"
  unit: "CSQ"
8 changes: 8 additions & 0 deletions config/actions.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# Shared action list.
# NOTE(review): nesting was reconstructed from a copy whose indentation had
# been stripped — confirm `name` belongs to IdentityAction.
actions:
  # "final_processing" is the name that config/acronet/actions.yaml lists in
  # its forward_to_names option.
  - class: IdentityAction
    name: final_processing

  - class: ParquetEncoder
  - class: FDBWriter


Loading

0 comments on commit 217e204

Please sign in to comment.