From 0dbfce15fbc8c302865f2981c68f9bac4eebb00f Mon Sep 17 00:00:00 2001 From: david-i-berry Date: Wed, 22 Nov 2023 22:29:49 +0100 Subject: [PATCH 1/3] Change in behaviour, WSI no longer estimated by default, arg now required in call to transform / as_geojson. Updated tests Removal of remote ref in schema. --- bufr2geojson/__init__.py | 105 +++++++++++------- .../schemas/wmo-om-profile-geojson.yaml | 65 ++++++++++- tests/test_bufr2geojson.py | 6 +- 3 files changed, 128 insertions(+), 48 deletions(-) diff --git a/bufr2geojson/__init__.py b/bufr2geojson/__init__.py index cd5ede3..643aabd 100644 --- a/bufr2geojson/__init__.py +++ b/bufr2geojson/__init__.py @@ -418,14 +418,14 @@ def get_time(self) -> str: return time_ - def get_wsi(self) -> str: + def get_wsi(self, guess_wsi: bool = False) -> str: """ Function returns WIGOS station ID as string :returns: WIGOS station ID. """ - return self.get_identification()["wsi"] + return self.get_identification(guess_wsi)["wsi"] def get_tsi(self) -> str: """ @@ -436,13 +436,16 @@ def get_tsi(self) -> str: return self.get_identification()["tsi"] - def get_identification(self) -> dict: + def get_identification(self, guess_wsi: bool = False) -> dict: """ Function extracts identification information from qualifiers. :returns: dictionary containing any class 01 qualifiers and WSI as dict. 
# noqa """ + # default WSI value + wsi = None + # see https://library.wmo.int/doc_num.php?explnum_id=11021 # page 19 for allocation of WSI if not set # check to see what identification we have @@ -468,13 +471,17 @@ def get_identification(self) -> dict: if all(x in self.qualifiers["01"] for x in _types): # noqa block = self.get_qualifier("01", "block_number") station = self.get_qualifier("01", "station_number") - wsi_series = 0 - wsi_issuer = 20000 - wsi_number = 0 - wsi_local = strip2(f"{block:02d}{station:03d}") + tsi = strip2(f"{block:02d}{station:03d}") + if guess_wsi: + wsi_series = 0 + wsi_issuer = 20000 + wsi_number = 0 + wsi_local = tsi + wsi = f"{wsi_series}-{wsi_issuer}-{wsi_number}-{wsi_local}" + return { - "wsi": f"{wsi_series}-{wsi_issuer}-{wsi_number}-{wsi_local}", - "tsi": wsi_local, + "wsi": wsi, + "tsi": tsi, "type": "{}_and_{}".format(*_types) } @@ -482,13 +489,17 @@ def get_identification(self) -> dict: _type = "ship_or_mobile_land_station_identifier" if _type in self.qualifiers["01"]: callsign = self.get_qualifier("01", _type) - wsi_series = 0 - wsi_issuer = 20004 - wsi_number = 0 - wsi_local = strip2(callsign) + tsi = strip2(callsign) + if guess_wsi: + wsi_series = 0 + wsi_issuer = 20004 + wsi_number = 0 + wsi_local = tsi + wsi = f"{wsi_series}-{wsi_issuer}-{wsi_number}-{wsi_local}" + return { - "wsi": f"{wsi_series}-{wsi_issuer}-{wsi_number}-{wsi_local}", - "tsi": wsi_local, + "wsi": wsi, + "tsi": tsi, "type": _type } @@ -500,13 +511,17 @@ def get_identification(self) -> dict: wmo_region = self.get_qualifier("region_number") wmo_subregion = self.get_qualifier("wmo_region_sub_area") wmo_number = self.get_qualifier("buoy_or_platform_identifier") - wsi_series = 0 - wsi_issuer = 20002 - wsi_number = 0 - wsi_local = strip2(f"{wmo_region:01d}{wmo_subregion:01d}{wmo_number:05d}") # noqa + tsi = strip2(f"{wmo_region:01d}{wmo_subregion:01d}{wmo_number:05d}") # noqa + if guess_wsi: + wsi_series = 0 + wsi_issuer = 20002 + wsi_number = 0 + wsi_local = tsi 
# noqa + wsi = f"{wsi_series}-{wsi_issuer}-{wsi_number}-{wsi_local}" + return { - "wsi": f"{wsi_series}-{wsi_issuer}-{wsi_number}-{wsi_local}", - "tsi": wsi_local, + "wsi": wsi, + "tsi": tsi, "type": "5_digit_marine_observing_platform_identifier" } @@ -515,13 +530,17 @@ def get_identification(self) -> dict: _type = "stationary_buoy_platform_identifier_e_g_c_man_buoys" if _type in self.qualifiers["01"]: id_ = self.get_qualifier("01", _type) - wsi_series = 0 - wsi_issuer = 20002 - wsi_number = 0 - wsi_local = strip2(id_) + tsi = strip2(id_) + if guess_wsi: + wsi_series = 0 + wsi_issuer = 20002 + wsi_number = 0 + wsi_local = tsi + wsi = f"{wsi_series}-{wsi_issuer}-{wsi_number}-{wsi_local}" + return { - "wsi": f"{wsi_series}-{wsi_issuer}-{wsi_number}-{wsi_local}", - "tsi": wsi_local, + "wsi": wsi, + "tsi": tsi, "type": _type } @@ -530,13 +549,17 @@ def get_identification(self) -> dict: _type = "marine_observing_platform_identifier" if _type in self.qualifiers["01"]: id_ = self.get_qualifier("01", _type) - wsi_series = 0 - wsi_issuer = 20002 - wsi_number = 0 - wsi_local = strip2(id_) + tsi = strip2(id_) + if guess_wsi: + wsi_series = 0 + wsi_issuer = 20002 + wsi_number = 0 + wsi_local = tsi + wsi = f"{wsi_series}-{wsi_issuer}-{wsi_number}-{wsi_local}" + return { - "wsi": f"{wsi_series}-{wsi_issuer}-{wsi_number}-{wsi_local}", - "tsi": wsi_local, + "wsi": wsi, + "tsi": tsi, "type": "7_digit_marine_observing_platform_identifier" } @@ -577,7 +600,7 @@ def get_code_value(self, fxxyyy: str, code: int) -> str: return decoded def as_geojson(self, bufr_handle: int, id: str, - serialize: bool = False) -> dict: + serialize: bool = False, guess_wsi: bool = False) -> dict: """ Function to return GeoJSON representation of BUFR message @@ -730,7 +753,7 @@ def as_geojson(self, bufr_handle: int, id: str, attributes, append) continue if value is not None: - self.get_identification() + # self.get_identification() metadata = self.get_qualifiers() metadata_hash = 
hashlib.md5(json.dumps(metadata).encode("utf-8")).hexdigest() # noqa md = { @@ -739,7 +762,7 @@ def as_geojson(self, bufr_handle: int, id: str, } for idx in range(len(metadata)): md["metadata"].append(metadata[idx]) - wsi = self.get_wsi() + wsi = self.get_wsi(guess_wsi) feature_id = f"WIGOS_{wsi}_{characteristic_date}T{characteristic_time}" # noqa feature_id = f"{feature_id}{id}-{index}" phenomenon_time = self.get_time() @@ -787,12 +810,14 @@ def as_geojson(self, bufr_handle: int, id: str, return data -def transform(data: bytes, serialize: bool = False) -> Iterator[dict]: +def transform(data: bytes, serialize: bool = False, + guess_wsi: bool = False) -> Iterator[dict]: """ Main transformation :param data: byte string of BUFR data :param serialize: whether to return as JSON string (default is False) + :param guess_wsi: whether to 'guess' WSI based on TSI and allocation rules :returns: `generator` of GeoJSON features """ @@ -804,8 +829,7 @@ def transform(data: bytes, serialize: bool = False) -> Iterator[dict]: with open(tmp.name, 'wb') as f: f.write(data) - # check data type, only in situ supported - # not yet implemented + # check data type, only in situ supported (not yet implemented) # split subsets into individual messages and process imsg = 0 messages_remaining = True @@ -847,7 +871,8 @@ def transform(data: bytes, serialize: bool = False) -> Iterator[dict]: tag = f"-{idx}" try: data = parser.as_geojson(single_subset, id=tag, - serialize=serialize) + serialize=serialize, + guess_wsi=guess_wsi) # noqa except Exception as e: LOGGER.error("Error parsing BUFR to GeoJSON, no data written") # noqa diff --git a/bufr2geojson/resources/schemas/wmo-om-profile-geojson.yaml b/bufr2geojson/resources/schemas/wmo-om-profile-geojson.yaml index 2f48623..be0d2f7 100644 --- a/bufr2geojson/resources/schemas/wmo-om-profile-geojson.yaml +++ b/bufr2geojson/resources/schemas/wmo-om-profile-geojson.yaml @@ -2,13 +2,66 @@ $schema: 'https://json-schema.org/draft/2020-12/schema' $id: 
'https://raw.githubusercontent.com/wmo-im/bufr2geojson/main/bufr2geojson/resources/schemas/wmo-om-profile-geojson.yaml' title: WMO GeoJSON profile for observations and measurements description: WMO GeoJSON profile for observations and measurements -allOf: - - $ref: 'https://raw.githubusercontent.com/opengeospatial/ogcapi-features/master/core/openapi/schemas/featureGeoJSON.yaml' +required: + - type + - geometry + - properties properties: + type: + type: string + enum: + - Feature + geometry: + oneOf: + - type: object + required: + - type + - coordinates + properties: + type: + type: string + enum: + - Point + coordinates: + type: array + minItems: 2 + maxItems: 3 + items: + type: number + - type: 'null' + id: + type: + - string + - integer + links: + type: array + items: + type: object + required: + - href + - rel + properties: + href: + type: string + example: http://data.example.com/buildings/123 + rel: + type: string + example: alternate + type: + type: string + example: application/geo+json + hreflang: + type: string + example: en + title: + type: string + example: Trierer Strasse 70, 53115 Bonn + length: + type: integer conformsTo: + type: array items: - enum: - - 'http://www.wmo.int/spec/om-profile-1/1.0/req/geojson' + type: string properties: type: object properties: @@ -16,7 +69,9 @@ properties: type: string description: feature identifier wigos_station_identifier: - type: string + type: + - 'null' + - string description: WIGOS station identifier for station making observations phenomenonTime: type: string diff --git a/tests/test_bufr2geojson.py b/tests/test_bufr2geojson.py index ef684a2..ccf4fbb 100644 --- a/tests/test_bufr2geojson.py +++ b/tests/test_bufr2geojson.py @@ -22,7 +22,6 @@ from __future__ import annotations import base64 import itertools - from jsonschema import validate, FormatChecker import pytest import yaml @@ -128,7 +127,7 @@ def geojson_output(): def test_multi(multimsg_bufr): - results = transform(multimsg_bufr) + results = 
transform(multimsg_bufr, guess_wsi=True) # count number of geojsons icount = 0 for res in results: @@ -140,7 +139,8 @@ def test_multi(multimsg_bufr): def test_transform(geojson_schema, geojson_output): test_bufr_file = 'A_ISIA21EIDB202100_C_EDZW_20220320210902_11839953.bin' with open(test_bufr_file, 'rb') as fh: - messages1, messages2 = itertools.tee(transform(fh.read())) + messages1, messages2 = itertools.tee(transform(fh.read(), + guess_wsi=True)) # validate against JSON Schema for message in messages1: From 7f7bb1b2e5bb507793a322e3941a350299c294a9 Mon Sep 17 00:00:00 2001 From: david-i-berry Date: Tue, 6 Feb 2024 12:32:14 +0100 Subject: [PATCH 2/3] Revert of JSON schema. --- .../schemas/wmo-om-profile-geojson.yaml | 67 ++----------------- 1 file changed, 6 insertions(+), 61 deletions(-) diff --git a/bufr2geojson/resources/schemas/wmo-om-profile-geojson.yaml b/bufr2geojson/resources/schemas/wmo-om-profile-geojson.yaml index be0d2f7..234c6fa 100644 --- a/bufr2geojson/resources/schemas/wmo-om-profile-geojson.yaml +++ b/bufr2geojson/resources/schemas/wmo-om-profile-geojson.yaml @@ -2,66 +2,13 @@ $schema: 'https://json-schema.org/draft/2020-12/schema' $id: 'https://raw.githubusercontent.com/wmo-im/bufr2geojson/main/bufr2geojson/resources/schemas/wmo-om-profile-geojson.yaml' title: WMO GeoJSON profile for observations and measurements description: WMO GeoJSON profile for observations and measurements -required: - - type - - geometry - - properties +allOf: + - $ref: 'https://raw.githubusercontent.com/opengeospatial/ogcapi-features/master/core/openapi/schemas/featureGeoJSON.yaml' properties: - type: - type: string - enum: - - Feature - geometry: - oneOf: - - type: object - required: - - type - - coordinates - properties: - type: - type: string - enum: - - Point - coordinates: - type: array - minItems: 2 - maxItems: 3 - items: - type: number - - type: 'null' - id: - type: - - string - - integer - links: - type: array - items: - type: object - required: - - href - - rel - 
properties: - href: - type: string - example: http://data.example.com/buildings/123 - rel: - type: string - example: alternate - type: - type: string - example: application/geo+json - hreflang: - type: string - example: en - title: - type: string - example: Trierer Strasse 70, 53115 Bonn - length: - type: integer conformsTo: - type: array items: - type: string + enum: + - 'http://www.wmo.int/spec/om-profile-1/1.0/req/geojson' properties: type: object properties: @@ -69,9 +16,7 @@ properties: type: string description: feature identifier wigos_station_identifier: - type: - - 'null' - - string + type: string description: WIGOS station identifier for station making observations phenomenonTime: type: string @@ -154,4 +99,4 @@ examples: type: application/geo+json hreflang: en title: 'Trierer Strasse 70, 53115 Bonn' - length: 0 + length: 0 \ No newline at end of file From 88f9e7897500aa2b40cf55cc0b6cd1dfb1f6bdf3 Mon Sep 17 00:00:00 2001 From: Tom Kralidis Date: Tue, 6 Feb 2024 06:35:29 -0500 Subject: [PATCH 3/3] Update test_bufr2geojson.py --- tests/test_bufr2geojson.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/test_bufr2geojson.py b/tests/test_bufr2geojson.py index ccf4fbb..0a9f086 100644 --- a/tests/test_bufr2geojson.py +++ b/tests/test_bufr2geojson.py @@ -22,6 +22,7 @@ from __future__ import annotations import base64 import itertools + from jsonschema import validate, FormatChecker import pytest import yaml