From 55ff1d6637b588b4541830f5f3e15da641d9446c Mon Sep 17 00:00:00 2001 From: Jack Kelly Date: Tue, 17 Sep 2024 16:59:45 +0100 Subject: [PATCH] Updating design.md. #11 --- design.md | 99 ++++++++++++++++++++++++++++++++++++++++++------------- 1 file changed, 76 insertions(+), 23 deletions(-) diff --git a/design.md b/design.md index b505f40..9333f8e 100644 --- a/design.md +++ b/design.md @@ -26,7 +26,7 @@ da.sel( ) ``` -xarray converts these coordinated labels to integer indexes: +xarray converts these coordinate labels to integer indexes: ```python da.isel( @@ -40,14 +40,7 @@ da.isel( And `hypergrib` needs to load the appropriate data for these integer indexes. -In `hypergrib` we standardise the interface to different NWPs: - -```rust -trait NWP { - fn get_filename(&self) -> Path; - fn get_byte_offset_and_len(&self) -> ByteOffsetAndLen; -} -``` +In `hypergrib` we standardise the interface to different datasets: We can compute the GRIB filename from the init_time, ensemble_member, and forecast step: @@ -55,36 +48,96 @@ We can compute the GRIB filename from the init_time, ensemble_member, and foreca noaa-gefs-pds/gefs.YYYYMMDD//pgrb2b/gep.tz.pgrb2af ``` -`hypergrib` will cache the information in the `.idx` files in a `BTreeMap`: +`hypergrib` caches the information in the `.idx` files in a `BTreeMap`. 
+ +To create a dataset or to add new information to a dataset: ```rust +#[derive(PartialEq, Eq, PartialOrd, Ord)] struct Key { - init_time, - ensemble_member, - forecast_step, - nwp_variable, - vertical_level, + init_time: Datetime, + ensemble_member: u16, + forecast_step: Timedelta, + nwp_variable: Variable, + vertical_level: VerticalLevel, } -impl Key { - fn to_filename(&self) -> Path { - // implementation +// If the `derive` doesn't work then we can manually implement Ord, something like this: +impl Ord for Key { + fn cmp(&self, other: &Self) -> Ordering { + match self.init_time.cmp(&other.init_time) { + Equal => { + match self.ensemble_member.cmp(&other.ensemble_member) { + Equal => { + ... + }, + m => m, + } + }, + m => m, + } } } -struct ByteOffsetAndLength { +struct OffsetAndLen { byte_offset: u32, msg_length: u32, } -struct GEFS { - manifest: BTreeMap, +struct CoordLabels { + // We're using vectors (not BTreeSet) because the most performance-sensitive + // part of the process is looking up a coord label given an integer index. + // And the only way to do that with a BTreeSet is to first iterate over the elements. + init_time: Vec<Datetime>, + ensemble_member: Vec<u16>, + forecast_step: Vec<Timedelta>, + nwp_variable: Vec<Variable>, + vertical_level: Vec<VerticalLevel>, +} + +struct Dataset { + coord_labels: CoordLabels, + manifest: BTreeMap<Key, OffsetAndLen>, + +} + +impl Dataset { + fn insert(&mut self, key: Key, offset_and_len: OffsetAndLen) -> Result<(), AlreadyExistsError> { + // Insert into `manifest` and update `coord_labels` iff the new coord doesn't exist yet. + } + + fn coord_labels_to_offset_and_len(&self, key: &Key) -> Option<OffsetAndLen> { + self.manifest[key] + } + + fn index_locs_to_key(&self, index: &[u64]) -> Option<Key> { + // get key by looking up the appropriate coord labels in self.coord_labels + Some(key) + } +} + +// GEFS-specific code: +fn key_to_gefs_filename(key: &Key) -> Path { + // TODO } -impl NWP for GEFS { - impl get_filename() // ...need to think more about how this'll work! 
+fn gefs_filename_to_key(path: &Path) -> Key { + // TODO } ``` +To query a dataset: + +```rust + +// Usage example: Create manifest +// TODO + +// Usage example: Convert from coordinate labels to integer indexes + +// Usage example: Get GRIB message for a given key (using labels) + +``` + To satisfy the user's query, we'll loop round all the requested positions, and build a `BTreeMap>`. Which we then grab from storage.