Updating design.md. #11

JackKelly · Sep 17, 2024 · 55ff1d6 · 55ff1d6
1 parent f2f08ea
commit 55ff1d6
Showing 1 changed file with 76 additions and 23 deletions.
diff --git a/design.md b/design.md
@@ -26,7 +26,7 @@ da.sel(
 )
 ```
 
-xarray converts these coordinated labels to integer indexes:
+xarray converts these coordinate labels to integer indexes:
 
 ```python
 da.isel(
@@ -40,51 +40,104 @@ da.isel(
 
 And `hypergrib` needs to load the appropriate data for these integer indexes.
 
-In `hypergrib` we standardise the interface to different NWPs:
-
-```rust
-trait NWP {
-  fn get_filename(&self) -> Path;
-  fn get_byte_offset_and_len(&self) -> ByteOffsetAndLen;
-}
-```
+In `hypergrib` we standardise the interface to different datasets:
 
 We can compute the GRIB filename from the init_time, ensemble_member, and forecast step:
 
 ```
 noaa-gefs-pds/gefs.YYYYMMDD/<init hour>/pgrb2b/gep<ensemble member>.t<init hour>z.pgrb2af<step>
 ```
 
-`hypergrib` will cache the information in the `.idx` files in a `BTreeMap`:
+`hypergrib` caches the information in the `.idx` files in a `BTreeMap`.
+
+To create a dataset or to add new information to a dataset:
 
 ```rust
+#[derive(PartialEq, Eq, PartialOrd, Ord)]
 struct Key {
-  init_time,
-  ensemble_member,
-  forecast_step,
-  nwp_variable,
-  vertical_level,
+  init_time: Datetime,
+  ensemble_member: u16,
+  forecast_step: Timedelta,
+  nwp_variable: Variable,
+  vertical_level: VerticalLevel,
 }
 
-impl Key {
-  fn to_filename(&self) -> Path {
-    // implementation
+// If the `derive` doesn't work then we can manually implement Ord, something like this:
+impl Ord for Key {
+  fn cmp(&self, other: &Self) -> Ordering {
+    match self.init_time.cmp(&other.init_time) {
+      Equal => {
+        match self.ensemble_member.cmp(&other.ensemble_member) {
+          Equal => {
+            ...
+          },
+          m => m,
+        }
+      },
+      m => m,
+    }
   }
 }
 
-struct ByteOffsetAndLength {
+struct OffsetAndLen {
   byte_offset: u32,
   msg_length: u32,
 }
 
-struct GEFS {
-  manifest: BTreeMap<Key, ByteOffsetAndLength>,
+struct CoordLabels {
+  // We're using vectors (not BTreeSet) because the most performance-sensitive
+  // part of the process is looking up a coord label given an integer index.
+  // And the only way to do that with a BTreeSet is to first iterate over the elements.
+  init_time: Vec<Datetime>,
+  ensemble_member: Vec<u16>,
+  forecast_step: Vec<Timedelta>,
+  nwp_variable: Vec<Variable>,
+  vertical_level: Vec<VerticalLevel>,
+}
+
+struct Dataset {
+  coord_labels: CoordLabels,
+  manifest: BTreeMap<Key, OffsetAndLen>,
+
+}
+
+impl Dataset {
+  fn insert(&mut self, key: Key, offset_and_len: OffsetAndLen) -> Result<(), AlreadyExistsError> {
+    // Insert into `manifest` and update `coord_labels` iff the new coord doesn't exist yet.
+  }
+
+  fn coord_labels_to_offset_and_len(&self, key: &Key) -> Option<OffsetAndLen> {
+    self.manifest[key]
+  }
+
+  fn index_locs_to_key(&self, index: &[u64]) -> Option<Key> {
+    // get key by looking up the appropriate coord labels in self.coord_labels
+    Some(key)
+  }
+}
+
+// GEFS-specific code:
+fn key_to_gefs_filename(key: &Key) -> Path {
+  // TODO
 }
 
-impl NWP for GEFS {
-  impl get_filename() // ...need to think more about how this'll work!
+fn gefs_filename_to_key(path: &Path) -> Key {
+  // TODO
 }
 
 ```
 
+To query a dataset:
+
+```rust
+
+// Usage example: Create manifest
+// TODO
+
+// Usage example: Convert from coordinate labels to integer indexes
+
+// Usage example: Get GRIB message for a given key (using labels)
+
+```
+
To satisfy the user's query, we'll loop over all the requested positions and build a `BTreeMap<filename, Vec<OffsetAndLen>>`, which we then grab from storage.