From 730b882f49692339c67980403706da297b491a37 Mon Sep 17 00:00:00 2001 From: antoine-de Date: Wed, 23 Mar 2022 10:49:47 +0100 Subject: [PATCH] Poc to have typed object indexes Same as https://github.com/rust-transit/gtfs-structure/pull/123 but without generational_indexes. I think the indexes are really great for performance since: * all structures are then `Vector` (great random access and cache friendly iterations) * the ids are short (integer vs string) I don't think we need those performance in this crate though. Having only typed index (a think wrapper over the real gtfs identifier) would be more convenient I think. It would: * ease debug (easy to print the gtfs identifier, instead of having a meaningless integer) * ease serialisation (same, we can serialize the string right away) * be a more more close to the gtfs representation This is still *very* early WIP, I'm not convinced at all by the ergonomics, I'd like to keep the property of the Index in https://github.com/rust-transit/gtfs-structure/pull/123 that the `Id` have at least existed at one point (even if I don't plan to ensure this if an object is deleted). --- Cargo.toml | 2 +- src/gtfs.rs | 62 ++++++++++++++++++++++++++------------------------ src/id.rs | 31 +++++++++++++++++++++++++ src/lib.rs | 2 ++ src/objects.rs | 54 +++++++++++++++++++------------------------ 5 files changed, 90 insertions(+), 61 deletions(-) create mode 100644 src/id.rs diff --git a/Cargo.toml b/Cargo.toml index 781861c..16e6742 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -14,7 +14,7 @@ read-url = ["reqwest", "futures"] [dependencies] bytes = "1" csv = "1.1" -derivative = "2.1" +derivative = "2.2.0" serde = "1.0" serde_derive = "1.0" chrono = "0.4" diff --git a/src/gtfs.rs b/src/gtfs.rs index 6859ea1..aa63c66 100644 --- a/src/gtfs.rs +++ b/src/gtfs.rs @@ -1,9 +1,8 @@ -use crate::{objects::*, Error, RawGtfs}; +use crate::{objects::*, Error, Id, RawGtfs}; use chrono::prelude::NaiveDate; use chrono::Duration; use std::collections::{HashMap, HashSet}; use std::convert::TryFrom; -use std::sync::Arc; /// Data structure with all the GTFS objects /// @@ -27,8 +26,8 @@ pub struct Gtfs { pub calendar: HashMap, /// All calendar dates grouped by service_id pub calendar_dates: HashMap>, - /// All stop by `stop_id`. Stops are in an [Arc] because they are also referenced by each [StopTime] - pub stops: HashMap>, + /// All stop by `stop_id` + pub stops: HashMap, Stop>, /// All routes by `route_id` pub routes: HashMap, /// All trips by `trip_id` @@ -49,12 +48,12 @@ impl TryFrom for Gtfs { /// /// It might fail if some mandatory files couldn’t be read or if there are references to other objects that are invalid. fn try_from(raw: RawGtfs) -> Result { + let frequencies = raw.frequencies.unwrap_or_else(|| Ok(Vec::new()))?; let stops = to_stop_map( raw.stops?, raw.transfers.unwrap_or_else(|| Ok(Vec::new()))?, raw.pathways.unwrap_or(Ok(Vec::new()))?, )?; - let frequencies = raw.frequencies.unwrap_or_else(|| Ok(Vec::new()))?; let trips = create_trips(raw.trips?, raw.stop_times?, frequencies, &stops)?; Ok(Gtfs { @@ -175,10 +174,10 @@ impl Gtfs { } /// Gets a [Stop] by its `stop_id` - pub fn get_stop<'a>(&'a self, id: &str) -> Result<&'a Stop, Error> { + pub fn get_stop<'a>(&'a self, id: &Id) -> Result<&'a Stop, Error> { match self.stops.get(id) { Some(stop) => Ok(stop), - None => Err(Error::ReferenceError(id.to_owned())), + None => Err(Error::ReferenceError(id.to_string())), } } @@ -225,7 +224,7 @@ impl Gtfs { } } -fn to_map(elements: impl IntoIterator) -> HashMap { +fn to_map(elements: impl IntoIterator) -> HashMap { elements .into_iter() .map(|e| (e.id().to_owned(), e)) @@ -236,33 +235,35 @@ fn to_stop_map( stops: Vec, raw_transfers: Vec, raw_pathways: Vec, -) -> Result>, Error> { - let mut stop_map: HashMap = - stops.into_iter().map(|s| (s.id.clone(), s)).collect(); - +) -> Result, Stop>, Error> { + let mut stop_map = HashMap::, Stop>::default(); + for s in stops.into_iter() { + stop_map.insert(Id::must_exists(s.id.clone()), s); + } for transfer in raw_transfers { + // Note: I'm not convinced at all by this Id::must_exists... + // we shouldn't have to allocate here, and we must find a way to really ensure that the id exists (or remove the verbosity) stop_map - .get(&transfer.to_stop_id) + .get(&Id::must_exists(transfer.to_stop_id.clone())) .ok_or_else(|| Error::ReferenceError(transfer.to_stop_id.to_string()))?; - stop_map - .entry(transfer.from_stop_id.clone()) - .and_modify(|stop| stop.transfers.push(StopTransfer::from(transfer))); + let to_stop_id = Id::must_exists(transfer.to_stop_id.clone()); + let s = stop_map + .get_mut(&Id::must_exists(transfer.from_stop_id.clone())) + .ok_or_else(|| Error::ReferenceError(transfer.from_stop_id.to_string()))?; + s.transfers.push(StopTransfer::from((transfer, to_stop_id))); } for pathway in raw_pathways { stop_map - .get(&pathway.to_stop_id) + .get(&Id::must_exists(pathway.to_stop_id.clone())) .ok_or_else(|| Error::ReferenceError(pathway.to_stop_id.to_string()))?; - stop_map - .entry(pathway.from_stop_id.clone()) - .and_modify(|stop| stop.pathways.push(Pathway::from(pathway))); + let s = stop_map + .get_mut(&Id::must_exists(pathway.from_stop_id.clone())) + .ok_or_else(|| Error::ReferenceError(pathway.to_stop_id.to_string()))?; + let stop_id = Id::must_exists(pathway.to_stop_id.clone()); + s.pathways.push(Pathway::from((pathway, stop_id))); } - - let res = stop_map - .into_iter() - .map(|(i, s)| (i, Arc::new(s))) - .collect(); - Ok(res) + Ok(stop_map) } fn to_shape_map(shapes: Vec) -> HashMap> { @@ -292,7 +293,7 @@ fn create_trips( raw_trips: Vec, raw_stop_times: Vec, raw_frequencies: Vec, - stops: &HashMap>, + stops: &HashMap, Stop>, ) -> Result, Error> { let mut trips = to_map(raw_trips.into_iter().map(|rt| Trip { id: rt.id, @@ -312,10 +313,11 @@ fn create_trips( let trip = &mut trips .get_mut(&s.trip_id) .ok_or_else(|| Error::ReferenceError(s.trip_id.to_string()))?; + let stop_id = Id::must_exists(s.stop_id.clone()); let stop = stops - .get(&s.stop_id) - .ok_or_else(|| Error::ReferenceError(s.stop_id.to_string()))?; - trip.stop_times.push(StopTime::from(&s, Arc::clone(stop))); + .get(&stop_id) + .ok_or_else(|| Error::ReferenceError(stop_id.to_string()))?; + trip.stop_times.push(StopTime::from(&s, stop_id)); } for trip in &mut trips.values_mut() { diff --git a/src/id.rs b/src/id.rs new file mode 100644 index 0000000..ee0baf4 --- /dev/null +++ b/src/id.rs @@ -0,0 +1,31 @@ +use core::marker::PhantomData; +#[derive(Derivative)] +#[derive(Serialize, Deserialize)] +#[derivative(Debug, Clone, PartialEq, Eq, Ord, PartialOrd, Hash)] +pub struct Id { + id: String, + #[serde(skip)] + #[derivative(Debug(bound = ""))] + #[derivative(Debug = "ignore")] + #[derivative(Clone(bound = ""))] + #[derivative(Eq(bound = ""))] + #[derivative(PartialEq(bound = ""))] + #[derivative(Hash(bound = ""))] + _phantom: PhantomData, +} + +impl Id { + pub fn must_exists(s: String) -> Id { + Id { + id: s, + _phantom: PhantomData, + } + } +} + +impl std::ops::Deref for Id { + type Target = str; + fn deref(&self) -> &str { + &self.id + } +} diff --git a/src/lib.rs b/src/lib.rs index ccbd3ff..188e021 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -47,6 +47,7 @@ mod enums; pub mod error; mod gtfs; mod gtfs_reader; +mod id; pub(crate) mod objects; mod raw_gtfs; mod serde_helpers; @@ -57,5 +58,6 @@ mod tests; pub use error::Error; pub use gtfs::Gtfs; pub use gtfs_reader::GtfsReader; +pub use id::Id; pub use objects::*; pub use raw_gtfs::RawGtfs; diff --git a/src/objects.rs b/src/objects.rs index 8e4b079..97a93c0 100644 --- a/src/objects.rs +++ b/src/objects.rs @@ -1,5 +1,5 @@ pub use crate::enums::*; -use crate::serde_helpers::*; +use crate::{serde_helpers::*, Id}; use chrono::{Datelike, NaiveDate, Weekday}; use rgb::RGB8; @@ -9,17 +9,11 @@ use std::sync::Arc; /// Objects that have an identifier implement this trait /// /// Those identifier are technical and should not be shown to travellers -pub trait Id { +pub trait WithId { /// Identifier of the object fn id(&self) -> &str; } -impl Id for Arc { - fn id(&self) -> &str { - self.as_ref().id() - } -} - /// Trait to introspect what is the object’s type (stop, route…) pub trait Type { /// What is the type of the object @@ -100,7 +94,7 @@ impl Type for Calendar { } } -impl Id for Calendar { +impl WithId for Calendar { fn id(&self) -> &str { &self.id } @@ -199,7 +193,7 @@ impl Type for Stop { } } -impl Id for Stop { +impl WithId for Stop { fn id(&self) -> &str { &self.id } @@ -258,14 +252,14 @@ pub struct RawStopTime { } /// The moment where a vehicle, running on [Trip] stops at a [Stop]. See -#[derive(Clone, Debug, Default)] +#[derive(Clone, Debug)] pub struct StopTime { /// Arrival time of the stop time. /// It's an option since the intermediate stops can have have no arrival /// and this arrival needs to be interpolated pub arrival_time: Option, /// [Stop] where the vehicle stops - pub stop: Arc, + pub stop: Id, /// Departure time of the stop time. /// It's an option since the intermediate stops can have have no departure /// and this departure needs to be interpolated @@ -290,7 +284,7 @@ pub struct StopTime { impl StopTime { /// Creates [StopTime] by linking a [RawStopTime::stop_id] to the actual [Stop] - pub fn from(stop_time_gtfs: &RawStopTime, stop: Arc) -> Self { + pub fn from(stop_time_gtfs: &RawStopTime, stop: Id) -> Self { Self { arrival_time: stop_time_gtfs.arrival_time, departure_time: stop_time_gtfs.departure_time, @@ -362,7 +356,7 @@ impl Type for Route { } } -impl Id for Route { +impl WithId for Route { fn id(&self) -> &str { &self.id } @@ -412,7 +406,7 @@ impl Type for RawTrip { } } -impl Id for RawTrip { +impl WithId for RawTrip { fn id(&self) -> &str { &self.id } @@ -463,7 +457,7 @@ impl Type for Trip { } } -impl Id for Trip { +impl WithId for Trip { fn id(&self) -> &str { &self.id } @@ -514,7 +508,7 @@ impl Type for Agency { } } -impl Id for Agency { +impl WithId for Agency { fn id(&self) -> &str { match &self.id { None => "", @@ -555,7 +549,7 @@ impl Type for Shape { } } -impl Id for Shape { +impl WithId for Shape { fn id(&self) -> &str { &self.id } @@ -582,7 +576,7 @@ pub struct FareAttribute { pub transfer_duration: Option, } -impl Id for FareAttribute { +impl WithId for FareAttribute { fn id(&self) -> &str { &self.id } @@ -655,22 +649,22 @@ pub struct RawTransfer { pub min_transfer_time: Option, } -#[derive(Debug, Default, Clone)] +#[derive(Debug, Clone)] /// Transfer information between stops pub struct StopTransfer { /// Stop which to transfer to - pub to_stop_id: String, + pub to_stop_id: Id, /// Type of the transfer pub transfer_type: TransferType, /// Minimum time needed to make the transfer in seconds pub min_transfer_time: Option, } -impl From for StopTransfer { +impl From<(RawTransfer, Id)> for StopTransfer { /// Converts from a [RawTransfer] to a [StopTransfer] - fn from(transfer: RawTransfer) -> Self { + fn from((transfer, to_stop_id): (RawTransfer, Id)) -> Self { Self { - to_stop_id: transfer.to_stop_id, + to_stop_id: to_stop_id, transfer_type: transfer.transfer_type, min_transfer_time: transfer.min_transfer_time, } @@ -755,12 +749,12 @@ pub struct RawPathway { } /// Pathway going from a stop to another. -#[derive(Debug, Clone, Default, Serialize, Deserialize)] +#[derive(Debug, Clone, Serialize, Deserialize)] pub struct Pathway { /// Uniquely identifies the pathway pub id: String, /// Location at which the pathway ends - pub to_stop_id: String, + pub to_stop_id: Id, /// Type of pathway between the specified (from_stop_id, to_stop_id) pair pub mode: PathwayMode, /// Indicates in which direction the pathway can be used @@ -781,18 +775,18 @@ pub struct Pathway { pub reversed_signposted_as: Option, } -impl Id for Pathway { +impl WithId for Pathway { fn id(&self) -> &str { &self.id } } -impl From for Pathway { +impl From<(RawPathway, Id)> for Pathway { /// Converts from a [RawPathway] to a [Pathway] - fn from(raw: RawPathway) -> Self { + fn from((raw, to_stop_id): (RawPathway, Id)) -> Self { Self { id: raw.id, - to_stop_id: raw.to_stop_id, + to_stop_id: to_stop_id, mode: raw.mode, is_bidirectional: raw.is_bidirectional, length: raw.length,