Skip to content

Commit

Permalink
Poc to have typed object indexes
Browse files Browse the repository at this point in the history
Same as #123 but
without generational_indexes.

I think the indexes are really great for performance since:
* all structures are then `Vector` (great random access and cache friendly
iterations)
* the ids are short (integer vs string)

I don't think we need those performance in this crate though. Having
only typed index (a think wrapper over the real gtfs identifier) would
be more convenient I think.
It would:
* ease debug (easy to print the gtfs identifier, instead of having a
  meaningless integer)
* ease serialisation (same, we can serialize the string right away)
* be a more more close to the gtfs representation

This is still *very* early WIP, I'm not convinced at all by the
ergonomics, I'd like to keep the property of the Index in
#123 that the `Id`
have at least existed at one point (even if I don't plan to ensure this
if an object is deleted).
  • Loading branch information
antoine-de committed Mar 23, 2022
1 parent 90db386 commit 730b882
Show file tree
Hide file tree
Showing 5 changed files with 90 additions and 61 deletions.
2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ read-url = ["reqwest", "futures"]
[dependencies]
bytes = "1"
csv = "1.1"
derivative = "2.1"
derivative = "2.2.0"
serde = "1.0"
serde_derive = "1.0"
chrono = "0.4"
Expand Down
62 changes: 32 additions & 30 deletions src/gtfs.rs
Original file line number Diff line number Diff line change
@@ -1,9 +1,8 @@
use crate::{objects::*, Error, RawGtfs};
use crate::{objects::*, Error, Id, RawGtfs};
use chrono::prelude::NaiveDate;
use chrono::Duration;
use std::collections::{HashMap, HashSet};
use std::convert::TryFrom;
use std::sync::Arc;

/// Data structure with all the GTFS objects
///
Expand All @@ -27,8 +26,8 @@ pub struct Gtfs {
pub calendar: HashMap<String, Calendar>,
/// All calendar dates grouped by service_id
pub calendar_dates: HashMap<String, Vec<CalendarDate>>,
/// All stop by `stop_id`. Stops are in an [Arc] because they are also referenced by each [StopTime]
pub stops: HashMap<String, Arc<Stop>>,
/// All stop by `stop_id`
pub stops: HashMap<Id<Stop>, Stop>,
/// All routes by `route_id`
pub routes: HashMap<String, Route>,
/// All trips by `trip_id`
Expand All @@ -49,12 +48,12 @@ impl TryFrom<RawGtfs> for Gtfs {
///
/// It might fail if some mandatory files couldn’t be read or if there are references to other objects that are invalid.
fn try_from(raw: RawGtfs) -> Result<Gtfs, Error> {
let frequencies = raw.frequencies.unwrap_or_else(|| Ok(Vec::new()))?;
let stops = to_stop_map(
raw.stops?,
raw.transfers.unwrap_or_else(|| Ok(Vec::new()))?,
raw.pathways.unwrap_or(Ok(Vec::new()))?,
)?;
let frequencies = raw.frequencies.unwrap_or_else(|| Ok(Vec::new()))?;
let trips = create_trips(raw.trips?, raw.stop_times?, frequencies, &stops)?;

Ok(Gtfs {
Expand Down Expand Up @@ -175,10 +174,10 @@ impl Gtfs {
}

/// Gets a [Stop] by its `stop_id`
pub fn get_stop<'a>(&'a self, id: &str) -> Result<&'a Stop, Error> {
pub fn get_stop<'a>(&'a self, id: &Id<Stop>) -> Result<&'a Stop, Error> {
match self.stops.get(id) {
Some(stop) => Ok(stop),
None => Err(Error::ReferenceError(id.to_owned())),
None => Err(Error::ReferenceError(id.to_string())),
}
}

Expand Down Expand Up @@ -225,7 +224,7 @@ impl Gtfs {
}
}

fn to_map<O: Id>(elements: impl IntoIterator<Item = O>) -> HashMap<String, O> {
fn to_map<O: WithId>(elements: impl IntoIterator<Item = O>) -> HashMap<String, O> {
elements
.into_iter()
.map(|e| (e.id().to_owned(), e))
Expand All @@ -236,33 +235,35 @@ fn to_stop_map(
stops: Vec<Stop>,
raw_transfers: Vec<RawTransfer>,
raw_pathways: Vec<RawPathway>,
) -> Result<HashMap<String, Arc<Stop>>, Error> {
let mut stop_map: HashMap<String, Stop> =
stops.into_iter().map(|s| (s.id.clone(), s)).collect();

) -> Result<HashMap<Id<Stop>, Stop>, Error> {
let mut stop_map = HashMap::<Id<Stop>, Stop>::default();
for s in stops.into_iter() {
stop_map.insert(Id::must_exists(s.id.clone()), s);
}
for transfer in raw_transfers {
// Note: I'm not convinced at all by this Id::must_exists...
// we shouldn't have to allocate here, and we must find a way to really ensure that the id exists (or remove the verbosity)
stop_map
.get(&transfer.to_stop_id)
.get(&Id::must_exists(transfer.to_stop_id.clone()))
.ok_or_else(|| Error::ReferenceError(transfer.to_stop_id.to_string()))?;
stop_map
.entry(transfer.from_stop_id.clone())
.and_modify(|stop| stop.transfers.push(StopTransfer::from(transfer)));
let to_stop_id = Id::must_exists(transfer.to_stop_id.clone());
let s = stop_map
.get_mut(&Id::must_exists(transfer.from_stop_id.clone()))
.ok_or_else(|| Error::ReferenceError(transfer.from_stop_id.to_string()))?;
s.transfers.push(StopTransfer::from((transfer, to_stop_id)));
}

for pathway in raw_pathways {
stop_map
.get(&pathway.to_stop_id)
.get(&Id::must_exists(pathway.to_stop_id.clone()))
.ok_or_else(|| Error::ReferenceError(pathway.to_stop_id.to_string()))?;
stop_map
.entry(pathway.from_stop_id.clone())
.and_modify(|stop| stop.pathways.push(Pathway::from(pathway)));
let s = stop_map
.get_mut(&Id::must_exists(pathway.from_stop_id.clone()))
.ok_or_else(|| Error::ReferenceError(pathway.to_stop_id.to_string()))?;
let stop_id = Id::must_exists(pathway.to_stop_id.clone());
s.pathways.push(Pathway::from((pathway, stop_id)));
}

let res = stop_map
.into_iter()
.map(|(i, s)| (i, Arc::new(s)))
.collect();
Ok(res)
Ok(stop_map)
}

fn to_shape_map(shapes: Vec<Shape>) -> HashMap<String, Vec<Shape>> {
Expand Down Expand Up @@ -292,7 +293,7 @@ fn create_trips(
raw_trips: Vec<RawTrip>,
raw_stop_times: Vec<RawStopTime>,
raw_frequencies: Vec<RawFrequency>,
stops: &HashMap<String, Arc<Stop>>,
stops: &HashMap<Id<Stop>, Stop>,
) -> Result<HashMap<String, Trip>, Error> {
let mut trips = to_map(raw_trips.into_iter().map(|rt| Trip {
id: rt.id,
Expand All @@ -312,10 +313,11 @@ fn create_trips(
let trip = &mut trips
.get_mut(&s.trip_id)
.ok_or_else(|| Error::ReferenceError(s.trip_id.to_string()))?;
let stop_id = Id::must_exists(s.stop_id.clone());
let stop = stops
.get(&s.stop_id)
.ok_or_else(|| Error::ReferenceError(s.stop_id.to_string()))?;
trip.stop_times.push(StopTime::from(&s, Arc::clone(stop)));
.get(&stop_id)
.ok_or_else(|| Error::ReferenceError(stop_id.to_string()))?;
trip.stop_times.push(StopTime::from(&s, stop_id));
}

for trip in &mut trips.values_mut() {
Expand Down
31 changes: 31 additions & 0 deletions src/id.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
use core::marker::PhantomData;
#[derive(Derivative)]
#[derive(Serialize, Deserialize)]
#[derivative(Debug, Clone, PartialEq, Eq, Ord, PartialOrd, Hash)]
pub struct Id<T> {
id: String,
#[serde(skip)]
#[derivative(Debug(bound = ""))]
#[derivative(Debug = "ignore")]
#[derivative(Clone(bound = ""))]
#[derivative(Eq(bound = ""))]
#[derivative(PartialEq(bound = ""))]
#[derivative(Hash(bound = ""))]
_phantom: PhantomData<T>,
}

impl<T> Id<T> {
pub fn must_exists(s: String) -> Id<T> {
Id {
id: s,
_phantom: PhantomData,
}
}
}

impl<T> std::ops::Deref for Id<T> {
type Target = str;
fn deref(&self) -> &str {
&self.id
}
}
2 changes: 2 additions & 0 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ mod enums;
pub mod error;
mod gtfs;
mod gtfs_reader;
mod id;
pub(crate) mod objects;
mod raw_gtfs;
mod serde_helpers;
Expand All @@ -57,5 +58,6 @@ mod tests;
pub use error::Error;
pub use gtfs::Gtfs;
pub use gtfs_reader::GtfsReader;
pub use id::Id;
pub use objects::*;
pub use raw_gtfs::RawGtfs;
54 changes: 24 additions & 30 deletions src/objects.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
pub use crate::enums::*;
use crate::serde_helpers::*;
use crate::{serde_helpers::*, Id};
use chrono::{Datelike, NaiveDate, Weekday};
use rgb::RGB8;

Expand All @@ -9,17 +9,11 @@ use std::sync::Arc;
/// Objects that have an identifier implement this trait
///
/// Those identifier are technical and should not be shown to travellers
pub trait Id {
pub trait WithId {
/// Identifier of the object
fn id(&self) -> &str;
}

impl<T: Id> Id for Arc<T> {
fn id(&self) -> &str {
self.as_ref().id()
}
}

/// Trait to introspect what is the object’s type (stop, route…)
pub trait Type {
/// What is the type of the object
Expand Down Expand Up @@ -100,7 +94,7 @@ impl Type for Calendar {
}
}

impl Id for Calendar {
impl WithId for Calendar {
fn id(&self) -> &str {
&self.id
}
Expand Down Expand Up @@ -199,7 +193,7 @@ impl Type for Stop {
}
}

impl Id for Stop {
impl WithId for Stop {
fn id(&self) -> &str {
&self.id
}
Expand Down Expand Up @@ -258,14 +252,14 @@ pub struct RawStopTime {
}

/// The moment where a vehicle, running on [Trip] stops at a [Stop]. See <https://gtfs.org/reference/static/#stopstxt>
#[derive(Clone, Debug, Default)]
#[derive(Clone, Debug)]
pub struct StopTime {
/// Arrival time of the stop time.
/// It's an option since the intermediate stops can have have no arrival
/// and this arrival needs to be interpolated
pub arrival_time: Option<u32>,
/// [Stop] where the vehicle stops
pub stop: Arc<Stop>,
pub stop: Id<Stop>,
/// Departure time of the stop time.
/// It's an option since the intermediate stops can have have no departure
/// and this departure needs to be interpolated
Expand All @@ -290,7 +284,7 @@ pub struct StopTime {

impl StopTime {
/// Creates [StopTime] by linking a [RawStopTime::stop_id] to the actual [Stop]
pub fn from(stop_time_gtfs: &RawStopTime, stop: Arc<Stop>) -> Self {
pub fn from(stop_time_gtfs: &RawStopTime, stop: Id<Stop>) -> Self {
Self {
arrival_time: stop_time_gtfs.arrival_time,
departure_time: stop_time_gtfs.departure_time,
Expand Down Expand Up @@ -362,7 +356,7 @@ impl Type for Route {
}
}

impl Id for Route {
impl WithId for Route {
fn id(&self) -> &str {
&self.id
}
Expand Down Expand Up @@ -412,7 +406,7 @@ impl Type for RawTrip {
}
}

impl Id for RawTrip {
impl WithId for RawTrip {
fn id(&self) -> &str {
&self.id
}
Expand Down Expand Up @@ -463,7 +457,7 @@ impl Type for Trip {
}
}

impl Id for Trip {
impl WithId for Trip {
fn id(&self) -> &str {
&self.id
}
Expand Down Expand Up @@ -514,7 +508,7 @@ impl Type for Agency {
}
}

impl Id for Agency {
impl WithId for Agency {
fn id(&self) -> &str {
match &self.id {
None => "",
Expand Down Expand Up @@ -555,7 +549,7 @@ impl Type for Shape {
}
}

impl Id for Shape {
impl WithId for Shape {
fn id(&self) -> &str {
&self.id
}
Expand All @@ -582,7 +576,7 @@ pub struct FareAttribute {
pub transfer_duration: Option<usize>,
}

impl Id for FareAttribute {
impl WithId for FareAttribute {
fn id(&self) -> &str {
&self.id
}
Expand Down Expand Up @@ -655,22 +649,22 @@ pub struct RawTransfer {
pub min_transfer_time: Option<u32>,
}

#[derive(Debug, Default, Clone)]
#[derive(Debug, Clone)]
/// Transfer information between stops
pub struct StopTransfer {
/// Stop which to transfer to
pub to_stop_id: String,
pub to_stop_id: Id<Stop>,
/// Type of the transfer
pub transfer_type: TransferType,
/// Minimum time needed to make the transfer in seconds
pub min_transfer_time: Option<u32>,
}

impl From<RawTransfer> for StopTransfer {
impl From<(RawTransfer, Id<Stop>)> for StopTransfer {
/// Converts from a [RawTransfer] to a [StopTransfer]
fn from(transfer: RawTransfer) -> Self {
fn from((transfer, to_stop_id): (RawTransfer, Id<Stop>)) -> Self {
Self {
to_stop_id: transfer.to_stop_id,
to_stop_id: to_stop_id,
transfer_type: transfer.transfer_type,
min_transfer_time: transfer.min_transfer_time,
}
Expand Down Expand Up @@ -755,12 +749,12 @@ pub struct RawPathway {
}

/// Pathway going from a stop to another.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Pathway {
/// Uniquely identifies the pathway
pub id: String,
/// Location at which the pathway ends
pub to_stop_id: String,
pub to_stop_id: Id<Stop>,
/// Type of pathway between the specified (from_stop_id, to_stop_id) pair
pub mode: PathwayMode,
/// Indicates in which direction the pathway can be used
Expand All @@ -781,18 +775,18 @@ pub struct Pathway {
pub reversed_signposted_as: Option<String>,
}

impl Id for Pathway {
impl WithId for Pathway {
fn id(&self) -> &str {
&self.id
}
}

impl From<RawPathway> for Pathway {
impl From<(RawPathway, Id<Stop>)> for Pathway {
/// Converts from a [RawPathway] to a [Pathway]
fn from(raw: RawPathway) -> Self {
fn from((raw, to_stop_id): (RawPathway, Id<Stop>)) -> Self {
Self {
id: raw.id,
to_stop_id: raw.to_stop_id,
to_stop_id: to_stop_id,
mode: raw.mode,
is_bidirectional: raw.is_bidirectional,
length: raw.length,
Expand Down

0 comments on commit 730b882

Please sign in to comment.