Skip to content

Commit

Permalink
feat: Added redirect field (id and comment) (#150)
Browse files Browse the repository at this point in the history
  • Loading branch information
jcpitre authored Nov 6, 2023
1 parent bd7357b commit ffa1896
Show file tree
Hide file tree
Showing 10 changed files with 158 additions and 74 deletions.
8 changes: 0 additions & 8 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -59,14 +59,6 @@ docker-compose --env-file ./config/.env.local up schemaspy -d --force-recreate
scripts/api-start.sh
```

## Unit tests
Test are located in `tests` directory.

To run all tests:
```bash
scripts/api-start.sh
```

## Linter
This repository uses Flake8 and Black for code styling

Expand Down
1 change: 1 addition & 0 deletions api/.openapi-generator/FILES
Original file line number Diff line number Diff line change
Expand Up @@ -18,5 +18,6 @@ src/feeds_gen/models/gtfs_rt_feed.py
src/feeds_gen/models/latest_dataset.py
src/feeds_gen/models/location.py
src/feeds_gen/models/metadata.py
src/feeds_gen/models/redirect.py
src/feeds_gen/models/source_info.py
src/feeds_gen/security_api.py
54 changes: 42 additions & 12 deletions api/src/feeds/impl/feeds_api_impl.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,9 @@
t_locationfeed,
Location,
Entitytype,
Redirectingid,
)
from database_gen.sqlacodegen_models import (
t_redirectingid,
t_entitytypefeed,
t_feedreference,
)
Expand All @@ -33,6 +33,7 @@
from feeds_gen.models.latest_dataset import LatestDataset
from feeds_gen.models.location import Location as ApiLocation
from feeds_gen.models.source_info import SourceInfo
from feeds_gen.models.redirect import Redirect


class FeedsApiImpl(BaseFeedsApi):
Expand All @@ -48,7 +49,7 @@ class FeedsApiImpl(BaseFeedsApi):
def _create_common_feed(
database_feed: Feed,
clazz: Type[APIFeedType],
redirects: Set[str],
redirects: [Redirect],
external_ids: Set[Externalid],
) -> Union[APIFeedType]:
"""Maps the ORM object Feed to API data model specified by clazz"""
Expand Down Expand Up @@ -81,13 +82,13 @@ def _create_common_feed(
def _create_feeds_query(feed_type: Type[Feed]) -> Query:
target_feed = aliased(Feed)
return (
Query([feed_type, target_feed.stable_id, Externalid])
Query([feed_type, target_feed.stable_id, Externalid, Redirectingid.redirect_comment])
.join(
t_redirectingid,
feed_type.id == t_redirectingid.c["source_id"],
Redirectingid,
feed_type.id == Redirectingid.source_id,
isouter=True,
)
.join(target_feed, t_redirectingid.c.target_id == target_feed.id, isouter=True)
.join(target_feed, Redirectingid.target_id == target_feed.id, isouter=True)
.join(Externalid, feed_type.id == Externalid.feed_id, isouter=True)
)

Expand All @@ -110,9 +111,16 @@ def _get_basic_feeds(
)
basic_feeds = []
for feed_group in feed_groups:
feed_objects, redirects, external_ids = zip(*feed_group)
feed_objects, redirect_ids, external_ids, redirect_comments = zip(*feed_group)
# Put together the redirect ids and the corresponding comments. Eliminate Nones.
redirects_list = [
Redirect(target_id=redirect, comment=comment)
for redirect, comment in zip(redirect_ids, redirect_comments)
if redirect is not None
]

basic_feeds.append(
FeedsApiImpl._create_common_feed(feed_objects[0], BasicFeed, set(redirects), set(external_ids))
FeedsApiImpl._create_common_feed(feed_objects[0], BasicFeed, redirects_list, set(external_ids))
)
return basic_feeds

Expand Down Expand Up @@ -168,9 +176,22 @@ def _get_order_by_key(order_by: list[str] = None):
)
gtfs_feeds = []
for feed_group in feed_groups:
feed_objects, redirects, external_ids, latest_datasets, locations = zip(*feed_group)
feed_objects, redirect_ids, external_ids, redirect_comments, latest_datasets, locations = zip(*feed_group)

# We use a set to eliminate duplicates in the Redirects.
# But we can't use the Redirect objects directly since they are not hashable, and making them
# hashable is tricky since the class is generated by the openapi generator.
# So instead we transfer the Redirect data into simple tuples to temporarily use in the set.
redirects_set = set()
for redirect, comment in zip(redirect_ids, redirect_comments):
if redirect is not None:
redirect_tuple = (redirect, comment)
redirects_set.add(redirect_tuple)

# Convert the set of unique tuples back to a list of Redirect objects
redirects_list = [Redirect(target_id=redirect, comment=comment) for redirect, comment in redirects_set]

gtfs_feed = FeedsApiImpl._create_common_feed(feed_objects[0], GtfsFeed, set(redirects), set(external_ids))
gtfs_feed = FeedsApiImpl._create_common_feed(feed_objects[0], GtfsFeed, redirects_list, set(external_ids))
gtfs_feed.locations = [
ApiLocation(
country_code=location.country_code,
Expand Down Expand Up @@ -221,10 +242,19 @@ def _get_gtfs_rt_feeds(
)
gtfs_rt_feeds = []
for feed_group in feed_groups:
feed_objects, redirects, external_ids, entity_types, feed_references = zip(*feed_group)
feed_objects, redirect_ids, external_ids, redirect_comments, entity_types, feed_references = zip(
*feed_group
)

# Put together the redirect ids and the corresponding comments. Eliminate Nones.
redirects_list = [
Redirect(target_id=redirect, comment=comment)
for redirect, comment in zip(redirect_ids, redirect_comments)
if redirect is not None
]

gtfs_rt_feed = FeedsApiImpl._create_common_feed(
feed_objects[0], GtfsRTFeed, set(redirects), set(external_ids)
feed_objects[0], GtfsRTFeed, redirects_list, set(external_ids)
)
gtfs_rt_feed.entity_types = {entity_type for entity_type in entity_types if entity_type is not None}
gtfs_rt_feed.feed_references = {reference for reference in feed_references if reference is not None}
Expand Down
67 changes: 51 additions & 16 deletions api/src/scripts/populate_db.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,13 +13,13 @@
from database.database import Database, generate_unique_id
from database_gen.sqlacodegen_models import (
Component,
Feed,
Entitytype,
Externalid,
Gtfsdataset,
Gtfsfeed,
Gtfsrealtimefeed,
Location,
Redirectingid,
Base,
)
from utils.logger import Logger
Expand Down Expand Up @@ -122,6 +122,10 @@ def add_entity(entity, priority):

if self.df is None:
return

# Keep a dict (map) of stable_id -> feed so we can reference the feeds when processing the static_reference
# and the redirects.
feed_map = {}
for index, row in self.df.iterrows():
mdb_id = f"mdb-{int(row['mdb_source_id'])}"
self.logger.debug(f"Populating Database for with Feed [stable_id = {mdb_id}]")
Expand All @@ -134,7 +138,7 @@ def add_entity(entity, priority):
feed_name=row["name"],
note=row["note"],
producer_url=row["urls.direct_download"],
authentication_type=str(int(row["urls.authentication_type"])),
authentication_type=str(int(row.get("urls.authentication_type", "0") or "0")),
authentication_info_url=row["urls.authentication_info"],
api_key_parameter_name=row["urls.api_key_parameter_name"],
license_url=row["urls.license"],
Expand All @@ -143,6 +147,8 @@ def add_entity(entity, priority):
provider=row["provider"],
)

feed_map[mdb_id] = feed

# Location
country_code = row["location.country_code"]
subdivision_name = row["location.subdivision_name"]
Expand Down Expand Up @@ -210,20 +216,6 @@ def add_entity(entity, priority):
entity_type.feeds.append(feed)
add_entity(entity_type, 4)

# Feed Reference
if row["static_reference"] is not None:
referenced_feeds_list = [
entity
for entity in entities
if isinstance(entity, Feed) and entity.stable_id == f"mdb-{int(row['static_reference'])}"
]
if len(referenced_feeds_list) == 1:
referenced_feeds_list[0].gtfs_rt_feeds.append(feed)
else:
self.logger.error(
f'Couldn\'t create reference from {feed.stable_id} to {row["static_reference"]}'
)

# External ID
mdb_external_id = Externalid(
feed_id=feed.id,
Expand All @@ -232,6 +224,49 @@ def add_entity(entity, priority):
)
add_entity(mdb_external_id, 4)

# Iterate again over the contents of the csv files to process the feed references.
for index, row in self.df.iterrows():
mdb_id = f"mdb-{int(row['mdb_source_id'])}"
feed = feed_map[mdb_id]
if row["data_type"] == "gtfs_rt":
# Feed Reference
if row["static_reference"] is not None:
static_reference_mdb_id = f"mdb-{int(row['static_reference'])}"
referenced_feed = feed_map.get(static_reference_mdb_id, None)
if referenced_feed:
referenced_feed.gtfs_rt_feeds.append(feed)

# Process redirects
raw_redirects = row.get("redirect.id", None)
redirects_ids = raw_redirects.split("|") if raw_redirects is not None else []
raw_comments = row.get("redirect.comment", None)
comments = raw_comments.split("|") if raw_comments is not None else []

if len(redirects_ids) != len(comments):
self.logger.warn(f"Number of redirect ids and redirect comments differ for feed {mdb_id}")

for mdb_source_id in redirects_ids:
if len(mdb_source_id) == 0:
# since there is a 1:1 correspondence between redirect ids and comments, also skip the corresponding comment
comments = comments[1:]
continue
if comments:
comment = comments.pop(0)
else:
comment = ""

target_stable_id = f"mdb-{mdb_source_id}"
target_feed = feed_map.get(target_stable_id, None)

if target_feed:
if target_feed.id != feed.id:
redirect = Redirectingid(source_id=feed.id, target_id=target_feed.id, redirect_comment=comment)
add_entity(redirect, 5)
else:
self.logger.error(f"Feed has redirect pointing to itself {mdb_id}")
else:
self.logger.warn(f"Could not find redirect target feed {target_stable_id} for feed {mdb_id}")

priority = 1
while not entities_index.empty():
next_priority, entity_index = entities_index.get()
Expand Down
4 changes: 2 additions & 2 deletions api/tests/test_database.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,14 +87,14 @@ def test_merge_gtfs_feed(test_database):
assert sorted([external_id.source for external_id in feed_1.external_ids]) == ["source1", "source2"]

assert feed_1.latest_dataset.id == TEST_DATASET_STABLE_IDS[1]
assert sorted([redirect for redirect in feed_1.redirects]) == [TEST_GTFS_FEED_STABLE_IDS[1]]
assert sorted([redirect.target_id for redirect in feed_1.redirects]) == [TEST_GTFS_FEED_STABLE_IDS[1]]

assert feed_2 is not None
assert sorted([external_id.external_id for external_id in feed_2.external_ids]) == TEST_EXTERNAL_IDS[2:]
assert sorted([external_id.source for external_id in feed_2.external_ids]) == ["source3", "source4"]

assert feed_2.latest_dataset.id == TEST_DATASET_STABLE_IDS[3]
assert sorted([redirect for redirect in feed_2.redirects]) == [
assert sorted([redirect.target_id for redirect in feed_2.redirects]) == [
TEST_GTFS_FEED_STABLE_IDS[2],
TEST_GTFS_FEED_STABLE_IDS[3],
]
Loading

0 comments on commit ffa1896

Please sign in to comment.