diff --git a/README.md b/README.md
index 91a3616e3..f783d6fe2 100644
--- a/README.md
+++ b/README.md
@@ -59,14 +59,6 @@ docker-compose --env-file ./config/.env.local up schemaspy -d --force-recreate
scripts/api-start.sh
```
-## Unit tests
-Test are located in `tests` directory.
-
-To run all tests:
-```bash
-scripts/api-start.sh
-```
-
## Linter
This repository uses Flak8 and Black for code styling
diff --git a/api/.openapi-generator/FILES b/api/.openapi-generator/FILES
index 108655582..1de90fd7e 100644
--- a/api/.openapi-generator/FILES
+++ b/api/.openapi-generator/FILES
@@ -18,5 +18,6 @@ src/feeds_gen/models/gtfs_rt_feed.py
src/feeds_gen/models/latest_dataset.py
src/feeds_gen/models/location.py
src/feeds_gen/models/metadata.py
+src/feeds_gen/models/redirect.py
src/feeds_gen/models/source_info.py
src/feeds_gen/security_api.py
diff --git a/api/src/feeds/impl/feeds_api_impl.py b/api/src/feeds/impl/feeds_api_impl.py
index 5d75d0c69..57706877d 100644
--- a/api/src/feeds/impl/feeds_api_impl.py
+++ b/api/src/feeds/impl/feeds_api_impl.py
@@ -13,9 +13,9 @@
t_locationfeed,
Location,
Entitytype,
+ Redirectingid,
)
from database_gen.sqlacodegen_models import (
- t_redirectingid,
t_entitytypefeed,
t_feedreference,
)
@@ -33,6 +33,7 @@
from feeds_gen.models.latest_dataset import LatestDataset
from feeds_gen.models.location import Location as ApiLocation
from feeds_gen.models.source_info import SourceInfo
+from feeds_gen.models.redirect import Redirect
class FeedsApiImpl(BaseFeedsApi):
@@ -48,7 +49,7 @@ class FeedsApiImpl(BaseFeedsApi):
def _create_common_feed(
database_feed: Feed,
clazz: Type[APIFeedType],
- redirects: Set[str],
+ redirects: [Redirect],
external_ids: Set[Externalid],
) -> Union[APIFeedType]:
"""Maps the ORM object Feed to API data model specified by clazz"""
@@ -81,13 +82,13 @@ def _create_common_feed(
def _create_feeds_query(feed_type: Type[Feed]) -> Query:
target_feed = aliased(Feed)
return (
- Query([feed_type, target_feed.stable_id, Externalid])
+ Query([feed_type, target_feed.stable_id, Externalid, Redirectingid.redirect_comment])
.join(
- t_redirectingid,
- feed_type.id == t_redirectingid.c["source_id"],
+ Redirectingid,
+ feed_type.id == Redirectingid.source_id,
isouter=True,
)
- .join(target_feed, t_redirectingid.c.target_id == target_feed.id, isouter=True)
+ .join(target_feed, Redirectingid.target_id == target_feed.id, isouter=True)
.join(Externalid, feed_type.id == Externalid.feed_id, isouter=True)
)
@@ -110,9 +111,16 @@ def _get_basic_feeds(
)
basic_feeds = []
for feed_group in feed_groups:
- feed_objects, redirects, external_ids = zip(*feed_group)
+ feed_objects, redirect_ids, external_ids, redirect_comments = zip(*feed_group)
+ # Put together the redirect ids and the corresponding comments. Eliminate Nones.
+ redirects_list = [
+ Redirect(target_id=redirect, comment=comment)
+ for redirect, comment in zip(redirect_ids, redirect_comments)
+ if redirect is not None
+ ]
+
basic_feeds.append(
- FeedsApiImpl._create_common_feed(feed_objects[0], BasicFeed, set(redirects), set(external_ids))
+ FeedsApiImpl._create_common_feed(feed_objects[0], BasicFeed, redirects_list, set(external_ids))
)
return basic_feeds
@@ -168,9 +176,22 @@ def _get_order_by_key(order_by: list[str] = None):
)
gtfs_feeds = []
for feed_group in feed_groups:
- feed_objects, redirects, external_ids, latest_datasets, locations = zip(*feed_group)
+ feed_objects, redirect_ids, external_ids, redirect_comments, latest_datasets, locations = zip(*feed_group)
+
+ # We use a set to eliminate duplicate in the Redirects.
+ # But we can't use the Redirect object directly since they are not hashable and making them
+ # hashable is more tricky since the class is generated by the openapi generator.
+ # So instead transfer the Redirect data to a simple dict to temporarily use in the set.
+ redirects_set = set()
+ for redirect, comment in zip(redirect_ids, redirect_comments):
+ if redirect is not None:
+ redirect_tuple = (redirect, comment)
+ redirects_set.add(redirect_tuple)
+
+ # Convert the set of unique tuples back to a list of Redirect objects
+ redirects_list = [Redirect(target_id=redirect, comment=comment) for redirect, comment in redirects_set]
- gtfs_feed = FeedsApiImpl._create_common_feed(feed_objects[0], GtfsFeed, set(redirects), set(external_ids))
+ gtfs_feed = FeedsApiImpl._create_common_feed(feed_objects[0], GtfsFeed, redirects_list, set(external_ids))
gtfs_feed.locations = [
ApiLocation(
country_code=location.country_code,
@@ -221,10 +242,19 @@ def _get_gtfs_rt_feeds(
)
gtfs_rt_feeds = []
for feed_group in feed_groups:
- feed_objects, redirects, external_ids, entity_types, feed_references = zip(*feed_group)
+ feed_objects, redirect_ids, external_ids, redirect_comments, entity_types, feed_references = zip(
+ *feed_group
+ )
+
+ # Put together the redirect ids and the corresponding comments. Eliminate Nones.
+ redirects_list = [
+ Redirect(target_id=redirect, comment=comment)
+ for redirect, comment in zip(redirect_ids, redirect_comments)
+ if redirect is not None
+ ]
gtfs_rt_feed = FeedsApiImpl._create_common_feed(
- feed_objects[0], GtfsRTFeed, set(redirects), set(external_ids)
+ feed_objects[0], GtfsRTFeed, redirects_list, set(external_ids)
)
gtfs_rt_feed.entity_types = {entity_type for entity_type in entity_types if entity_type is not None}
gtfs_rt_feed.feed_references = {reference for reference in feed_references if reference is not None}
diff --git a/api/src/scripts/populate_db.py b/api/src/scripts/populate_db.py
index 3f5105ec7..4e17b9d3c 100644
--- a/api/src/scripts/populate_db.py
+++ b/api/src/scripts/populate_db.py
@@ -13,13 +13,13 @@
from database.database import Database, generate_unique_id
from database_gen.sqlacodegen_models import (
Component,
- Feed,
Entitytype,
Externalid,
Gtfsdataset,
Gtfsfeed,
Gtfsrealtimefeed,
Location,
+ Redirectingid,
Base,
)
from utils.logger import Logger
@@ -122,6 +122,10 @@ def add_entity(entity, priority):
if self.df is None:
return
+
+ # Keep a dict (map) of stable_id -> feed so we can reference the feeds when processing the static_reference
+ # and the redirects.
+ feed_map = {}
for index, row in self.df.iterrows():
mdb_id = f"mdb-{int(row['mdb_source_id'])}"
self.logger.debug(f"Populating Database for with Feed [stable_id = {mdb_id}]")
@@ -134,7 +138,7 @@ def add_entity(entity, priority):
feed_name=row["name"],
note=row["note"],
producer_url=row["urls.direct_download"],
- authentication_type=str(int(row["urls.authentication_type"])),
+ authentication_type=str(int(row.get("urls.authentication_type", "0") or "0")),
authentication_info_url=row["urls.authentication_info"],
api_key_parameter_name=row["urls.api_key_parameter_name"],
license_url=row["urls.license"],
@@ -143,6 +147,8 @@ def add_entity(entity, priority):
provider=row["provider"],
)
+ feed_map[mdb_id] = feed
+
# Location
country_code = row["location.country_code"]
subdivision_name = row["location.subdivision_name"]
@@ -210,20 +216,6 @@ def add_entity(entity, priority):
entity_type.feeds.append(feed)
add_entity(entity_type, 4)
- # Feed Reference
- if row["static_reference"] is not None:
- referenced_feeds_list = [
- entity
- for entity in entities
- if isinstance(entity, Feed) and entity.stable_id == f"mdb-{int(row['static_reference'])}"
- ]
- if len(referenced_feeds_list) == 1:
- referenced_feeds_list[0].gtfs_rt_feeds.append(feed)
- else:
- self.logger.error(
- f'Couldn\'t create reference from {feed.stable_id} to {row["static_reference"]}'
- )
-
# External ID
mdb_external_id = Externalid(
feed_id=feed.id,
@@ -232,6 +224,49 @@ def add_entity(entity, priority):
)
add_entity(mdb_external_id, 4)
+ # Iterate again over the contents of the csv files to process the feed references.
+ for index, row in self.df.iterrows():
+ mdb_id = f"mdb-{int(row['mdb_source_id'])}"
+ feed = feed_map[mdb_id]
+ if row["data_type"] == "gtfs_rt":
+ # Feed Reference
+ if row["static_reference"] is not None:
+ static_reference_mdb_id = f"mdb-{int(row['static_reference'])}"
+ referenced_feed = feed_map.get(static_reference_mdb_id, None)
+ if referenced_feed:
+ referenced_feed.gtfs_rt_feeds.append(feed)
+
+ # Process redirects
+ raw_redirects = row.get("redirect.id", None)
+ redirects_ids = raw_redirects.split("|") if raw_redirects is not None else []
+ raw_comments = row.get("redirect.comment", None)
+ comments = raw_comments.split("|") if raw_comments is not None else []
+
+ if len(redirects_ids) != len(comments):
+ self.logger.warn(f"Number of redirect ids and redirect comments differ for feed {mdb_id}")
+
+ for mdb_source_id in redirects_ids:
+ if len(mdb_source_id) == 0:
+ # since there is a 1:1 correspondence between redirect ids and comments, skip also the comment
+ comments = comments[1:]
+ continue
+ if comments:
+ comment = comments.pop(0)
+ else:
+ comment = ""
+
+ target_stable_id = f"mdb-{mdb_source_id}"
+ target_feed = feed_map.get(target_stable_id, None)
+
+ if target_feed:
+ if target_feed.id != feed.id:
+ redirect = Redirectingid(source_id=feed.id, target_id=target_feed.id, redirect_comment=comment)
+ add_entity(redirect, 5)
+ else:
+ self.logger.error(f"Feed has redirect pointing to itself {mdb_id}")
+ else:
+ self.logger.warn(f"Could not find redirect target feed {target_stable_id} for feed {mdb_id}")
+
priority = 1
while not entities_index.empty():
next_priority, entity_index = entities_index.get()
diff --git a/api/tests/test_database.py b/api/tests/test_database.py
index d0c8e33b1..b6fbc9e9c 100644
--- a/api/tests/test_database.py
+++ b/api/tests/test_database.py
@@ -87,14 +87,14 @@ def test_merge_gtfs_feed(test_database):
assert sorted([external_id.source for external_id in feed_1.external_ids]) == ["source1", "source2"]
assert feed_1.latest_dataset.id == TEST_DATASET_STABLE_IDS[1]
- assert sorted([redirect for redirect in feed_1.redirects]) == [TEST_GTFS_FEED_STABLE_IDS[1]]
+ assert sorted([redirect.target_id for redirect in feed_1.redirects]) == [TEST_GTFS_FEED_STABLE_IDS[1]]
assert feed_2 is not None
assert sorted([external_id.external_id for external_id in feed_2.external_ids]) == TEST_EXTERNAL_IDS[2:]
assert sorted([external_id.source for external_id in feed_2.external_ids]) == ["source3", "source4"]
assert feed_2.latest_dataset.id == TEST_DATASET_STABLE_IDS[3]
- assert sorted([redirect for redirect in feed_2.redirects]) == [
+ assert sorted([redirect.target_id for redirect in feed_2.redirects]) == [
TEST_GTFS_FEED_STABLE_IDS[2],
TEST_GTFS_FEED_STABLE_IDS[3],
]
diff --git a/api/tests/unittest/test_feeds.py b/api/tests/unittest/test_feeds.py
index b8fbe0cd8..2b37da40f 100644
--- a/api/tests/unittest/test_feeds.py
+++ b/api/tests/unittest/test_feeds.py
@@ -4,6 +4,16 @@
from database.database import Database
from database_gen.sqlacodegen_models import Feed, Externalid, Location, Gtfsdataset
+redirect_target_id = "test_target_id"
+redirect_comment = "Some comment"
+expected_redirect_response = {"target_id": redirect_target_id, "comment": redirect_comment}
+
+
+def check_redirect(response: dict):
+ assert (
+ response["redirects"][0] == expected_redirect_response
+ ), f'Response feed redirect was {response["redirects"][0]} instead of {expected_redirect_response}'
+
def test_feeds_get(client: TestClient, mocker):
"""
@@ -13,7 +23,7 @@ def test_feeds_get(client: TestClient, mocker):
mock_feed = Feed(stable_id="test_id")
mock_external_id = Externalid(associated_id="test_associated_id", source="test_source")
- mock_select.return_value = [[(mock_feed, "test_target_id", mock_external_id)]]
+ mock_select.return_value = [[(mock_feed, redirect_target_id, mock_external_id, redirect_comment)]]
response = client.request(
"GET",
"/v1/feeds",
@@ -30,9 +40,7 @@ def test_feeds_get(client: TestClient, mocker):
assert (
response_feed["external_ids"][0]["source"] == "test_source"
), f'Response feed source was {response_feed["external_ids"][0]["source"]} instead of test_source'
- assert (
- response_feed["redirects"][0] == "test_target_id"
- ), f'Response feed redirect was {response_feed["redirects"][0]} instead of test_target_id'
+ check_redirect(response_feed)
def test_feed_get(client: TestClient, mocker):
@@ -42,7 +50,7 @@ def test_feed_get(client: TestClient, mocker):
mock_select = mocker.patch.object(Database(), "select")
mock_feed = Feed(stable_id="test_id")
mock_external_id = Externalid(associated_id="test_associated_id", source="test_source")
- mock_select.return_value = [[(mock_feed, "test_target_id", mock_external_id)]]
+ mock_select.return_value = [[(mock_feed, redirect_target_id, mock_external_id, redirect_comment)]]
response = client.request(
"GET",
@@ -60,9 +68,8 @@ def test_feed_get(client: TestClient, mocker):
assert (
response_feed["external_ids"][0]["source"] == "test_source"
), f'Response feed source was {response_feed["external_ids"][0]["source"]} instead of test_source'
- assert (
- response_feed["redirects"][0] == "test_target_id"
- ), f'Response feed redirect was {response_feed["redirects"][0]} instead of test_target_id'
+
+ check_redirect(response_feed)
def test_gtfs_feeds_get(client: TestClient, mocker):
@@ -79,7 +86,9 @@ def test_gtfs_feeds_get(client: TestClient, mocker):
subdivision_name="test_subdivision_name",
municipality="test_municipality",
)
- mock_select.return_value = [[(mock_feed, "test_target_id", mock_external_id, mock_latest_datasets, mock_locations)]]
+ mock_select.return_value = [
+ [(mock_feed, redirect_target_id, mock_external_id, redirect_comment, mock_latest_datasets, mock_locations)]
+ ]
response = client.request(
"GET",
@@ -100,9 +109,7 @@ def test_gtfs_feeds_get(client: TestClient, mocker):
assert (
response_gtfs_feed["external_ids"][0]["source"] == "test_source"
), f'Response feed source was {response_gtfs_feed["external_ids"][0]["source"]} instead of test_source'
- assert (
- response_gtfs_feed["redirects"][0] == "test_target_id"
- ), f'Response feed redirect was {response_gtfs_feed["redirects"][0]} instead of test_target_id'
+ check_redirect(response_gtfs_feed)
assert (
response_gtfs_feed["locations"][0]["country_code"] == "test_country_code"
), f'Response feed country code was {response_gtfs_feed["locations"][0]["country_code"]} \
@@ -139,7 +146,9 @@ def test_gtfs_feed_get(client: TestClient, mocker):
subdivision_name="test_subdivision_name",
municipality="test_municipality",
)
- mock_select.return_value = [[(mock_feed, "test_target_id", mock_external_id, mock_latest_datasets, mock_locations)]]
+ mock_select.return_value = [
+ [(mock_feed, redirect_target_id, mock_external_id, redirect_comment, mock_latest_datasets, mock_locations)]
+ ]
response = client.request(
"GET",
@@ -160,9 +169,9 @@ def test_gtfs_feed_get(client: TestClient, mocker):
assert (
response_gtfs_feed["external_ids"][0]["source"] == "test_source"
), f'Response feed source was {response_gtfs_feed["external_ids"][0]["source"]} instead of test_source'
- assert (
- response_gtfs_feed["redirects"][0] == "test_target_id"
- ), f'Response feed redirect was {response_gtfs_feed["redirects"][0]} instead of test_target_id'
+
+ check_redirect(response_gtfs_feed)
+
assert (
response_gtfs_feed["locations"][0]["country_code"] == "test_country_code"
), f'Response feed country code was {response_gtfs_feed["locations"][0]["country_code"]} \
@@ -195,7 +204,7 @@ def test_gtfs_rt_feeds_get(client: TestClient, mocker):
mock_entity_types = "test_entity_type"
mock_feed_references = "test_feed_reference"
mock_select.return_value = [
- [(mock_feed, "test_target_id", mock_external_id, mock_entity_types, mock_feed_references)]
+ [(mock_feed, redirect_target_id, mock_external_id, redirect_comment, mock_entity_types, mock_feed_references)]
]
response = client.request(
@@ -217,9 +226,9 @@ def test_gtfs_rt_feeds_get(client: TestClient, mocker):
assert (
response_gtfs_rt_feed["external_ids"][0]["source"] == "test_source"
), f'Response feed source was {response_gtfs_rt_feed["external_ids"][0]["source"]} instead of test_source'
- assert (
- response_gtfs_rt_feed["redirects"][0] == "test_target_id"
- ), f'Response feed redirect was {response_gtfs_rt_feed["redirects"][0]} instead of test_target_id'
+
+ check_redirect(response_gtfs_rt_feed)
+
assert (
response_gtfs_rt_feed["entity_types"][0] == "test_entity_type"
), f'Response feed entity type was {response_gtfs_rt_feed["entity_types"][0]} instead of test_entity_type'
@@ -238,7 +247,7 @@ def test_gtfs_rt_feed_get(client: TestClient, mocker):
mock_entity_types = "test_entity_type"
mock_feed_references = "test_feed_reference"
mock_select.return_value = [
- [(mock_feed, "test_target_id", mock_external_id, mock_entity_types, mock_feed_references)]
+ [(mock_feed, redirect_target_id, mock_external_id, redirect_comment, mock_entity_types, mock_feed_references)]
]
response = client.request(
@@ -260,9 +269,9 @@ def test_gtfs_rt_feed_get(client: TestClient, mocker):
assert (
response_gtfs_rt_feed["external_ids"][0]["source"] == "test_source"
), f'Response feed source was {response_gtfs_rt_feed["external_ids"][0]["source"]} instead of test_source'
- assert (
- response_gtfs_rt_feed["redirects"][0] == "test_target_id"
- ), f'Response feed redirect was {response_gtfs_rt_feed["redirects"][0]} instead of test_target_id'
+
+ check_redirect(response_gtfs_rt_feed)
+
assert (
response_gtfs_rt_feed["entity_types"][0] == "test_entity_type"
), f'Response feed entity type was {response_gtfs_rt_feed["entity_types"][0]} instead of test_entity_type'
diff --git a/docs/DatabaseCatalogAPI.yaml b/docs/DatabaseCatalogAPI.yaml
index a70030bfe..186534bee 100644
--- a/docs/DatabaseCatalogAPI.yaml
+++ b/docs/DatabaseCatalogAPI.yaml
@@ -233,13 +233,24 @@ paths:
components:
schemas:
+ Redirect:
+ type: object
+ properties:
+ target_id:
+ description: ID of the feed to which the current feed is being redirected.
+ type: string
+ example: mdb-10
+ comment:
+ description: A comment explaining the redirect.
+ type: string
+ example: Redirected because of a change of URL.
BasicFeed:
type: object
properties:
id:
description: Unique identifier used as a key for the feeds table.
type: string
- example: mdb_10
+ example: mdb-10
data_type:
type: string
enum:
@@ -285,11 +296,7 @@ components:
redirects:
type: array
items:
- description: >
- The feed ID that should be used in replacement of the current one.
- A redirect is defined when several expired feeds are aggregated together or split into new feeds.
- type: string
- example: mdb-100
+ $ref: "#/components/schemas/Redirect"
GtfsFeed:
allOf:
diff --git a/docs/DatabaseCatalogAPI_IAP.yaml b/docs/DatabaseCatalogAPI_IAP.yaml
index 4d70f446a..4ee200b6f 100644
--- a/docs/DatabaseCatalogAPI_IAP.yaml
+++ b/docs/DatabaseCatalogAPI_IAP.yaml
@@ -233,13 +233,24 @@ paths:
components:
schemas:
+ Redirect:
+ type: object
+ properties:
+ target_id:
+ description: ID of the feed to which the current feed is being redirected.
+ type: string
+ example: mdb-10
+ comment:
+ description: A comment explaining the redirect.
+ type: string
+ example: Redirected because of a change of URL.
BasicFeed:
type: object
properties:
id:
description: Unique identifier used as a key for the feeds table.
type: string
- example: mdb_10
+ example: mdb-10
data_type:
type: string
enum:
@@ -285,11 +296,7 @@ components:
redirects:
type: array
items:
- description: >
- The feed ID that should be used in replacement of the current one.
- A redirect is defined when several expired feeds are aggregated together or split into new feeds.
- type: string
- example: mdb-100
+ $ref: "#/components/schemas/Redirect"
GtfsFeed:
allOf:
diff --git a/liquibase/changelog.xml b/liquibase/changelog.xml
index 9d74c3b35..e2cb98160 100644
--- a/liquibase/changelog.xml
+++ b/liquibase/changelog.xml
@@ -14,4 +14,5 @@
+
\ No newline at end of file
diff --git a/liquibase/changes/feat_149.sql b/liquibase/changes/feat_149.sql
new file mode 100644
index 000000000..ee736274e
--- /dev/null
+++ b/liquibase/changes/feat_149.sql
@@ -0,0 +1,2 @@
+ALTER TABLE RedirectingID
+ADD COLUMN redirect_comment VARCHAR(255);
\ No newline at end of file