diff --git a/README.md b/README.md index 91a3616e3..f783d6fe2 100644 --- a/README.md +++ b/README.md @@ -59,14 +59,6 @@ docker-compose --env-file ./config/.env.local up schemaspy -d --force-recreate scripts/api-start.sh ``` -## Unit tests -Test are located in `tests` directory. - -To run all tests: -```bash -scripts/api-start.sh -``` - ## Linter This repository uses Flak8 and Black for code styling diff --git a/api/.openapi-generator/FILES b/api/.openapi-generator/FILES index 108655582..1de90fd7e 100644 --- a/api/.openapi-generator/FILES +++ b/api/.openapi-generator/FILES @@ -18,5 +18,6 @@ src/feeds_gen/models/gtfs_rt_feed.py src/feeds_gen/models/latest_dataset.py src/feeds_gen/models/location.py src/feeds_gen/models/metadata.py +src/feeds_gen/models/redirect.py src/feeds_gen/models/source_info.py src/feeds_gen/security_api.py diff --git a/api/src/feeds/impl/feeds_api_impl.py b/api/src/feeds/impl/feeds_api_impl.py index 5d75d0c69..57706877d 100644 --- a/api/src/feeds/impl/feeds_api_impl.py +++ b/api/src/feeds/impl/feeds_api_impl.py @@ -13,9 +13,9 @@ t_locationfeed, Location, Entitytype, + Redirectingid, ) from database_gen.sqlacodegen_models import ( - t_redirectingid, t_entitytypefeed, t_feedreference, ) @@ -33,6 +33,7 @@ from feeds_gen.models.latest_dataset import LatestDataset from feeds_gen.models.location import Location as ApiLocation from feeds_gen.models.source_info import SourceInfo +from feeds_gen.models.redirect import Redirect class FeedsApiImpl(BaseFeedsApi): @@ -48,7 +49,7 @@ class FeedsApiImpl(BaseFeedsApi): def _create_common_feed( database_feed: Feed, clazz: Type[APIFeedType], - redirects: Set[str], + redirects: [Redirect], external_ids: Set[Externalid], ) -> Union[APIFeedType]: """Maps the ORM object Feed to API data model specified by clazz""" @@ -81,13 +82,13 @@ def _create_common_feed( def _create_feeds_query(feed_type: Type[Feed]) -> Query: target_feed = aliased(Feed) return ( - Query([feed_type, target_feed.stable_id, Externalid]) + Query([feed_type, target_feed.stable_id, Externalid, Redirectingid.redirect_comment]) .join( - t_redirectingid, - feed_type.id == t_redirectingid.c["source_id"], + Redirectingid, + feed_type.id == Redirectingid.source_id, isouter=True, ) - .join(target_feed, t_redirectingid.c.target_id == target_feed.id, isouter=True) + .join(target_feed, Redirectingid.target_id == target_feed.id, isouter=True) .join(Externalid, feed_type.id == Externalid.feed_id, isouter=True) ) @@ -110,9 +111,16 @@ def _get_basic_feeds( ) basic_feeds = [] for feed_group in feed_groups: - feed_objects, redirects, external_ids = zip(*feed_group) + feed_objects, redirect_ids, external_ids, redirect_comments = zip(*feed_group) + # Put together the redirect ids and the corresponding comments. Eliminate Nones. + redirects_list = [ + Redirect(target_id=redirect, comment=comment) + for redirect, comment in zip(redirect_ids, redirect_comments) + if redirect is not None + ] + basic_feeds.append( - FeedsApiImpl._create_common_feed(feed_objects[0], BasicFeed, set(redirects), set(external_ids)) + FeedsApiImpl._create_common_feed(feed_objects[0], BasicFeed, redirects_list, set(external_ids)) ) return basic_feeds @@ -168,9 +176,22 @@ def _get_order_by_key(order_by: list[str] = None): ) gtfs_feeds = [] for feed_group in feed_groups: - feed_objects, redirects, external_ids, latest_datasets, locations = zip(*feed_group) + feed_objects, redirect_ids, external_ids, redirect_comments, latest_datasets, locations = zip(*feed_group) + + # We use a set to eliminate duplicate in the Redirects. + # But we can't use the Redirect object directly since they are not hashable and making them + # hashable is more tricky since the class is generated by the openapi generator. + # So instead transfer the Redirect data to a simple dict to temporarily use in the set. + redirects_set = set() + for redirect, comment in zip(redirect_ids, redirect_comments): + if redirect is not None: + redirect_tuple = (redirect, comment) + redirects_set.add(redirect_tuple) + + # Convert the set of unique tuples back to a list of Redirect objects + redirects_list = [Redirect(target_id=redirect, comment=comment) for redirect, comment in redirects_set] - gtfs_feed = FeedsApiImpl._create_common_feed(feed_objects[0], GtfsFeed, set(redirects), set(external_ids)) + gtfs_feed = FeedsApiImpl._create_common_feed(feed_objects[0], GtfsFeed, redirects_list, set(external_ids)) gtfs_feed.locations = [ ApiLocation( country_code=location.country_code, @@ -221,10 +242,19 @@ def _get_gtfs_rt_feeds( ) gtfs_rt_feeds = [] for feed_group in feed_groups: - feed_objects, redirects, external_ids, entity_types, feed_references = zip(*feed_group) + feed_objects, redirect_ids, external_ids, redirect_comments, entity_types, feed_references = zip( + *feed_group + ) + + # Put together the redirect ids and the corresponding comments. Eliminate Nones. + redirects_list = [ + Redirect(target_id=redirect, comment=comment) + for redirect, comment in zip(redirect_ids, redirect_comments) + if redirect is not None + ] gtfs_rt_feed = FeedsApiImpl._create_common_feed( - feed_objects[0], GtfsRTFeed, set(redirects), set(external_ids) + feed_objects[0], GtfsRTFeed, redirects_list, set(external_ids) ) gtfs_rt_feed.entity_types = {entity_type for entity_type in entity_types if entity_type is not None} gtfs_rt_feed.feed_references = {reference for reference in feed_references if reference is not None} diff --git a/api/src/scripts/populate_db.py b/api/src/scripts/populate_db.py index 3f5105ec7..4e17b9d3c 100644 --- a/api/src/scripts/populate_db.py +++ b/api/src/scripts/populate_db.py @@ -13,13 +13,13 @@ from database.database import Database, generate_unique_id from database_gen.sqlacodegen_models import ( Component, - Feed, Entitytype, Externalid, Gtfsdataset, Gtfsfeed, Gtfsrealtimefeed, Location, + Redirectingid, Base, ) from utils.logger import Logger @@ -122,6 +122,10 @@ def add_entity(entity, priority): if self.df is None: return + + # Keep a dict (map) of stable_id -> feed so we can reference the feeds when processing the static_reference + # and the redirects. + feed_map = {} for index, row in self.df.iterrows(): mdb_id = f"mdb-{int(row['mdb_source_id'])}" self.logger.debug(f"Populating Database for with Feed [stable_id = {mdb_id}]") @@ -134,7 +138,7 @@ def add_entity(entity, priority): feed_name=row["name"], note=row["note"], producer_url=row["urls.direct_download"], - authentication_type=str(int(row["urls.authentication_type"])), + authentication_type=str(int(row.get("urls.authentication_type", "0") or "0")), authentication_info_url=row["urls.authentication_info"], api_key_parameter_name=row["urls.api_key_parameter_name"], license_url=row["urls.license"], @@ -143,6 +147,8 @@ def add_entity(entity, priority): provider=row["provider"], ) + feed_map[mdb_id] = feed + # Location country_code = row["location.country_code"] subdivision_name = row["location.subdivision_name"] @@ -210,20 +216,6 @@ def add_entity(entity, priority): entity_type.feeds.append(feed) add_entity(entity_type, 4) - # Feed Reference - if row["static_reference"] is not None: - referenced_feeds_list = [ - entity - for entity in entities - if isinstance(entity, Feed) and entity.stable_id == f"mdb-{int(row['static_reference'])}" - ] - if len(referenced_feeds_list) == 1: - referenced_feeds_list[0].gtfs_rt_feeds.append(feed) - else: - self.logger.error( - f'Couldn\'t create reference from {feed.stable_id} to {row["static_reference"]}' - ) - # External ID mdb_external_id = Externalid( feed_id=feed.id, @@ -232,6 +224,49 @@ def add_entity(entity, priority): ) add_entity(mdb_external_id, 4) + # Iterate again over the contents of the csv files to process the feed references. + for index, row in self.df.iterrows(): + mdb_id = f"mdb-{int(row['mdb_source_id'])}" + feed = feed_map[mdb_id] + if row["data_type"] == "gtfs_rt": + # Feed Reference + if row["static_reference"] is not None: + static_reference_mdb_id = f"mdb-{int(row['static_reference'])}" + referenced_feed = feed_map.get(static_reference_mdb_id, None) + if referenced_feed: + referenced_feed.gtfs_rt_feeds.append(feed) + + # Process redirects + raw_redirects = row.get("redirect.id", None) + redirects_ids = raw_redirects.split("|") if raw_redirects is not None else [] + raw_comments = row.get("redirect.comment", None) + comments = raw_comments.split("|") if raw_comments is not None else [] + + if len(redirects_ids) != len(comments): + self.logger.warn(f"Number of redirect ids and redirect comments differ for feed {mdb_id}") + + for mdb_source_id in redirects_ids: + if len(mdb_source_id) == 0: + # since there is a 1:1 correspondence between redirect ids and comments, skip also the comment + comments = comments[1:] + continue + if comments: + comment = comments.pop(0) + else: + comment = "" + + target_stable_id = f"mdb-{mdb_source_id}" + target_feed = feed_map.get(target_stable_id, None) + + if target_feed: + if target_feed.id != feed.id: + redirect = Redirectingid(source_id=feed.id, target_id=target_feed.id, redirect_comment=comment) + add_entity(redirect, 5) + else: + self.logger.error(f"Feed has redirect pointing to itself {mdb_id}") + else: + self.logger.warn(f"Could not find redirect target feed {target_stable_id} for feed {mdb_id}") + priority = 1 while not entities_index.empty(): next_priority, entity_index = entities_index.get() diff --git a/api/tests/test_database.py b/api/tests/test_database.py index d0c8e33b1..b6fbc9e9c 100644 --- a/api/tests/test_database.py +++ b/api/tests/test_database.py @@ -87,14 +87,14 @@ def test_merge_gtfs_feed(test_database): assert sorted([external_id.source for external_id in feed_1.external_ids]) == ["source1", "source2"] assert feed_1.latest_dataset.id == TEST_DATASET_STABLE_IDS[1] - assert sorted([redirect for redirect in feed_1.redirects]) == [TEST_GTFS_FEED_STABLE_IDS[1]] + assert sorted([redirect.target_id for redirect in feed_1.redirects]) == [TEST_GTFS_FEED_STABLE_IDS[1]] assert feed_2 is not None assert sorted([external_id.external_id for external_id in feed_2.external_ids]) == TEST_EXTERNAL_IDS[2:] assert sorted([external_id.source for external_id in feed_2.external_ids]) == ["source3", "source4"] assert feed_2.latest_dataset.id == TEST_DATASET_STABLE_IDS[3] - assert sorted([redirect for redirect in feed_2.redirects]) == [ + assert sorted([redirect.target_id for redirect in feed_2.redirects]) == [ TEST_GTFS_FEED_STABLE_IDS[2], TEST_GTFS_FEED_STABLE_IDS[3], ] diff --git a/api/tests/unittest/test_feeds.py b/api/tests/unittest/test_feeds.py index b8fbe0cd8..2b37da40f 100644 --- a/api/tests/unittest/test_feeds.py +++ b/api/tests/unittest/test_feeds.py @@ -4,6 +4,16 @@ from database.database import Database from database_gen.sqlacodegen_models import Feed, Externalid, Location, Gtfsdataset +redirect_target_id = "test_target_id" +redirect_comment = "Some comment" +expected_redirect_response = {"target_id": redirect_target_id, "comment": redirect_comment} + + +def check_redirect(response: dict): + assert ( + response["redirects"][0] == expected_redirect_response + ), f'Response feed redirect was {response["redirects"][0]} instead of {expected_redirect_response}' + def test_feeds_get(client: TestClient, mocker): """ @@ -13,7 +23,7 @@ def test_feeds_get(client: TestClient, mocker): mock_feed = Feed(stable_id="test_id") mock_external_id = Externalid(associated_id="test_associated_id", source="test_source") - mock_select.return_value = [[(mock_feed, "test_target_id", mock_external_id)]] + mock_select.return_value = [[(mock_feed, redirect_target_id, mock_external_id, redirect_comment)]] response = client.request( "GET", "/v1/feeds", @@ -30,9 +40,7 @@ def test_feeds_get(client: TestClient, mocker): assert ( response_feed["external_ids"][0]["source"] == "test_source" ), f'Response feed source was {response_feed["external_ids"][0]["source"]} instead of test_source' - assert ( - response_feed["redirects"][0] == "test_target_id" - ), f'Response feed redirect was {response_feed["redirects"][0]} instead of test_target_id' + check_redirect(response_feed) def test_feed_get(client: TestClient, mocker): @@ -42,7 +50,7 @@ def test_feed_get(client: TestClient, mocker): mock_select = mocker.patch.object(Database(), "select") mock_feed = Feed(stable_id="test_id") mock_external_id = Externalid(associated_id="test_associated_id", source="test_source") - mock_select.return_value = [[(mock_feed, "test_target_id", mock_external_id)]] + mock_select.return_value = [[(mock_feed, redirect_target_id, mock_external_id, redirect_comment)]] response = client.request( "GET", @@ -60,9 +68,8 @@ def test_feed_get(client: TestClient, mocker): assert ( response_feed["external_ids"][0]["source"] == "test_source" ), f'Response feed source was {response_feed["external_ids"][0]["source"]} instead of test_source' - assert ( - response_feed["redirects"][0] == "test_target_id" - ), f'Response feed redirect was {response_feed["redirects"][0]} instead of test_target_id' + + check_redirect(response_feed) def test_gtfs_feeds_get(client: TestClient, mocker): @@ -79,7 +86,9 @@ def test_gtfs_feeds_get(client: TestClient, mocker): subdivision_name="test_subdivision_name", municipality="test_municipality", ) - mock_select.return_value = [[(mock_feed, "test_target_id", mock_external_id, mock_latest_datasets, mock_locations)]] + mock_select.return_value = [ + [(mock_feed, redirect_target_id, mock_external_id, redirect_comment, mock_latest_datasets, mock_locations)] + ] response = client.request( "GET", @@ -100,9 +109,7 @@ def test_gtfs_feeds_get(client: TestClient, mocker): assert ( response_gtfs_feed["external_ids"][0]["source"] == "test_source" ), f'Response feed source was {response_gtfs_feed["external_ids"][0]["source"]} instead of test_source' - assert ( - response_gtfs_feed["redirects"][0] == "test_target_id" - ), f'Response feed redirect was {response_gtfs_feed["redirects"][0]} instead of test_target_id' + check_redirect(response_gtfs_feed) assert ( response_gtfs_feed["locations"][0]["country_code"] == "test_country_code" ), f'Response feed country code was {response_gtfs_feed["locations"][0]["country_code"]} \ @@ -139,7 +146,9 @@ def test_gtfs_feed_get(client: TestClient, mocker): subdivision_name="test_subdivision_name", municipality="test_municipality", ) - mock_select.return_value = [[(mock_feed, "test_target_id", mock_external_id, mock_latest_datasets, mock_locations)]] + mock_select.return_value = [ + [(mock_feed, redirect_target_id, mock_external_id, redirect_comment, mock_latest_datasets, mock_locations)] + ] response = client.request( "GET", @@ -160,9 +169,9 @@ def test_gtfs_feed_get(client: TestClient, mocker): assert ( response_gtfs_feed["external_ids"][0]["source"] == "test_source" ), f'Response feed source was {response_gtfs_feed["external_ids"][0]["source"]} instead of test_source' - assert ( - response_gtfs_feed["redirects"][0] == "test_target_id" - ), f'Response feed redirect was {response_gtfs_feed["redirects"][0]} instead of test_target_id' + + check_redirect(response_gtfs_feed) + assert ( response_gtfs_feed["locations"][0]["country_code"] == "test_country_code" ), f'Response feed country code was {response_gtfs_feed["locations"][0]["country_code"]} \ @@ -195,7 +204,7 @@ def test_gtfs_rt_feeds_get(client: TestClient, mocker): mock_entity_types = "test_entity_type" mock_feed_references = "test_feed_reference" mock_select.return_value = [ - [(mock_feed, "test_target_id", mock_external_id, mock_entity_types, mock_feed_references)] + [(mock_feed, redirect_target_id, mock_external_id, redirect_comment, mock_entity_types, mock_feed_references)] ] response = client.request( @@ -217,9 +226,9 @@ def test_gtfs_rt_feeds_get(client: TestClient, mocker): assert ( response_gtfs_rt_feed["external_ids"][0]["source"] == "test_source" ), f'Response feed source was {response_gtfs_rt_feed["external_ids"][0]["source"]} instead of test_source' - assert ( - response_gtfs_rt_feed["redirects"][0] == "test_target_id" - ), f'Response feed redirect was {response_gtfs_rt_feed["redirects"][0]} instead of test_target_id' + + check_redirect(response_gtfs_rt_feed) + assert ( response_gtfs_rt_feed["entity_types"][0] == "test_entity_type" ), f'Response feed entity type was {response_gtfs_rt_feed["entity_types"][0]} instead of test_entity_type' @@ -238,7 +247,7 @@ def test_gtfs_rt_feed_get(client: TestClient, mocker): mock_entity_types = "test_entity_type" mock_feed_references = "test_feed_reference" mock_select.return_value = [ - [(mock_feed, "test_target_id", mock_external_id, mock_entity_types, mock_feed_references)] + [(mock_feed, redirect_target_id, mock_external_id, redirect_comment, mock_entity_types, mock_feed_references)] ] response = client.request( @@ -260,9 +269,9 @@ def test_gtfs_rt_feed_get(client: TestClient, mocker): assert ( response_gtfs_rt_feed["external_ids"][0]["source"] == "test_source" ), f'Response feed source was {response_gtfs_rt_feed["external_ids"][0]["source"]} instead of test_source' - assert ( - response_gtfs_rt_feed["redirects"][0] == "test_target_id" - ), f'Response feed redirect was {response_gtfs_rt_feed["redirects"][0]} instead of test_target_id' + + check_redirect(response_gtfs_rt_feed) + assert ( response_gtfs_rt_feed["entity_types"][0] == "test_entity_type" ), f'Response feed entity type was {response_gtfs_rt_feed["entity_types"][0]} instead of test_entity_type' diff --git a/docs/DatabaseCatalogAPI.yaml b/docs/DatabaseCatalogAPI.yaml index a70030bfe..186534bee 100644 --- a/docs/DatabaseCatalogAPI.yaml +++ b/docs/DatabaseCatalogAPI.yaml @@ -233,13 +233,24 @@ paths: components: schemas: + Redirect: + type: object + properties: + target_id: + description: ID of the feed to which the current feed is being redirected. + type: string + example: mdb-10 + comment: + description: A comment explaining the redirect. + type: string + example: Redirected because of a change of URL. BasicFeed: type: object properties: id: description: Unique identifier used as a key for the feeds table. type: string - example: mdb_10 + example: mdb-10 data_type: type: string enum: @@ -285,11 +296,7 @@ components: redirects: type: array items: - description: > - The feed ID that should be used in replacement of the current one. - A redirect is defined when several expired feeds are aggregated together or split into new feeds. - type: string - example: mdb-100 + $ref: "#/components/schemas/Redirect" GtfsFeed: allOf: diff --git a/docs/DatabaseCatalogAPI_IAP.yaml b/docs/DatabaseCatalogAPI_IAP.yaml index 4d70f446a..4ee200b6f 100644 --- a/docs/DatabaseCatalogAPI_IAP.yaml +++ b/docs/DatabaseCatalogAPI_IAP.yaml @@ -233,13 +233,24 @@ paths: components: schemas: + Redirect: + type: object + properties: + target_id: + description: ID of the feed to which the current feed is being redirected. + type: string + example: mdb-10 + comment: + description: A comment explaining the redirect. + type: string + example: Redirected because of a change of URL. BasicFeed: type: object properties: id: description: Unique identifier used as a key for the feeds table. type: string - example: mdb_10 + example: mdb-10 data_type: type: string enum: @@ -285,11 +296,7 @@ components: redirects: type: array items: - description: > - The feed ID that should be used in replacement of the current one. - A redirect is defined when several expired feeds are aggregated together or split into new feeds. - type: string - example: mdb-100 + $ref: "#/components/schemas/Redirect" GtfsFeed: allOf: diff --git a/liquibase/changelog.xml b/liquibase/changelog.xml index 9d74c3b35..e2cb98160 100644 --- a/liquibase/changelog.xml +++ b/liquibase/changelog.xml @@ -14,4 +14,5 @@ + \ No newline at end of file diff --git a/liquibase/changes/feat_149.sql b/liquibase/changes/feat_149.sql new file mode 100644 index 000000000..ee736274e --- /dev/null +++ b/liquibase/changes/feat_149.sql @@ -0,0 +1,2 @@ +ALTER TABLE RedirectingID +ADD COLUMN redirect_comment VARCHAR(255); \ No newline at end of file