diff --git a/changelog.d/16648.misc b/changelog.d/16648.misc new file mode 100644 index 000000000000..d0cc5bd8cb1a --- /dev/null +++ b/changelog.d/16648.misc @@ -0,0 +1 @@ +Add a test which ensures that new tables get PRIMARY KEYs. \ No newline at end of file diff --git a/tests/storage/test_database.py b/tests/storage/test_database.py index d60176b1d4f7..97e21fca157a 100644 --- a/tests/storage/test_database.py +++ b/tests/storage/test_database.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -from typing import Callable, List, Tuple +from typing import Callable, List, Set, Tuple from unittest.mock import Mock, call from twisted.internet import defer @@ -361,3 +361,136 @@ def _list_tables_with_missing_replica_identities_txn( 0, f"The following tables in the {pool.name()!r} database are missing REPLICA IDENTITIES: {missing!r}.", ) + + +class PostgresPrimaryKeyTestCase(unittest.HomeserverTestCase): + if not USE_POSTGRES_FOR_TESTS: + skip = "Requires Postgres" + + # Some tables are exempt, mostly for historical reasons + EXEMPT_TABLES = { + "appservice_room_list", + "batch_events", + "blocked_rooms", + "cache_invalidation_stream_by_instance", + "current_state_delta_stream", + "deleted_pushers", + "device_auth_providers", + "device_federation_inbox", + "device_federation_outbox", + "device_inbox", + "device_lists_changes_in_room", + "device_lists_outbound_last_success", + "device_lists_outbound_pokes", + "device_lists_remote_cache", + "device_lists_remote_extremeties", + "device_lists_remote_resync", + "device_lists_stream", + "e2e_cross_signing_keys", + "e2e_cross_signing_signatures", + "e2e_room_keys", + "e2e_room_keys_versions", + "erased_users", + "event_auth", + "event_auth_chain_links", + "event_push_actions_staging", + "event_push_summary", + "event_relations", + "event_search", + "federation_inbound_events_staging", + "federation_stream_position", + "ignored_users", + "insertion_event_edges", + "insertion_event_extremities", + "insertion_events", + "local_media_repository_thumbnails", + "local_media_repository_url_cache", + "monthly_active_users", + "presence_stream", + "push_rules_stream", + "ratelimit_override", + "remote_media_cache_thumbnails", + "room_alias_servers", + "room_stats_earliest_token", + "room_stats_state", + "state_group_edges", + "state_groups_state", + "stream_ordering_to_exterm", + "stream_positions", + "threepid_guest_access_tokens", + "timeline_gaps", + "user_daily_visits", + "user_directory", + "user_directory_search", + "user_filters", + "user_ips", + "user_signature_stream", + "user_threepid_id_server", + "users_in_public_rooms", + "users_pending_deactivation", + "users_who_share_private_rooms", + "worker_locks", + } + + def prepare( + self, reactor: MemoryReactor, clock: Clock, homeserver: HomeServer + ) -> None: + self.db_pools = homeserver.get_datastores().databases + + def test_all_nonexempt_tables_have_primary_key(self) -> None: + """ + Tests that all tables have a PRIMARY KEY. + + Whilst there are occasionally potentially good reasons to not have one, + we should default to having PRIMARY KEYs on new tables. + + Historically we have not created PRIMARY KEYs when they should be created + (sometimes even creating a UNIQUE index on the same columns instead). + + It is preferable to use PRIMARY KEYs instead of UNIQUE indices where possible + because this: + + - provides a default REPLICA IDENTITY, ensuring logical replication works + without extra complication; + - is sometimes more helpful for understanding; and + - is more useful to external tools which expect PRIMARY KEYs (e.g. some database + migration tools on some cloud providers do not work with tables without PRIMARY + KEYs) + """ + + sql = """ + SELECT tbl.table_name + FROM information_schema.tables tbl + WHERE table_type = 'BASE TABLE' + AND table_schema not in ('pg_catalog', 'information_schema') + AND NOT EXISTS ( + SELECT 1 + FROM information_schema.key_column_usage kcu + WHERE kcu.table_name = tbl.table_name + AND kcu.table_schema = tbl.table_schema + ) + """ + + def _list_tables_with_missing_primary_keys_txn( + txn: LoggingTransaction, + ) -> Set[str]: + txn.execute(sql) + return {table_name for table_name, in txn} + + for pool in self.db_pools: + missing = sorted( + self.get_success( + pool.runInteraction( + "test_list_missing_primary_keys", + _list_tables_with_missing_primary_keys_txn, + ) + ) + - self.EXEMPT_TABLES + ) + self.assertEqual( + len(missing), + 0, + f"The following tables in the {pool.name()!r} database are missing PRIMARY KEYs: {missing!r}.\n" + f"Please consider adding a primary key if it makes sense to do so — this gives the table a replica identity for free and helps humans and tools to understand the table better.\n" + f"If this lack of a primary key is intentional, please add an exemption.", + )