From 5575637b4cedbf20ea37bb928f63ed98864cbaa8 Mon Sep 17 00:00:00 2001 From: Andy Balaam Date: Fri, 6 Dec 2024 15:27:15 +0000 Subject: [PATCH] feat(utds): Provide the reason why an event was an expected UTD --- crates/matrix-sdk-crypto/CHANGELOG.md | 7 + .../src/types/events/utd_cause.rs | 313 ++++++++++++------ .../matrix-sdk-ui/src/timeline/tests/mod.rs | 2 + crates/matrix-sdk/src/room/mod.rs | 25 +- 4 files changed, 237 insertions(+), 110 deletions(-) diff --git a/crates/matrix-sdk-crypto/CHANGELOG.md b/crates/matrix-sdk-crypto/CHANGELOG.md index 92e18098859..32716cf914a 100644 --- a/crates/matrix-sdk-crypto/CHANGELOG.md +++ b/crates/matrix-sdk-crypto/CHANGELOG.md @@ -11,6 +11,13 @@ All notable changes to this project will be documented in this file. when the sender either did not wish to share or was unable to share the room_key. ([#4305](https://github.com/matrix-org/matrix-rust-sdk/pull/4305)) +- `UtdCause` has two new variants that replace the existing `HistoricalMessage`: + `HistoricalMessageAndBackupIsDisabled` and `HistoricalMessageAndDeviceIsUnverified`. + These give more detail about what went wrong and allow us to suggest to users + what actions they can take to fix the problem. See the doc comments on these + variants for suggested wording. + ([#4384](https://github.com/matrix-org/matrix-rust-sdk/pull/4384)) + ## [0.8.0] - 2024-11-19 ### Features diff --git a/crates/matrix-sdk-crypto/src/types/events/utd_cause.rs b/crates/matrix-sdk-crypto/src/types/events/utd_cause.rs index bef568ea10c..c6c174e1236 100644 --- a/crates/matrix-sdk-crypto/src/types/events/utd_cause.rs +++ b/crates/matrix-sdk-crypto/src/types/events/utd_cause.rs @@ -24,6 +24,15 @@ use serde::Deserialize; pub enum UtdCause { /// We don't have an explanation for why this UTD happened - it is probably /// a bug, or a network split between the two homeservers. + /// + /// For example: + /// + /// - the keys for this event are missing, but a key storage backup exists + /// and is working, so we should be able to find the keys in the backup. + /// + /// - the keys for this event are missing, and a key storage backup exists + /// on the server, but that backup is not working on this client even + /// though this device is verified. #[default] Unknown = 0, @@ -49,14 +58,16 @@ pub enum UtdCause { UnknownDevice = 4, /// We are missing the keys for this event, but it is a "device-historical" - /// message and no backup is accessible or usable. + /// message and key storage backup is disabled. /// /// Device-historical means that the message was sent before the current /// device existed (but the current user was probably a member of the room /// at the time the message was sent). Not to /// be confused with pre-join or pre-invite messages (see /// [`UtdCause::SentBeforeWeJoined`] for that). - HistoricalMessage = 5, + /// + /// Expected message to user: "History is not available on this device". + HistoricalMessageAndBackupIsDisabled = 5, /// The keys for this event are intentionally withheld. /// @@ -70,6 +81,19 @@ pub enum UtdCause { /// this device by cherry-picking and blocking it, in which case, no action /// can be taken on our side. WithheldBySender = 7, + + /// We are missing the keys for this event, but it is a "device-historical" + /// message, and even though a key storage backup does exist, we can't use + /// it because our device is unverified. + /// + /// Device-historical means that the message was sent before the current + /// device existed (but the current user was probably a member of the room + /// at the time the message was sent). Not to + /// be confused with pre-join or pre-invite messages (see + /// [`UtdCause::SentBeforeWeJoined`] for that). + /// + /// Expected message to user: "You need to verify this device". + HistoricalMessageAndDeviceIsUnverified = 8, } /// MSC4115 membership info in the unsigned area. @@ -97,6 +121,14 @@ pub struct CryptoContextInfo { /// if an event is device-historical or not (sent before the current device /// existed). pub device_creation_ts: MilliSecondsSinceUnixEpoch, + + /// True if this device is secure because it has been verified by us + pub this_device_is_verified: bool, + + /// True if key storage exists on the server, even if we are unable to use + /// it + pub backup_exists_on_server: bool, + /// True if key storage is correctly set up and can be used by the current /// client to download and decrypt message keys. pub is_backup_configured: bool, @@ -134,14 +166,9 @@ impl UtdCause { } if let Ok(timeline_event) = raw_event.deserialize() { - if crypto_context_info.is_backup_configured - && timeline_event.origin_server_ts() - < crypto_context_info.device_creation_ts - { - // It's a device-historical message and there is no accessible - // backup. The key is missing and it - // is expected. - return UtdCause::HistoricalMessage; + if timeline_event.origin_server_ts() < crypto_context_info.device_creation_ts { + // This event was sent before this device existed, so it is "historical" + return UtdCause::determine_historical(crypto_context_info); } } @@ -163,6 +190,44 @@ impl UtdCause { _ => UtdCause::Unknown, } } + + /** + * Below is the flow chart we follow for deciding whether historical + * UTDs are expected. This function starts at position `B`. + * + * ```mermaid + * flowchart TD + * A{Is the message newer than the device?} + * A -- No --> B + * A -- Yes --> X + * B{Is there a backup on the server?} + * B -- No --> Y + * B -- Yes --> C + * C{Is backup working on this device?} + * C -- No --> D + * C -- Yes --> X + * D{Is this device verified?} + * D -- No --> Z + * D -- Yes --> X + * + * X([Normal UTD error]) + * Y([History is not available on this device]) + * Z([You need to verify this device]) + * ``` + */ + fn determine_historical(crypto_context_info: CryptoContextInfo) -> UtdCause { + if crypto_context_info.backup_exists_on_server { + if crypto_context_info.is_backup_configured + || crypto_context_info.this_device_is_verified + { + UtdCause::Unknown + } else { + UtdCause::HistoricalMessageAndDeviceIsUnverified + } + } else { + UtdCause::HistoricalMessageAndBackupIsDisabled + } + } } #[cfg(test)] @@ -183,11 +248,7 @@ mod tests { fn test_if_there_is_no_membership_info_we_guess_unknown() { // If our JSON contains no membership info, then we guess the UTD is unknown. assert_eq!( - UtdCause::determine( - &raw_event(json!({})), - device_old_no_backup(), - &missing_megolm_session() - ), + UtdCause::determine(&raw_event(json!({})), device_old(), &missing_megolm_session()), UtdCause::Unknown ); } @@ -199,7 +260,7 @@ mod tests { assert_eq!( UtdCause::determine( &raw_event(json!({ "unsigned": { "membership": 3 } })), - device_new_with_backup(), + device_old(), &missing_megolm_session() ), UtdCause::Unknown @@ -213,7 +274,7 @@ mod tests { assert_eq!( UtdCause::determine( &raw_event(json!({ "unsigned": { "membership": "invite" } }),), - device_new_with_backup(), + device_old(), &missing_megolm_session() ), UtdCause::Unknown @@ -227,7 +288,7 @@ mod tests { assert_eq!( UtdCause::determine( &raw_event(json!({ "unsigned": { "membership": "join" } })), - device_new_with_backup(), + device_old(), &missing_megolm_session() ), UtdCause::Unknown @@ -241,7 +302,7 @@ mod tests { assert_eq!( UtdCause::determine( &raw_event(json!({ "unsigned": { "membership": "leave" } })), - device_new_with_backup(), + device_old(), &missing_megolm_session() ), UtdCause::SentBeforeWeJoined @@ -256,7 +317,7 @@ mod tests { assert_eq!( UtdCause::determine( &raw_event(json!({ "unsigned": { "membership": "leave" } })), - device_new_with_backup(), + device_old(), &malformed_encrypted_event() ), UtdCause::Unknown @@ -269,7 +330,7 @@ mod tests { assert_eq!( UtdCause::determine( &raw_event(json!({ "unsigned": { "io.element.msc4115.membership": "leave" } })), - device_new_with_backup(), + device_old(), &missing_megolm_session() ), UtdCause::SentBeforeWeJoined @@ -279,11 +340,7 @@ mod tests { #[test] fn test_verification_violation_is_passed_through() { assert_eq!( - UtdCause::determine( - &raw_event(json!({})), - device_new_with_backup(), - &verification_violation() - ), + UtdCause::determine(&raw_event(json!({})), device_old(), &verification_violation()), UtdCause::VerificationViolation ); } @@ -291,11 +348,7 @@ mod tests { #[test] fn test_unsigned_device_is_passed_through() { assert_eq!( - UtdCause::determine( - &raw_event(json!({})), - device_new_with_backup(), - &unsigned_device() - ), + UtdCause::determine(&raw_event(json!({})), device_old(), &unsigned_device()), UtdCause::UnsignedDevice ); } @@ -303,97 +356,159 @@ mod tests { #[test] fn test_unknown_device_is_passed_through() { assert_eq!( - UtdCause::determine(&raw_event(json!({})), device_new_with_backup(), &missing_device()), + UtdCause::determine(&raw_event(json!({})), device_old(), &missing_device()), UtdCause::UnknownDevice ); } #[test] fn test_old_devices_dont_cause_historical_utds() { - // If the device is old, we say this UTD is unexpected (missing megolm session) - assert_eq!( - UtdCause::determine(&utd_event(), device_old_with_backup(), &missing_megolm_session()), - UtdCause::Unknown - ); + // Message key is missing. + let info = missing_megolm_session(); + + // The device is old. + let context = device_old(); + + // So we have no explanation for this UTD. + assert_eq!(UtdCause::determine(&utd_event(), context.clone(), &info), UtdCause::Unknown); // Same for unknown megolm message index - assert_eq!( - UtdCause::determine( - &utd_event(), - device_old_with_backup(), - &unknown_megolm_message_index() - ), - UtdCause::Unknown - ); + let info = unknown_megolm_message_index(); + assert_eq!(UtdCause::determine(&utd_event(), context, &info), UtdCause::Unknown); } #[test] - fn test_new_devices_cause_historical_utds() { - // If the device is old, we say this UTD is expected historical (missing megolm - // session) + fn test_if_backup_is_disabled_historical_utd_is_expected() { + // Message key is missing. + let info = missing_megolm_session(); + + // The device is new. + let mut context = device_new(); + + // There is no key storage backup on the server. + context.backup_exists_on_server = false; + + // So this UTD is expected, and the solution (for future messages!) is to turn + // on key storage backups. assert_eq!( - UtdCause::determine(&utd_event(), device_new_with_backup(), &missing_megolm_session()), - UtdCause::HistoricalMessage + UtdCause::determine(&utd_event(), context.clone(), &info), + UtdCause::HistoricalMessageAndBackupIsDisabled ); // Same for unknown megolm message index + let info = unknown_megolm_message_index(); assert_eq!( - UtdCause::determine( - &utd_event(), - device_new_with_backup(), - &unknown_megolm_message_index() - ), - UtdCause::HistoricalMessage + UtdCause::determine(&utd_event(), context, &info), + UtdCause::HistoricalMessageAndBackupIsDisabled ); } #[test] fn test_malformed_events_are_never_expected_utds() { - // Even if the device is new, if the reason for the UTD is a malformed event, - // it's an unexpected UTD. - assert_eq!( - UtdCause::determine( - &utd_event(), - device_new_with_backup(), - &malformed_encrypted_event() - ), - UtdCause::Unknown - ); + // The event was malformed. + let info = malformed_encrypted_event(); + + // The device is new. + let mut context = device_new(); + + // There is no key storage backup on the server. + context.backup_exists_on_server = false; + + // So this could be expected historical like the previous test, but because the + // encrypted event is malformed, that takes precedence, and it's unexpected. + assert_eq!(UtdCause::determine(&utd_event(), context.clone(), &info), UtdCause::Unknown); // Same for decryption failures - assert_eq!( - UtdCause::determine( - &utd_event(), - device_new_with_backup(), - &megolm_decryption_failure() - ), - UtdCause::Unknown - ); + let info = megolm_decryption_failure(); + assert_eq!(UtdCause::determine(&utd_event(), context, &info), UtdCause::Unknown); } #[test] - fn test_if_backup_is_disabled_this_utd_is_unexpected() { - // If the backup is disables, even if the device is new and the reason for the - // UTD is missing keys, we still treat this UTD as unexpected. - // - // TODO: I (AJB) think this is wrong, but it will be addressed as part of - // https://github.com/element-hq/element-meta/issues/2638 + fn test_new_devices_with_nonworking_backups_because_unverified_cause_expected_utds() { + // Message key is missing. + let info = missing_megolm_session(); + + // The device is new. + let mut context = device_new(); + + // There is a key storage backup on the server. + context.backup_exists_on_server = true; + + // The key storage backup is not working, + context.is_backup_configured = false; + + // probably because... + // Our device is not verified. + context.this_device_is_verified = false; + + // So this UTD is expected, and the solution is (hopefully) to verify. assert_eq!( - UtdCause::determine(&utd_event(), device_new_no_backup(), &missing_megolm_session()), - UtdCause::Unknown + UtdCause::determine(&utd_event(), context, &info), + UtdCause::HistoricalMessageAndDeviceIsUnverified ); // Same for unknown megolm message index + let info = unknown_megolm_message_index(); assert_eq!( - UtdCause::determine( - &utd_event(), - device_new_no_backup(), - &unknown_megolm_message_index() - ), - UtdCause::Unknown + UtdCause::determine(&utd_event(), context, &info), + UtdCause::HistoricalMessageAndDeviceIsUnverified ); } + #[test] + fn test_if_backup_is_working_then_historical_utd_is_unexpected() { + // Message key is missing. + let info = missing_megolm_session(); + + // The device is new. + let mut context = device_new(); + + // There is a key storage backup on the server. + context.backup_exists_on_server = true; + + // The key storage backup is working. + context.is_backup_configured = true; + + // So this UTD is unexpected since we should be able to fetch the key from + // storage. + assert_eq!(UtdCause::determine(&utd_event(), context.clone(), &info), UtdCause::Unknown); + + // Same for unknown megolm message index + let info = unknown_megolm_message_index(); + assert_eq!(UtdCause::determine(&utd_event(), context, &info), UtdCause::Unknown); + } + + #[test] + fn test_if_backup_is_not_working_even_though_verified_then_historical_utd_is_unexpected() { + // Message key is missing. + let info = missing_megolm_session(); + + // The device is new. + let mut context = device_new(); + + // There is a key storage backup on the server. + context.backup_exists_on_server = true; + + // The key storage backup is working. + context.is_backup_configured = false; + + // even though... + // Our device is verified. + context.this_device_is_verified = true; + + // So this UTD is unexpected since we can't explain why our backup is not + // working. + // + // TODO: it might be nice to tell the user that our backup is not working! + // Currently we don't distinguish between Unknown cases, since we want + // to make sure they are all reported as unexpected UTDs. + assert_eq!(UtdCause::determine(&utd_event(), context.clone(), &info), UtdCause::Unknown); + + // Same for unknown megolm message index + let info = unknown_megolm_message_index(); + assert_eq!(UtdCause::determine(&utd_event(), context, &info), UtdCause::Unknown); + } + fn utd_event() -> Raw { raw_event(json!({ "type": "m.room.encrypted", @@ -416,31 +531,21 @@ mod tests { Raw::from_json(to_raw_value(&value).unwrap()) } - fn device_old_no_backup() -> CryptoContextInfo { + fn device_old() -> CryptoContextInfo { CryptoContextInfo { device_creation_ts: MilliSecondsSinceUnixEpoch((BEFORE_EVENT_TIME).try_into().unwrap()), + this_device_is_verified: false, is_backup_configured: false, + backup_exists_on_server: false, } } - fn device_old_with_backup() -> CryptoContextInfo { - CryptoContextInfo { - device_creation_ts: MilliSecondsSinceUnixEpoch((BEFORE_EVENT_TIME).try_into().unwrap()), - is_backup_configured: true, - } - } - - fn device_new_no_backup() -> CryptoContextInfo { + fn device_new() -> CryptoContextInfo { CryptoContextInfo { device_creation_ts: MilliSecondsSinceUnixEpoch((AFTER_EVENT_TIME).try_into().unwrap()), + this_device_is_verified: false, is_backup_configured: false, - } - } - - fn device_new_with_backup() -> CryptoContextInfo { - CryptoContextInfo { - device_creation_ts: MilliSecondsSinceUnixEpoch((AFTER_EVENT_TIME).try_into().unwrap()), - is_backup_configured: true, + backup_exists_on_server: false, } } diff --git a/crates/matrix-sdk-ui/src/timeline/tests/mod.rs b/crates/matrix-sdk-ui/src/timeline/tests/mod.rs index f708da78664..eb6601c167d 100644 --- a/crates/matrix-sdk-ui/src/timeline/tests/mod.rs +++ b/crates/matrix-sdk-ui/src/timeline/tests/mod.rs @@ -387,6 +387,8 @@ impl RoomDataProvider for TestRoomDataProvider { ) .unwrap_or(MilliSecondsSinceUnixEpoch::now()), is_backup_configured: false, + this_device_is_verified: true, + backup_exists_on_server: true, }) .boxed() } diff --git a/crates/matrix-sdk/src/room/mod.rs b/crates/matrix-sdk/src/room/mod.rs index a5908d29897..801987f958d 100644 --- a/crates/matrix-sdk/src/room/mod.rs +++ b/crates/matrix-sdk/src/room/mod.rs @@ -620,13 +620,26 @@ impl Room { #[cfg(feature = "e2e-encryption")] pub async fn crypto_context_info(&self) -> CryptoContextInfo { let encryption = self.client.encryption(); + + let (device_creation_ts, this_device_is_verified) = match encryption.get_own_device().await + { + Ok(Some(device)) => { + (device.first_time_seen_ts(), device.is_verified_with_cross_signing()) + } + + // Should not happen, there will always be an own device + _ => (MilliSecondsSinceUnixEpoch::now(), true), + }; + + let is_backup_configured = encryption.backups().state() == BackupState::Enabled; + let backup_exists_on_server = + encryption.backups().exists_on_server().await.unwrap_or(false); + CryptoContextInfo { - device_creation_ts: match encryption.get_own_device().await { - Ok(Some(device)) => device.first_time_seen_ts(), - // Should not happen, there will always be an own device - _ => MilliSecondsSinceUnixEpoch::now(), - }, - is_backup_configured: encryption.backups().state() == BackupState::Enabled, + device_creation_ts, + this_device_is_verified, + is_backup_configured, + backup_exists_on_server, } }