From 329244616954c6f3e174667921bd2f652fa0d907 Mon Sep 17 00:00:00 2001 From: Maxime Ripard Date: Mon, 15 Jul 2024 11:40:30 +0200 Subject: [PATCH 1/4] public_inbox: Fix type hints Some type hints were set to return a given type, while the function was actually returning that type, or a None object. Fix the type hints when relevant. Signed-off-by: Maxime Ripard --- did/plugins/public_inbox.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/did/plugins/public_inbox.py b/did/plugins/public_inbox.py index 888f41c5..c6204986 100644 --- a/did/plugins/public_inbox.py +++ b/did/plugins/public_inbox.py @@ -29,17 +29,17 @@ class Message(object): def __init__(self, msg: mailbox.mboxMessage) -> None: self.msg = msg - def __msg_id(self, keyid: str) -> str: + def __msg_id(self, keyid: str) -> typing.Optional[str]: msgid = self.msg[keyid] if msgid is None: return None return msgid.lstrip("<").rstrip(">") - def id(self) -> str: + def id(self) -> typing.Optional[str]: return self.__msg_id("Message-Id") - def parent_id(self) -> str: + def parent_id(self) -> typing.Optional[str]: return self.__msg_id("In-Reply-To") def subject(self) -> str: @@ -129,7 +129,7 @@ def __get_msgs_from_mbox(self, mbox: mailbox.mbox) -> list[Message]: return msgs - def __fetch_thread_root(self, msg: Message) -> Message: + def __fetch_thread_root(self, msg: Message) -> typing.Optional[Message]: msg_id = msg.id() url = self.__get_url("/all/%s/t.mbox.gz" % msg_id) @@ -141,7 +141,7 @@ def __fetch_thread_root(self, msg: Message) -> Message: log.debug("Found message %s thread root: %s." % (msg_id, msg.id())) return msg - def __get_thread_root(self, msg: Message) -> Message: + def __get_thread_root(self, msg: Message) -> typing.Optional[Message]: log.debug("Looking for thread root of message %s" % msg.id()) if msg.is_thread_root(): log.debug("Message is thread root already. Returning.") From 14fedc2d2a275e8257b1605650aeed7cd8f614a2 Mon Sep 17 00:00:00 2001 From: Maxime Ripard Date: Mon, 15 Jul 2024 11:46:29 +0200 Subject: [PATCH 2/4] public_inbox: Improve log on poorly constructed messages Some messages will not be perfect, and will have inconsistent or missing headers. We were mostly handling them fine, but didn't log anything when that was happening, making debugging issues fairly difficult. Let's improve the logging a bit. Signed-off-by: Maxime Ripard --- did/plugins/public_inbox.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/did/plugins/public_inbox.py b/did/plugins/public_inbox.py index c6204986..da006f8e 100644 --- a/did/plugins/public_inbox.py +++ b/did/plugins/public_inbox.py @@ -32,6 +32,7 @@ def __init__(self, msg: mailbox.mboxMessage) -> None: def __msg_id(self, keyid: str) -> typing.Optional[str]: msgid = self.msg[keyid] if msgid is None: + log.debug("Missing header %s" % keyid) return None return msgid.lstrip("<").rstrip(">") @@ -141,6 +142,8 @@ def __fetch_thread_root(self, msg: Message) -> typing.Optional[Message]: log.debug("Found message %s thread root: %s." % (msg_id, msg.id())) return msg + log.warn("Couldn't find message root") + def __get_thread_root(self, msg: Message) -> typing.Optional[Message]: log.debug("Looking for thread root of message %s" % msg.id()) if msg.is_thread_root(): From 7b964541375f1ad927d3b2e6b94dbf8ddb349dfc Mon Sep 17 00:00:00 2001 From: Maxime Ripard Date: Mon, 15 Jul 2024 11:49:27 +0200 Subject: [PATCH 3/4] public_inbox: Make returning None explicit The PublicInbox.__fetch_thread_root method returns None implicitly at the end of the function. Let's use an explicit return statement to make it more obvious. Signed-off-by: Maxime Ripard --- did/plugins/public_inbox.py | 1 + 1 file changed, 1 insertion(+) diff --git a/did/plugins/public_inbox.py b/did/plugins/public_inbox.py index da006f8e..3279c61f 100644 --- a/did/plugins/public_inbox.py +++ b/did/plugins/public_inbox.py @@ -143,6 +143,7 @@ def __fetch_thread_root(self, msg: Message) -> typing.Optional[Message]: return msg log.warn("Couldn't find message root") + return None def __get_thread_root(self, msg: Message) -> typing.Optional[Message]: log.debug("Looking for thread root of message %s" % msg.id()) From 240b72c31f089215e0551c594459fbbbce43430f Mon Sep 17 00:00:00 2001 From: Maxime Ripard Date: Mon, 15 Jul 2024 12:08:32 +0200 Subject: [PATCH 4/4] public_inbox: Handle missing parent is __get_thread_root() A message can have an In-Reply-To header but the message it points to might not be available for some reason. In such a case, the Message.parent_id() method will return a proper value, but __fetch_thread_root() on that message might not and would return None. Let's handle that case by returning the latest message we found as the root message. Signed-off-by: Maxime Ripard --- did/plugins/public_inbox.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/did/plugins/public_inbox.py b/did/plugins/public_inbox.py index 3279c61f..984aa57a 100644 --- a/did/plugins/public_inbox.py +++ b/did/plugins/public_inbox.py @@ -145,7 +145,7 @@ def __fetch_thread_root(self, msg: Message) -> typing.Optional[Message]: log.warn("Couldn't find message root") return None - def __get_thread_root(self, msg: Message) -> typing.Optional[Message]: + def __get_thread_root(self, msg: Message) -> Message: log.debug("Looking for thread root of message %s" % msg.id()) if msg.is_thread_root(): log.debug("Message is thread root already. Returning.") @@ -154,6 +154,10 @@ def __get_thread_root(self, msg: Message) -> typing.Optional[Message]: parent_id = msg.parent_id() if parent_id not in self.messages_cache: root = self.__fetch_thread_root(msg) + if root is None: + log.debug("Can't retrieve the thread root, returning.") + return msg + log.debug("Found root message %s for message %s" % (root.id(), msg.id())) return root @@ -167,7 +171,11 @@ def __get_thread_root(self, msg: Message) -> typing.Optional[Message]: parent_id = parent.parent_id() if parent_id not in self.messages_cache: - root = self.__fetch_thread_root(msg) + root = self.__fetch_thread_root(parent) + if root is None: + log.debug("Can't retrieve the message parent, returning.") + return parent + log.debug( "Found root message %s for message %s" % (root.id(), msg.id()))