From 989bfd8f070f6c060ba90c79deb4ee4c5c2c3538 Mon Sep 17 00:00:00 2001 From: Sean Whalen Date: Sat, 2 Nov 2024 11:40:37 -0400 Subject: [PATCH] Code cleanup --- parsedmarc/__init__.py | 113 +++++++++++++++++++++------------------ parsedmarc/cli.py | 3 +- parsedmarc/mail/gmail.py | 12 ++--- parsedmarc/mail/graph.py | 33 ++++++------ parsedmarc/mail/imap.py | 4 +- 5 files changed, 85 insertions(+), 80 deletions(-) diff --git a/parsedmarc/__init__.py b/parsedmarc/__init__.py index a83b200b..43a477e5 100644 --- a/parsedmarc/__init__.py +++ b/parsedmarc/__init__.py @@ -28,8 +28,12 @@ from mailsuite.smtp import send_email from parsedmarc.log import logger -from parsedmarc.mail import MailboxConnection, IMAPConnection, \ - MSGraphConnection, GmailConnection +from parsedmarc.mail import ( + MailboxConnection, + IMAPConnection, + MSGraphConnection, + GmailConnection, +) from parsedmarc.utils import get_base_domain, get_ip_address_info from parsedmarc.utils import is_outlook_msg, convert_outlook_msg from parsedmarc.utils import parse_email @@ -1484,23 +1488,25 @@ def get_dmarc_reports_from_mbox( ) -def get_dmarc_reports_from_mailbox(connection: MailboxConnection, - reports_folder="INBOX", - archive_folder="Archive", - delete=False, - test=False, - ip_db_path=None, - always_use_local_files=False, - reverse_dns_map_path=None, - reverse_dns_map_url=None, - offline=False, - nameservers=None, - dns_timeout=6.0, - strip_attachment_payloads=False, - results=None, - batch_size=10, - since=None, - create_folders=True): +def get_dmarc_reports_from_mailbox( + connection: MailboxConnection, + reports_folder="INBOX", + archive_folder="Archive", + delete=False, + test=False, + ip_db_path=None, + always_use_local_files=False, + reverse_dns_map_path=None, + reverse_dns_map_url=None, + offline=False, + nameservers=None, + dns_timeout=6.0, + strip_attachment_payloads=False, + results=None, + batch_size=10, + since=None, + create_folders=True, +): """ Fetches and parses DMARC reports from a mailbox @@ -1564,42 +1570,44 @@ def get_dmarc_reports_from_mailbox(connection: MailboxConnection, if since: _since = 1440 # default one day - if re.match(r'\d+[mhd]$', since): - s = re.split(r'(\d+)', since) - if s[2] == 'm': + if re.match(r"\d+[mhd]$", since): + s = re.split(r"(\d+)", since) + if s[2] == "m": _since = int(s[1]) - elif s[2] == 'h': - _since = int(s[1])*60 - elif s[2] == 'd': - _since = int(s[1])*60*24 - elif s[2] == 'w': - _since = int(s[1])*60*24*7 + elif s[2] == "h": + _since = int(s[1]) * 60 + elif s[2] == "d": + _since = int(s[1]) * 60 * 24 + elif s[2] == "w": + _since = int(s[1]) * 60 * 24 * 7 else: - logger.warning("Incorrect format for \'since\' option. \ + logger.warning( + "Incorrect format for 'since' option. \ Provided value:{0}, Expected values:(5m|3h|2d|1w). \ - Ignoring option, fetching messages for last 24hrs" \ - "SMTP does not support a time or timezone in since." \ - "See https://www.rfc-editor.org/rfc/rfc3501#page-52" - .format(since)) + Ignoring option, fetching messages for last 24hrs" + "SMTP does not support a time or timezone in since." + "See https://www.rfc-editor.org/rfc/rfc3501#page-52".format(since) + ) if isinstance(connection, IMAPConnection): - logger.debug("Only days and weeks values in \'since\' option are \ - considered for IMAP conections. Examples: 2d or 1w") + logger.debug( + "Only days and weeks values in 'since' option are \ + considered for IMAP conections. Examples: 2d or 1w" + ) since = (datetime.utcnow() - timedelta(minutes=_since)).date() current_time = datetime.utcnow().date() elif isinstance(connection, MSGraphConnection): - since = (datetime.utcnow() - timedelta(minutes=_since)) \ - .isoformat() + 'Z' - current_time = datetime.utcnow().isoformat() + 'Z' + since = (datetime.utcnow() - timedelta(minutes=_since)).isoformat() + "Z" + current_time = datetime.utcnow().isoformat() + "Z" elif isinstance(connection, GmailConnection): - since = (datetime.utcnow() - timedelta(minutes=_since)) \ - .strftime('%s') - current_time = datetime.utcnow().strftime('%s') + since = (datetime.utcnow() - timedelta(minutes=_since)).strftime("%s") + current_time = datetime.utcnow().strftime("%s") else: pass - messages = connection.fetch_messages(reports_folder, batch_size=batch_size, - since=since) + messages = connection.fetch_messages( + reports_folder, batch_size=batch_size, since=since + ) total_messages = len(messages) logger.debug("Found {0} messages in {1}".format(len(messages), reports_folder)) @@ -1612,16 +1620,16 @@ def get_dmarc_reports_from_mailbox(connection: MailboxConnection, for i in range(message_limit): msg_uid = messages[i] - logger.debug("Processing message {0} of {1}: UID {2}".format( - i+1, message_limit, msg_uid - )) + logger.debug( + "Processing message {0} of {1}: UID {2}".format( + i + 1, message_limit, msg_uid + ) + ) if isinstance(mailbox, MSGraphConnection): if test: - msg_content = connection.fetch_message(msg_uid, - mark_read=False) + msg_content = connection.fetch_message(msg_uid, mark_read=False) else: - msg_content = connection.fetch_message(msg_uid, - mark_read=True) + msg_content = connection.fetch_message(msg_uid, mark_read=True) else: msg_content = connection.fetch_message(msg_uid) try: @@ -1755,8 +1763,9 @@ def get_dmarc_reports_from_mailbox(connection: MailboxConnection, ) if current_time: - total_messages = len(connection.fetch_messages(reports_folder, - since=current_time)) + total_messages = len( + connection.fetch_messages(reports_folder, since=current_time) + ) else: total_messages = len(connection.fetch_messages(reports_folder)) diff --git a/parsedmarc/cli.py b/parsedmarc/cli.py index 7e7b2275..167eba69 100644 --- a/parsedmarc/cli.py +++ b/parsedmarc/cli.py @@ -714,8 +714,7 @@ def process_reports(reports_): if "batch_size" in mailbox_config: opts.mailbox_batch_size = mailbox_config.getint("batch_size") if "check_timeout" in mailbox_config: - opts.mailbox_check_timeout = mailbox_config.getint( - "check_timeout") + opts.mailbox_check_timeout = mailbox_config.getint("check_timeout") if "since" in mailbox_config: opts.mailbox_since = mailbox_config["since"] diff --git a/parsedmarc/mail/gmail.py b/parsedmarc/mail/gmail.py index 6f812b36..f9163dbf 100644 --- a/parsedmarc/mail/gmail.py +++ b/parsedmarc/mail/gmail.py @@ -69,8 +69,7 @@ def create_folder(self, folder_name: str): else: raise e - def _fetch_all_message_ids(self, reports_label_id, page_token=None, - since=None): + def _fetch_all_message_ids(self, reports_label_id, page_token=None, since=None): if since: results = ( self.service.users() @@ -80,7 +79,7 @@ def _fetch_all_message_ids(self, reports_label_id, page_token=None, includeSpamTrash=self.include_spam_trash, labelIds=[reports_label_id], pageToken=page_token, - q=f'after:{since}', + q=f"after:{since}", ) .execute() ) @@ -107,10 +106,11 @@ def _fetch_all_message_ids(self, reports_label_id, page_token=None, def fetch_messages(self, reports_folder: str, **kwargs) -> List[str]: reports_label_id = self._find_label_id_for_label(reports_folder) - since = kwargs.get('since') + since = kwargs.get("since") if since: - return [id for id in self._fetch_all_message_ids(reports_label_id, - since=since)] + return [ + id for id in self._fetch_all_message_ids(reports_label_id, since=since) + ] else: return [id for id in self._fetch_all_message_ids(reports_label_id)] diff --git a/parsedmarc/mail/graph.py b/parsedmarc/mail/graph.py index 92e032ac..918706af 100644 --- a/parsedmarc/mail/graph.py +++ b/parsedmarc/mail/graph.py @@ -146,24 +146,21 @@ def create_folder(self, folder_name: str): def fetch_messages(self, folder_name: str, **kwargs) -> List[str]: """Returns a list of message UIDs in the specified folder""" folder_id = self._find_folder_id_from_folder_path(folder_name) - url = f'/users/{self.mailbox_name}/mailFolders/' \ - f'{folder_id}/messages' - since = kwargs.get('since') + url = f"/users/{self.mailbox_name}/mailFolders/" f"{folder_id}/messages" + since = kwargs.get("since") if not since: since = None - batch_size = kwargs.get('batch_size') + batch_size = kwargs.get("batch_size") if not batch_size: batch_size = 0 emails = self._get_all_messages(url, batch_size, since) - return [email['id'] for email in emails] + return [email["id"] for email in emails] def _get_all_messages(self, url, batch_size, since): messages: list - params = { - '$select': 'id' - } + params = {"$select": "id"} if since: - params['$filter'] = f'receivedDateTime ge {since}' + params["$filter"] = f"receivedDateTime ge {since}" if batch_size and batch_size > 0: params["$top"] = batch_size else: @@ -173,11 +170,10 @@ def _get_all_messages(self, url, batch_size, since): raise RuntimeError(f"Failed to fetch messages {result.text}") messages = result.json()["value"] # Loop if next page is present and not obtained message limit. - while '@odata.nextLink' in result.json() and ( - since is not None or ( - batch_size == 0 or - batch_size - len(messages) > 0)): - result = self._client.get(result.json()['@odata.nextLink']) + while "@odata.nextLink" in result.json() and ( + since is not None or (batch_size == 0 or batch_size - len(messages) > 0) + ): + result = self._client.get(result.json()["@odata.nextLink"]) if result.status_code != 200: raise RuntimeError(f"Failed to fetch messages {result.text}") messages.extend(result.json()["value"]) @@ -193,12 +189,13 @@ def mark_message_read(self, message_id: str): ) def fetch_message(self, message_id: str, **kwargs): - url = f'/users/{self.mailbox_name}/messages/{message_id}/$value' + url = f"/users/{self.mailbox_name}/messages/{message_id}/$value" result = self._client.get(url) if result.status_code != 200: - raise RuntimeWarning(f"Failed to fetch message" - f"{result.status_code}: {result.json()}") - mark_read = kwargs.get('mark_read') + raise RuntimeWarning( + f"Failed to fetch message" f"{result.status_code}: {result.json()}" + ) + mark_read = kwargs.get("mark_read") if mark_read: self.mark_message_read(message_id) return result.text diff --git a/parsedmarc/mail/imap.py b/parsedmarc/mail/imap.py index 2a85c655..11bdf9b7 100644 --- a/parsedmarc/mail/imap.py +++ b/parsedmarc/mail/imap.py @@ -39,9 +39,9 @@ def create_folder(self, folder_name: str): def fetch_messages(self, reports_folder: str, **kwargs): self._client.select_folder(reports_folder) - since = kwargs.get('since') + since = kwargs.get("since") if since: - return self._client.search([u'SINCE', since]) + return self._client.search(["SINCE", since]) else: return self._client.search()