diff --git a/README.md b/README.md index 149c597..e9c4403 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,7 @@ -![PyPI - Version](https://img.shields.io/pypi/v/mail-parser) +[![PyPI - Version](https://img.shields.io/pypi/v/mail-parser)](https://pypi.org/project/mail-parser/) [![Coverage Status](https://coveralls.io/repos/github/SpamScope/mail-parser/badge.svg?branch=develop)](https://coveralls.io/github/SpamScope/mail-parser?branch=develop) +[![PyPI - Downloads](https://img.shields.io/pypi/dm/mail-parser?color=blue)](https://pypistats.org/packages/mail-parser) + ![SpamScope](https://raw.githubusercontent.com/SpamScope/spamscope/develop/docs/logo/spamscope.png) diff --git a/src/mailparser/__init__.py b/src/mailparser/__init__.py index a80e9bc..9808961 100644 --- a/src/mailparser/__init__.py +++ b/src/mailparser/__init__.py @@ -17,7 +17,7 @@ limitations under the License. """ -from mailparser.mailparser import ( +from mailparser.core import ( MailParser, parse_from_bytes, parse_from_file, diff --git a/src/mailparser/__main__.py b/src/mailparser/__main__.py index db9d849..db99073 100644 --- a/src/mailparser/__main__.py +++ b/src/mailparser/__main__.py @@ -23,8 +23,8 @@ import sys import mailparser -from .exceptions import MailParserOutlookError -from .utils import ( +from mailparser.exceptions import MailParserOutlookError +from mailparser.utils import ( custom_log, print_attachments, print_mail_fingerprints, diff --git a/src/mailparser/mailparser.py b/src/mailparser/core.py similarity index 92% rename from src/mailparser/mailparser.py rename to src/mailparser/core.py index 5a3e174..353e326 100644 --- a/src/mailparser/mailparser.py +++ b/src/mailparser/core.py @@ -27,9 +27,9 @@ import six import json -from .const import ADDRESSES_HEADERS, EPILOGUE_DEFECTS, REGXIP +from mailparser.const import ADDRESSES_HEADERS, EPILOGUE_DEFECTS, REGXIP -from .utils import ( +from mailparser.utils import ( convert_mail_date, decode_header_part, find_between, @@ -44,7 +44,7 @@ write_attachments, ) -from .exceptions import MailParserEnvironmentError +from mailparser.exceptions import MailParserEnvironmentError log = logging.getLogger(__name__) @@ -375,6 +375,9 @@ def parse(self): elif content_subtype in ("rtf"): is_attachment = True filename = "{}.rtf".format(random_string()) + elif content_disposition == "attachment": + is_attachment = True + filename = "{}.txt".format(random_string()) # this is an attachment if is_attachment: @@ -464,8 +467,12 @@ def parse(self): cte = p.get("Content-Transfer-Encoding") if cte: cte = cte.lower() + if not cte or cte in ["7bit", "8bit"]: - payload = payload.decode("raw-unicode-escape") + try: + payload = payload.decode("raw-unicode-escape") + except UnicodeDecodeError: + payload = ported_string(payload, encoding=charset) else: payload = ported_string(payload, encoding=charset) @@ -481,12 +488,12 @@ def parse(self): ) ) self._text_not_managed.append(payload) - else: - # Parsed object mail with all parts - self._mail = self._make_mail() - # Parsed object mail with mains parts - self._mail_partial = self._make_mail(complete=False) + # Parsed object mail with all parts + self._mail = self._make_mail() + + # Parsed object mail with mains parts + self._mail_partial = self._make_mail(complete=False) def get_server_ipaddress(self, trust): """ @@ -526,19 +533,35 @@ def get_server_ipaddress(self, trust): i = ported_string(i) if trust in i: log.debug("Trust string {!r} is in {!r}".format(trust, i)) - check = REGXIP.findall(i[0 : i.find("by")]) - - if check: - try: - ip_str = six.text_type(check[-1]) - log.debug("Found sender IP {!r} in {!r}".format(ip_str, i)) - ip = ipaddress.ip_address(ip_str) - except ValueError: - return - else: - if not ip.is_private: - log.debug("IP {!r} not private".format(ip_str)) - return ip_str + ip_str = self._extract_ip(i) + if ip_str: + return ip_str + + def _extract_ip(self, received_header): + """ + Extract the IP address from the received header if it is not private. + + Args: + received_header (string): The received header string + + Returns: + string with the ip address or None + """ + check = REGXIP.findall(received_header[0 : received_header.find("by")]) + if check: + try: + ip_str = six.text_type(check[-1]) + log.debug( + "Found sender IP {!r} in {!r}".format(ip_str, received_header) + ) + ip = ipaddress.ip_address(ip_str) + except ValueError: + return None + else: + if not ip.is_private: + log.debug("IP {!r} not private".format(ip_str)) + return ip_str + return None def write_attachments(self, base_path): """This method writes the attachments of mail on disk @@ -662,8 +685,9 @@ def date(self): try: conv, _ = convert_mail_date(date) - finally: - return conv + except Exception: + pass + return conv @property def timezone(self): @@ -675,8 +699,9 @@ def timezone(self): try: _, timezone = convert_mail_date(date) - finally: - return timezone + except Exception: + pass + return timezone @property def date_json(self): diff --git a/src/mailparser/utils.py b/src/mailparser/utils.py index 7f84838..3c4d09e 100644 --- a/src/mailparser/utils.py +++ b/src/mailparser/utils.py @@ -359,12 +359,15 @@ def receiveds_parsing(receiveds): def convert_mail_date(date): + """ + Convert a mail date in a datetime object. + """ log.debug("Date to parse: {!r}".format(date)) d = email.utils.parsedate_tz(date) log.debug("Date parsed: {!r}".format(d)) t = email.utils.mktime_tz(d) log.debug("Date parsed in timestamp: {!r}".format(t)) - date_utc = datetime.datetime.utcfromtimestamp(t) + date_utc = datetime.datetime.fromtimestamp(t, datetime.timezone.utc) timezone = d[9] / 3600.0 if d[9] else 0 timezone = "{:+.1f}".format(timezone) log.debug("Calculated timezone: {!r}".format(timezone)) diff --git a/tests/test_mail_parser.py b/tests/test_mail_parser.py index 6ce85bf..1adb3c3 100644 --- a/tests/test_mail_parser.py +++ b/tests/test_mail_parser.py @@ -302,7 +302,7 @@ def test_parsing_know_values(self): # raw = "Sun, 29 Nov 2015 09:45:18 +0100" self.assertIsInstance(mail.date_raw, six.text_type) self.assertIsInstance(mail.date_json, six.text_type) - raw_utc = datetime.datetime(2015, 11, 29, 8, 45, 18, 0).isoformat() + raw_utc = "2015-11-29T08:45:18+00:00" result = mail.date.isoformat() self.assertEqual(raw_utc, result) @@ -548,7 +548,7 @@ def test_convert_mail_date(self): s = "Mon, 20 Mar 2017 05:12:54 +0600" d, t = convert_mail_date(s) self.assertEqual(t, "+6.0") - self.assertEqual(str(d), "2017-03-19 23:12:54") + self.assertEqual(str(d), "2017-03-19 23:12:54+00:00") s = "Mon, 20 Mar 2017 05:12:54 -0600" d, t = convert_mail_date(s) self.assertEqual(t, "-6.0") @@ -650,7 +650,7 @@ def test_parse_from_bytes(self): # raw = "Sun, 29 Nov 2015 09:45:18 +0100" self.assertIsInstance(mail.date_raw, six.text_type) self.assertIsInstance(mail.date_json, six.text_type) - raw_utc = datetime.datetime(2015, 11, 29, 8, 45, 18, 0).isoformat() + raw_utc = "2015-11-29T08:45:18+00:00" result = mail.date.isoformat() self.assertEqual(raw_utc, result)