Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fixed issues #97, #123, #124 and #125

Merged
merged 5 commits into from
Nov 4, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
![PyPI - Version](https://img.shields.io/pypi/v/mail-parser)
[![PyPI - Version](https://img.shields.io/pypi/v/mail-parser)](https://pypi.org/project/mail-parser/)
[![Coverage Status](https://coveralls.io/repos/github/SpamScope/mail-parser/badge.svg?branch=develop)](https://coveralls.io/github/SpamScope/mail-parser?branch=develop)
[![PyPI - Downloads](https://img.shields.io/pypi/dm/mail-parser?color=blue)](https://pypistats.org/packages/mail-parser)


![SpamScope](https://raw.githubusercontent.com/SpamScope/spamscope/develop/docs/logo/spamscope.png)

Expand Down
2 changes: 1 addition & 1 deletion src/mailparser/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
limitations under the License.
"""

from mailparser.mailparser import (
from mailparser.core import (
MailParser,
parse_from_bytes,
parse_from_file,
Expand Down
4 changes: 2 additions & 2 deletions src/mailparser/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,8 @@
import sys

import mailparser
from .exceptions import MailParserOutlookError
from .utils import (
from mailparser.exceptions import MailParserOutlookError
from mailparser.utils import (
custom_log,
print_attachments,
print_mail_fingerprints,
Expand Down
77 changes: 51 additions & 26 deletions src/mailparser/mailparser.py → src/mailparser/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,9 +27,9 @@
import six
import json

from .const import ADDRESSES_HEADERS, EPILOGUE_DEFECTS, REGXIP
from mailparser.const import ADDRESSES_HEADERS, EPILOGUE_DEFECTS, REGXIP

from .utils import (
from mailparser.utils import (
convert_mail_date,
decode_header_part,
find_between,
Expand All @@ -44,7 +44,7 @@
write_attachments,
)

from .exceptions import MailParserEnvironmentError
from mailparser.exceptions import MailParserEnvironmentError


log = logging.getLogger(__name__)
Expand Down Expand Up @@ -375,6 +375,9 @@ def parse(self):
elif content_subtype in ("rtf"):
is_attachment = True
filename = "{}.rtf".format(random_string())
elif content_disposition == "attachment":
is_attachment = True
filename = "{}.txt".format(random_string())

# this is an attachment
if is_attachment:
Expand Down Expand Up @@ -464,8 +467,12 @@ def parse(self):
cte = p.get("Content-Transfer-Encoding")
if cte:
cte = cte.lower()

if not cte or cte in ["7bit", "8bit"]:
payload = payload.decode("raw-unicode-escape")
try:
payload = payload.decode("raw-unicode-escape")
except UnicodeDecodeError:
payload = ported_string(payload, encoding=charset)
else:
payload = ported_string(payload, encoding=charset)

Expand All @@ -481,12 +488,12 @@ def parse(self):
)
)
self._text_not_managed.append(payload)
else:
# Parsed object mail with all parts
self._mail = self._make_mail()

# Parsed object mail with mains parts
self._mail_partial = self._make_mail(complete=False)
# Parsed object mail with all parts
self._mail = self._make_mail()

# Parsed object mail with mains parts
self._mail_partial = self._make_mail(complete=False)

def get_server_ipaddress(self, trust):
"""
Expand Down Expand Up @@ -526,19 +533,35 @@ def get_server_ipaddress(self, trust):
i = ported_string(i)
if trust in i:
log.debug("Trust string {!r} is in {!r}".format(trust, i))
check = REGXIP.findall(i[0 : i.find("by")])

if check:
try:
ip_str = six.text_type(check[-1])
log.debug("Found sender IP {!r} in {!r}".format(ip_str, i))
ip = ipaddress.ip_address(ip_str)
except ValueError:
return
else:
if not ip.is_private:
log.debug("IP {!r} not private".format(ip_str))
return ip_str
ip_str = self._extract_ip(i)
if ip_str:
return ip_str

def _extract_ip(self, received_header):
    """
    Extract the IP address from the received header if it is not private.

    Only the text that precedes the first "by" in the header is searched,
    and the last REGXIP match found there is used as the candidate.

    Args:
        received_header (string): The received header string

    Returns:
        string with the ip address, or None when no candidate is found,
        the candidate is not a valid IP address, or the address is private
    """
    # str.partition yields the whole string when "by" is absent. Slicing
    # with str.find would use -1 in that case and silently drop the last
    # character of the header (possibly the last digit of the address).
    sender_part = received_header.partition("by")[0]
    check = REGXIP.findall(sender_part)
    if not check:
        return None

    try:
        ip_str = six.text_type(check[-1])
        log.debug(
            "Found sender IP {!r} in {!r}".format(ip_str, received_header)
        )
        ip = ipaddress.ip_address(ip_str)
    except ValueError:
        # The regex matched something that is not a valid IP address.
        return None

    if ip.is_private:
        return None

    log.debug("IP {!r} not private".format(ip_str))
    return ip_str

def write_attachments(self, base_path):
"""This method writes the attachments of mail on disk
Expand Down Expand Up @@ -662,8 +685,9 @@ def date(self):

try:
conv, _ = convert_mail_date(date)
finally:
return conv
except Exception:
pass
return conv

@property
def timezone(self):
Expand All @@ -675,8 +699,9 @@ def timezone(self):

try:
_, timezone = convert_mail_date(date)
finally:
return timezone
except Exception:
pass
return timezone

@property
def date_json(self):
Expand Down
5 changes: 4 additions & 1 deletion src/mailparser/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -359,12 +359,15 @@ def receiveds_parsing(receiveds):


def convert_mail_date(date):
"""
Convert a mail date in a datetime object.
"""
log.debug("Date to parse: {!r}".format(date))
d = email.utils.parsedate_tz(date)
log.debug("Date parsed: {!r}".format(d))
t = email.utils.mktime_tz(d)
log.debug("Date parsed in timestamp: {!r}".format(t))
date_utc = datetime.datetime.utcfromtimestamp(t)
date_utc = datetime.datetime.fromtimestamp(t, datetime.timezone.utc)
timezone = d[9] / 3600.0 if d[9] else 0
timezone = "{:+.1f}".format(timezone)
log.debug("Calculated timezone: {!r}".format(timezone))
Expand Down
6 changes: 3 additions & 3 deletions tests/test_mail_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -302,7 +302,7 @@ def test_parsing_know_values(self):
# raw = "Sun, 29 Nov 2015 09:45:18 +0100"
self.assertIsInstance(mail.date_raw, six.text_type)
self.assertIsInstance(mail.date_json, six.text_type)
raw_utc = datetime.datetime(2015, 11, 29, 8, 45, 18, 0).isoformat()
raw_utc = "2015-11-29T08:45:18+00:00"
result = mail.date.isoformat()
self.assertEqual(raw_utc, result)

Expand Down Expand Up @@ -548,7 +548,7 @@ def test_convert_mail_date(self):
s = "Mon, 20 Mar 2017 05:12:54 +0600"
d, t = convert_mail_date(s)
self.assertEqual(t, "+6.0")
self.assertEqual(str(d), "2017-03-19 23:12:54")
self.assertEqual(str(d), "2017-03-19 23:12:54+00:00")
s = "Mon, 20 Mar 2017 05:12:54 -0600"
d, t = convert_mail_date(s)
self.assertEqual(t, "-6.0")
Expand Down Expand Up @@ -650,7 +650,7 @@ def test_parse_from_bytes(self):
# raw = "Sun, 29 Nov 2015 09:45:18 +0100"
self.assertIsInstance(mail.date_raw, six.text_type)
self.assertIsInstance(mail.date_json, six.text_type)
raw_utc = datetime.datetime(2015, 11, 29, 8, 45, 18, 0).isoformat()
raw_utc = "2015-11-29T08:45:18+00:00"
result = mail.date.isoformat()
self.assertEqual(raw_utc, result)

Expand Down