Skip to content

Commit

Permalink
rfc822: parse multipart messages on a best efforts basis
Browse files Browse the repository at this point in the history
Parse multipart messages on a best-efforts basis. Allow the user to see
as much of the message as possible, but log the errors.

If a charset or encoding error is encountered for a message part of a
multipart message, the error is logged and ignored. In those cases, we
still get a valid message body but the content is just not decoded or
converted. No error will be propagated.

If a multipart message cannot be parsed, ParseEntityStructure will
return a multipart error. This error indicates that the message is
malformed and there is nothing more we can do. The caller is then
advised to use a single text/plain body structure using
CreateTextPlainBody() to provide the entire message content to the user.

Fixes: https://todo.sr.ht/~rjarry/aerc/288
Signed-off-by: Koni Marti <[email protected]>
Acked-by: Robin Jarry <[email protected]>
  • Loading branch information
konimarti authored and rjarry committed Nov 21, 2024
1 parent 1a3b2b2 commit c2048ef
Show file tree
Hide file tree
Showing 5 changed files with 83 additions and 21 deletions.
6 changes: 5 additions & 1 deletion lib/emlview.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ import (
_ "github.com/emersion/go-message/charset"

"git.sr.ht/~rjarry/aerc/lib/crypto"
"git.sr.ht/~rjarry/aerc/lib/log"
"git.sr.ht/~rjarry/aerc/lib/rfc822"
"git.sr.ht/~rjarry/aerc/models"
)
Expand Down Expand Up @@ -71,7 +72,10 @@ func NewEmlMessageView(full []byte, pgp crypto.Provider,
return
}
bs, err := rfc822.ParseEntityStructure(entity)
if err != nil {
if rfc822.IsMultipartError(err) {
log.Warnf("EmlView: %v", err)
bs = rfc822.CreateTextPlainBody()
} else if err != nil {
cb(nil, err)
return
}
Expand Down
5 changes: 4 additions & 1 deletion lib/messageview.go
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,10 @@ func NewMessageStoreView(messageInfo *models.MessageInfo, setSeen bool,
return
}
bs, err := rfc822.ParseEntityStructure(decrypted)
if err != nil {
if rfc822.IsMultipartError(err) {
log.Warnf("MessageView: %v", err)
bs = rfc822.CreateTextPlainBody()
} else if err != nil {
cb(nil, err)
return
}
Expand Down
83 changes: 71 additions & 12 deletions lib/rfc822/message.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,22 @@ import (
"github.com/emersion/go-message/mail"
)

// MultipartError wraps an error that was encountered while reading the parts
// of a multipart message. Use IsMultipartError to detect it in an error chain.
type MultipartError struct {
	e error
}

// Unwrap returns the wrapped error (nil for the zero value) so that errors.Is
// and errors.As can inspect the underlying cause.
func (u MultipartError) Unwrap() error { return u.e }

// Error implements the error interface. The message is the wrapped error's
// message prefixed with "multipart error: ". A zero-value MultipartError has
// no wrapped error; it yields the bare prefix instead of panicking on a nil
// dereference.
func (u MultipartError) Error() string {
	if u.e == nil {
		return "multipart error"
	}
	return "multipart error: " + u.e.Error()
}

// IsMultipartError reports whether err, or any error it wraps, is a
// MultipartError, i.e. whether the multipart message is known to be malformed
// and could not be parsed. A nil err yields false.
func IsMultipartError(err error) bool {
	var target MultipartError
	return errors.As(err, &target)
}

// RFC 1123Z regexp
var dateRe = regexp.MustCompile(`(((Mon|Tue|Wed|Thu|Fri|Sat|Sun))[,]?\s[0-9]{1,2})\s` +
`(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)\s` +
Expand All @@ -34,8 +50,14 @@ func FetchEntityPartReader(e *message.Entity, index []int) (io.Reader, error) {
for {
idx++
part, err := mpr.NextPart()
if err != nil {
return nil, err
switch {
case message.IsUnknownCharset(err):
log.Warnf("FetchEntityPartReader: %v", err)
case message.IsUnknownEncoding(err):
log.Warnf("FetchEntityPartReader: %v", err)
case err != nil:
log.Warnf("FetchEntityPartReader: %v", err)
return bufReader(e)
}
if idx == index[0] {
rest := index[1:]
Expand Down Expand Up @@ -89,6 +111,22 @@ func fixContentType(h message.Header) (string, map[string]string) {
return "text/plain", nil
}

// ParseEntityStructure will parse the message and create a multipart structure
// for multipart messages. Parsing is done on a best-efforts basis:
//
// If the content-type cannot be parsed, ParseEntityStructure will try to fix
// it; otherwise, it returns a text/plain mime type as a fallback. No error will
// be returned.
//
// If a charset or encoding error is encountered for a message part of a
// multipart message, the error is logged and ignored. In those cases, we still
// get a valid message body but the content is just not decoded or converted. No
// error will be returned.
//
// If reading a multipart message fails, ParseEntityStructure will return a
// multipart error. This error indicates that this message is malformed and
// there is nothing more we can do. The caller is then advised to use a single
// text/plain body structure using CreateTextPlainBody().
func ParseEntityStructure(e *message.Entity) (*models.BodyStructure, error) {
var body models.BodyStructure
contentType, ctParams, err := e.Header.ContentType()
Expand Down Expand Up @@ -116,10 +154,15 @@ func ParseEntityStructure(e *message.Entity) (*models.BodyStructure, error) {
if mpr := e.MultipartReader(); mpr != nil {
for {
part, err := mpr.NextPart()
if errors.Is(err, io.EOF) {
switch {
case errors.Is(err, io.EOF):
return &body, nil
} else if err != nil {
return nil, err
case message.IsUnknownCharset(err):
log.Warnf("ParseEntityStructure: %v", err)
case message.IsUnknownEncoding(err):
log.Warnf("ParseEntityStructure: %v", err)
case err != nil:
return nil, MultipartError{err}
}
ps, err := ParseEntityStructure(part)
if err != nil {
Expand All @@ -131,6 +174,16 @@ func ParseEntityStructure(e *message.Entity) (*models.BodyStructure, error) {
return &body, nil
}

// CreateTextPlainBody creates a plain-vanilla text/plain body structure: MIME
// type "text/plain", a "charset" parameter of "utf-8", and an empty (non-nil)
// list of sub-parts.
func CreateTextPlainBody() *models.BodyStructure {
	return &models.BodyStructure{
		MIMEType:    "text",
		MIMESubType: "plain",
		Params:      map[string]string{"charset": "utf-8"},
		Parts:       []*models.BodyStructure{},
	}
}

func parseEnvelope(h *mail.Header) *models.Envelope {
subj, err := h.Subject()
if err != nil {
Expand Down Expand Up @@ -308,8 +361,9 @@ func MessageInfo(raw RawMessage) (*models.MessageInfo, error) {
return nil, fmt.Errorf("could not read message: %w", err)
}
bs, err := ParseEntityStructure(msg)
if errors.As(err, new(message.UnknownEncodingError)) {
parseErr = err
if IsMultipartError(err) {
log.Warnf("multipart error: %v", err)
bs = CreateTextPlainBody()
} else if err != nil {
return nil, fmt.Errorf("could not get structure: %w", err)
}
Expand Down Expand Up @@ -394,13 +448,18 @@ func NewCRLFReader(r io.Reader) io.Reader {

// ReadMessage is a wrapper for the message.Read function to read a message
// from r. The message's encoding and charset are automatically decoded to
// UTF-8. If an unknown charset is encountered, the error is logged but a nil
// error is returned since the entity object can still be read.
// UTF-8. If an unknown charset or unknown encoding is encountered, the error is
// logged but a nil error is returned since the entity object can still be read.
func ReadMessage(r io.Reader) (*message.Entity, error) {
entity, err := message.Read(r)
if message.IsUnknownCharset(err) {
log.Warnf("unknown charset encountered")
} else if err != nil {
switch {
case message.IsUnknownCharset(err):
// message body is valid, just not converted, so continue
log.Warnf("ReadMessage: %v", err)
case message.IsUnknownEncoding(err):
// message body is valid, just not decoded, so continue
log.Warnf("ReadMessage: %v", err)
case err != nil:
return nil, fmt.Errorf("could not read message: %w", err)
}
return entity, nil
Expand Down
10 changes: 3 additions & 7 deletions lib/rfc822/message_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -38,8 +38,8 @@ func TestMessageInfoParser(t *testing.T) {
}
}

func TestMessageInfoHandledError(t *testing.T) {
rootDir := "testdata/message/invalid"
func TestMessageInfoMalformed(t *testing.T) {
rootDir := "testdata/message/malformed"
msgFiles, err := os.ReadDir(rootDir)
die(err)

Expand All @@ -51,14 +51,10 @@ func TestMessageInfoHandledError(t *testing.T) {
p := fi.Name()
t.Run(p, func(t *testing.T) {
m := newMockRawMessageFromPath(filepath.Join(rootDir, p))
mi, err := MessageInfo(m)
_, err := MessageInfo(m)
if err != nil {
t.Fatal(err)
}

if perr := mi.Error; perr == nil {
t.Fatal("Expected MessageInfo.Error, got none")
}
})
}
}
Expand Down
File renamed without changes.

0 comments on commit c2048ef

Please sign in to comment.