Skip to content

Commit

Permalink
Decode quoted-printable UTF8 in email subjects (#25)
Browse files Browse the repository at this point in the history
  • Loading branch information
jmcampanini authored Jul 19, 2021
1 parent 3772c9e commit b575654
Show file tree
Hide file tree
Showing 2 changed files with 68 additions and 1 deletion.
28 changes: 27 additions & 1 deletion gitdiff/patch_header.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ import (
"errors"
"fmt"
"io"
"mime/quotedprintable"
"net/mail"
"strconv"
"strings"
Expand Down Expand Up @@ -457,5 +458,30 @@ func parseSubject(s string) (string, string) {
break
}

return s[:at], s[at:]
return s[:at], decodeSubject(s[at:])
}

// decodeSubject decodes a subject line that starts with an RFC 2047 encoded
// word. Currently only supports quoted-printable ("Q" encoding) UTF-8. This
// format is the result of a `git format-patch` when the commit title has a
// non-ASCII character (e.g. an emoji).
// See for reference: https://stackoverflow.com/questions/27695749/gmail-api-not-respecting-utf-encoding-in-subject
//
// The charset and encoding tokens are matched case-insensitively, "_" inside
// an encoded word is decoded as a space, and text outside of encoded words is
// copied through unchanged. A subject that does not start with a complete
// encoded word, or whose encoded words fail to decode, is returned unmodified.
func decodeSubject(encoded string) string {
	if _, _, ok := cutEncodedWord(encoded); !ok {
		// not UTF-8 encoded
		return encoded
	}

	// If the subject is too long, `git format-patch` may produce a subject line across
	// multiple lines. When parsed, this can look like the following:
	// <UTF8-prefix><first-line>?= <UTF8-prefix><second-line>?=
	var decoded strings.Builder
	rest := encoded
	for rest != "" {
		text, after, ok := cutEncodedWord(rest)
		if !ok {
			// Plain text: copy it through up to the next position where an
			// encoded word could start. Searching from the second byte
			// guarantees progress when rest itself begins with "=?".
			next := strings.Index(rest[1:], "=?")
			if next < 0 {
				decoded.WriteString(rest)
				break
			}
			decoded.WriteString(rest[:next+1])
			rest = rest[next+1:]
			continue
		}

		word, err := decodeQText(text)
		if err != nil {
			// if err, abort decoding and return original subject
			return encoded
		}
		decoded.WriteString(word)

		// Whitespace that only separates two adjacent encoded words is
		// folding, not content, so drop it. Whitespace in front of plain text
		// is kept.
		rest = after
		if next := strings.TrimLeft(after, " \t\r\n"); next != after {
			if _, _, ok := cutEncodedWord(next); ok {
				rest = next
			}
		}
	}

	return decoded.String()
}

// cutEncodedWord splits s into the encoded text of a leading UTF-8 "Q" encoded
// word and the remainder of s after the closing "?=". ok is false, and rest is
// s itself, if s does not begin with a complete encoded word.
func cutEncodedWord(s string) (text, rest string, ok bool) {
	const (
		prefix = "=?UTF-8?q?"
		suffix = "?="
	)

	// RFC 2047 treats the charset and encoding names as case-insensitive.
	if len(s) < len(prefix) || !strings.EqualFold(s[:len(prefix)], prefix) {
		return "", s, false
	}

	body := s[len(prefix):]
	end := strings.Index(body, suffix)
	if end < 0 {
		return "", s, false
	}
	return body[:end], body[end+len(suffix):], true
}

// decodeQText decodes the text portion of a single "Q" encoded word.
func decodeQText(text string) (string, error) {
	// In the "Q" encoding an underscore stands for a space. Rewrite it in its
	// escaped form so that a space at the end of the text is not mistaken for
	// discardable trailing whitespace by the quoted-printable reader.
	text = strings.ReplaceAll(text, "_", "=20")

	raw, err := io.ReadAll(quotedprintable.NewReader(strings.NewReader(text)))
	if err != nil {
		return "", err
	}
	return string(raw), nil
}
41 changes: 41 additions & 0 deletions gitdiff/patch_header_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -138,6 +138,8 @@ func TestParsePatchHeader(t *testing.T) {
}
expectedDate := time.Date(2020, 04, 11, 15, 21, 23, 0, time.FixedZone("PDT", -7*60*60))
expectedTitle := "A sample commit to test header parsing"
expectedEmojiOneLineTitle := "🤖 Enabling auto-merging"
expectedEmojiMultiLineTitle := "[IA64] Put ia64 config files on the Uwe Kleine-König diet"
expectedBody := "The medium format shows the body, which\nmay wrap on to multiple lines.\n\nAnother body line."
expectedBodyAppendix := "CC: Joe Smith <[email protected]>"

Expand Down Expand Up @@ -267,6 +269,45 @@ Another body line.
Body: expectedBody,
},
},
"mailboxEmojiOneLine": {
Input: `From 61f5cd90bed4d204ee3feb3aa41ee91d4734855b Mon Sep 17 00:00:00 2001
From: Morton Haypenny <[email protected]>
Date: Sat, 11 Apr 2020 15:21:23 -0700
Subject: [PATCH] =?UTF-8?q?=F0=9F=A4=96=20Enabling=20auto-merging?=
The medium format shows the body, which
may wrap on to multiple lines.
Another body line.
`,
Header: PatchHeader{
SHA: expectedSHA,
Author: expectedIdentity,
AuthorDate: expectedDate,
Title: expectedEmojiOneLineTitle,
Body: expectedBody,
},
},
"mailboxEmojiMultiLine": {
Input: `From 61f5cd90bed4d204ee3feb3aa41ee91d4734855b Mon Sep 17 00:00:00 2001
From: Morton Haypenny <[email protected]>
Date: Sat, 11 Apr 2020 15:21:23 -0700
Subject: [PATCH] =?UTF-8?q?[IA64]=20Put=20ia64=20config=20files=20on=20the=20?=
=?UTF-8?q?Uwe=20Kleine-K=C3=B6nig=20diet?=
The medium format shows the body, which
may wrap on to multiple lines.
Another body line.
`,
Header: PatchHeader{
SHA: expectedSHA,
Author: expectedIdentity,
AuthorDate: expectedDate,
Title: expectedEmojiMultiLineTitle,
Body: expectedBody,
},
},
"mailboxAppendix": {
Input: `From 61f5cd90bed4d204ee3feb3aa41ee91d4734855b Mon Sep 17 00:00:00 2001
From: Morton Haypenny <[email protected]>
Expand Down

0 comments on commit b575654

Please sign in to comment.