Skip to content

Commit

Permalink
Option to auto RFC2047 encode
Browse files Browse the repository at this point in the history
  • Loading branch information
ekscrypto committed Feb 2, 2022
1 parent 426a399 commit 45d1d7b
Show file tree
Hide file tree
Showing 5 changed files with 98 additions and 18 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -52,8 +52,8 @@
</BuildAction>
<TestAction
buildConfiguration = "Debug"
selectedDebuggerIdentifier = ""
selectedLauncherIdentifier = "Xcode.IDEFoundation.Launcher.PosixSpawn"
selectedDebuggerIdentifier = "Xcode.DebuggerFoundation.Debugger.LLDB"
selectedLauncherIdentifier = "Xcode.DebuggerFoundation.Launcher.LLDB"
shouldUseLaunchSchemeArgsEnv = "YES"
codeCoverageEnabled = "YES">
<Testables>
Expand Down
12 changes: 12 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -29,11 +29,13 @@ Simple use-cases:
}

if let mailboxInfo = EmailSyntaxValidator.mailbox(from: "santa.claus@northpole.com") {
// mailboxInfo.email == "santa.claus@northpole.com"
// mailboxInfo.localPart == .dotAtom("santa.claus")
// mailboxInfo.host == .domain("northpole.com")
}

if let mailboxInfo = EmailSyntaxValidator.mailbox(from: "\"Santa Claus\"@northpole.com") {
// mailboxInfo.email == "\"Santa Claus\"@northpole.com"
// mailboxInfo.localPart == .quotedString("Santa Claus")
// mailboxInfo.host == .domain("northpole.com")
}
Expand All @@ -45,13 +47,23 @@ Allowing IPv4/IPv6 addresses
}

if let mailboxInfo = EmailSyntaxValidator.mailbox(from: "email@[IPv6:fe80::1]", allowAddressLiteral: true) {
// mailboxInfo.email == "email@[IPv6:fe80::1]"
// mailboxInfo.localPart == .dotAtom("email")
// mailboxInfo.host == .addressLiteral("IPv6:fe80::1")
}

Validating Unicode emails encoded into ASCII (RFC2047):

if let mailboxInfo = EmailSyntaxValidator.mailbox(from: "=?utf-8?B?7ZWcQHgu7ZWc6rWt?=", compatibility: .asciiWithUnicodeExtension) {
// mailboxInfo.email == "=?utf-8?B?7ZWcQHgu7ZWc6rWt?="
// mailboxInfo.localPart == .dotAtom("한")
// mailboxInfo.host == .domain("x.한국")
}

Validating Unicode emails with auto-RFC2047 encoding:

if let mailboxInfo = EmailSyntaxValidator.mailbox(from: "한@x.한국", options: [.autoEncodeToRfc2047], compatibility: .asciiWithUnicodeExtension) {
// mailboxInfo.email == "=?utf-8?b?7ZWcQHgu7ZWc6rWt?="
// mailboxInfo.localPart == .dotAtom("한")
// mailboxInfo.host == .domain("x.한국")
}
Expand Down
67 changes: 55 additions & 12 deletions Sources/SwiftEmailValidator/EmailSyntaxValidator.swift
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ import SwiftPublicSuffixList
public final class EmailSyntaxValidator {

public struct Mailbox {
let email: String
let localPart: LocalPart
let host: Host

Expand All @@ -32,9 +33,8 @@ public final class EmailSyntaxValidator {
}
}

public enum ValidationStrategy {
case smtpHeader // will detect and decode =? encoded email addresses
case userInterface // will validate email can be encoded to desired smtp compatibility
/// Optional behaviors a caller can request when validating a candidate.
public enum Options: Equatable {
    /// If using .asciiWithUnicodeExtension and string is in Unicode, will auto encode using RFC2047
    case autoEncodeToRfc2047
}

public enum Compatibility {
Expand All @@ -46,19 +46,19 @@ public final class EmailSyntaxValidator {
/// Verify if the email address is correctly formatted
/// - Parameters:
/// - candidate: String to validate
/// - strategy: (Optional) ValidationStrategy to use, use .smtpHeader for strict validation or use UI strategy for some auto-formatting flexibility, Uses .smtpHeader by default.
/// - options: (Optional) Options to apply during validation; include .autoEncodeToRfc2047 for some auto-formatting flexibility. Empty (strict validation) by default.
/// - compatibility: (Optional) Compatibility required, one of .ascii (RFC822), .asciiWithUnicodeExtension (RFC2047) or .unicode (RFC6531). Uses .unicode by default.
/// - allowAddressLiteral: (Optional) True to allow IPv4 & IPv6 instead of domains in email addresses, false otherwise. False by default.
/// - domainValidator: Non-escaping closure that return true if the domain should be considered valid or false to be rejected
/// - Returns: True if syntax is valid, or could be adapted to be valid through the requested options (e.g. .autoEncodeToRfc2047)
public static func correctlyFormatted(_ candidate: String,
strategy: ValidationStrategy = .smtpHeader,
options: [Options] = [],
compatibility: Compatibility = .unicode,
allowAddressLiteral: Bool = false,
domainValidator: (String) -> Bool = { PublicSuffixList.isUnrestricted($0) }) -> Bool {

mailbox(from: candidate,
strategy: strategy,
options: options,
compatibility: compatibility,
allowAddressLiteral: allowAddressLiteral,
domainValidator: domainValidator) != nil
Expand All @@ -73,42 +73,85 @@ public final class EmailSyntaxValidator {
/// - domainValidator: Non-escaping closure that return true if the domain should be considered valid or false to be rejected
/// - Returns: Mailbox struct on success, nil otherwise
public static func mailbox(from candidate: String,
strategy: ValidationStrategy = .smtpHeader,
options: [Options] = [],
compatibility: Compatibility = .unicode,
allowAddressLiteral: Bool = false,
domainValidator: (String) -> Bool = { PublicSuffixList.isUnrestricted($0) }) -> Mailbox? {

var smtpCandidate: String = candidate
if compatibility != .ascii, let decodedCandidate = RFC2047Coder.decode(candidate) {
smtpCandidate = decodedCandidate
var extractionCompatibility: Compatibility = compatibility
if compatibility != .ascii {
if let decodedCandidate = RFC2047Coder.decode(candidate) {
smtpCandidate = decodedCandidate
extractionCompatibility = .unicode
} else {
// Failed RFC2047 SMTP Unicode Extension decoding, fallback to ASCII or full Unicode
extractionCompatibility = (compatibility == .asciiWithUnicodeExtension ? .ascii : .unicode)
}
}

if let dotAtom = extractDotAtom(smtpCandidate, compatibility: compatibility) {
if let dotAtom = extractDotAtom(smtpCandidate, compatibility: extractionCompatibility) {
return mailbox(
localPart: .dotAtom(dotAtom),
originalCandidate: candidate,
hostCandidate: String(smtpCandidate.dropFirst(dotAtom.count + 1)),
allowAddressLiteral: allowAddressLiteral,
domainValidator: domainValidator)
}

if let quotedString = extractQuotedString(smtpCandidate, compatibility: compatibility) {
if let quotedString = extractQuotedString(smtpCandidate, compatibility: extractionCompatibility) {
return mailbox(
localPart: .quotedString(String(quotedString.cleaned)),
originalCandidate: candidate,
hostCandidate: String(smtpCandidate.dropFirst(quotedString.integral.count + 1)),
allowAddressLiteral: allowAddressLiteral,
domainValidator: domainValidator)
}

if options.contains(.autoEncodeToRfc2047), let rfc2047candidate = candidateForRfc2047(candidate, compatibility: compatibility) {
return mailbox(
from: rfc2047candidate,
options: [],
compatibility: compatibility,
allowAddressLiteral: allowAddressLiteral,
domainValidator: domainValidator)
}

return nil
}

private static func mailbox(localPart: Mailbox.LocalPart, hostCandidate: String, allowAddressLiteral: Bool, domainValidator: (String) -> Bool) -> Mailbox? {
/// Try to turn a candidate that failed validation into an RFC2047 encoded string.
///
/// Nil is returned when the compatibility is not .asciiWithUnicodeExtension, when the candidate
/// already starts with "=?", when it contains a character outside `qtextUnicodeSMTPCharacterSet`,
/// or when it contains no character outside `asciiRange` (nothing to encode).
/// - Parameters:
///   - candidate: String that originally failed SMTP validation
///   - compatibility: Required compatibility level
/// - Returns: Result of `RFC2047Coder.encode` (may still fail SMTP validation), or nil if no encoding applies
private static func candidateForRfc2047(_ candidate: String, compatibility: Compatibility) -> String? {

    // Auto-encoding is only meaningful for the ASCII + RFC2047 extension mode
    if compatibility != .asciiWithUnicodeExtension {
        return nil
    }

    // A candidate that already carries the "=?" marker is not encoded again
    if candidate.hasPrefix("=?") {
        return nil
    }

    // Characters outside the allowed set are invalid regardless of unicode or ASCII
    // (per the original author: newline, tabs, etc)
    let unsupportedCharacters = qtextUnicodeSMTPCharacterSet.inverted
    if candidate.rangeOfCharacter(from: unsupportedCharacters) != nil {
        return nil
    }

    // Without any character outside the ASCII range there is nothing to encode,
    // so the string was already validated to the maximum extent allowed
    let nonAsciiCharacters = CharacterSet(charactersIn: asciiRange).inverted
    if candidate.rangeOfCharacter(from: nonAsciiCharacters) == nil {
        return nil
    }

    // Some non-ASCII characters are present, and we can RFC2047 encode it
    return RFC2047Coder.encode(candidate)
}

/// Assemble a Mailbox from an already extracted local part once the host portion has been validated.
/// - Parameters:
///   - localPart: Local part extracted by the caller
///   - originalCandidate: String stored as the mailbox's `email`
///   - hostCandidate: Portion of the candidate handed to `extractHost`
///   - allowAddressLiteral: Forwarded to `extractHost`
///   - domainValidator: Forwarded to `extractHost`
/// - Returns: Mailbox struct when `extractHost` yields a host, nil otherwise
private static func mailbox(localPart: Mailbox.LocalPart, originalCandidate: String, hostCandidate: String, allowAddressLiteral: Bool, domainValidator: (String) -> Bool) -> Mailbox? {

    let validatedHost = extractHost(
        from: hostCandidate,
        allowAddressLiteral: allowAddressLiteral,
        domainValidator: domainValidator)

    // No host means no mailbox; otherwise wrap the three parts together
    return validatedHost.map { host in
        Mailbox(email: originalCandidate, localPart: localPart, host: host)
    }
}
Expand Down
9 changes: 9 additions & 0 deletions Sources/SwiftEmailValidator/RFC2047Coder.swift
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,15 @@ public final class RFC2047Coder {
return decoded
}

/// Wrap a string into an RFC2047-style encoded word using UTF-8 and base64.
///
/// The base64 payload has every `=` character removed before being placed
/// between the `=?utf-8?b?` prefix and the `?=` suffix.
/// - Parameter candidate: String to encode
/// - Returns: Encoded string, or nil if the candidate cannot be converted to UTF-8 data
public static func encode(_ candidate: String) -> String? {
    candidate.data(using: .utf8).map { (payload: Data) -> String in
        let unpadded = payload.base64EncodedString().filter { $0 != "=" }
        return "=?utf-8?b?\(unpadded)?="
    }
}

private static func match(regex: String, to value: String) -> [[String]] {
let nsValue: NSString = value as NSString
return (try? NSRegularExpression(pattern: regex, options: []))?.matches(in: value, options: [], range: NSMakeRange(0, nsValue.length)).map { match in
Expand Down
24 changes: 20 additions & 4 deletions Tests/SwiftEmailValidatorTests/EmailSyntaxValidatorTests.swift
Original file line number Diff line number Diff line change
Expand Up @@ -137,13 +137,13 @@ final class EmailSyntaxValidatorTests: XCTestCase {
}

func testAsciiRejectsUnicode() {
XCTAssertNil(EmailSyntaxValidator.mailbox(from: "한@x.한국", strategy: .smtpHeader, compatibility: .ascii), "Unicode in email addresses should not be allowed in ASCII compatibility mode")
XCTAssertNil(EmailSyntaxValidator.mailbox(from: "\"\"@x.한국", strategy: .smtpHeader, compatibility: .ascii), "Unicode in email addresses should not be allowed in ASCII compatibility mode")
XCTAssertNil(EmailSyntaxValidator.mailbox(from: "한@x.한국", compatibility: .ascii), "Unicode in email addresses should not be allowed in ASCII compatibility mode")
XCTAssertNil(EmailSyntaxValidator.mailbox(from: "\"\"@x.한국", compatibility: .ascii), "Unicode in email addresses should not be allowed in ASCII compatibility mode")
}

func testUnicodeCompatibility() {
XCTAssertEqual(EmailSyntaxValidator.mailbox(from: "한@x.한국", strategy: .smtpHeader, compatibility: .unicode)?.localPart, .dotAtom(""), "Unicode email addresses should be allowed in Unicode compatibility")
XCTAssertEqual(EmailSyntaxValidator.mailbox(from: "한.భారత్@x.한국", strategy: .smtpHeader, compatibility: .unicode)?.localPart, .dotAtom("한.భారత్"), "Unicode email addresses should be allowed in Unicode compatibility")
XCTAssertEqual(EmailSyntaxValidator.mailbox(from: "한@x.한국", compatibility: .unicode)?.localPart, .dotAtom(""), "Unicode email addresses should be allowed in Unicode compatibility")
XCTAssertEqual(EmailSyntaxValidator.mailbox(from: "한.భారత్@x.한국", compatibility: .unicode)?.localPart, .dotAtom("한.భారత్"), "Unicode email addresses should be allowed in Unicode compatibility")
}

func testLocalPartWithQEncoding() {
Expand Down Expand Up @@ -176,4 +176,20 @@ final class EmailSyntaxValidatorTests: XCTestCase {
XCTAssertFalse(EmailSyntaxValidator.correctlyFormatted("\"Test\"@\"northpole.com"))
XCTAssertFalse(EmailSyntaxValidator.correctlyFormatted("\"Test\".hello\"@northpole.com"))
}

/// Covers .asciiWithUnicodeExtension handling of a raw Unicode candidate, with and without
/// the .autoEncodeToRfc2047 option, and checks the resulting mailbox fields.
func testAsciiWithUnicodeExtension() {
    XCTAssertFalse(EmailSyntaxValidator.correctlyFormatted("한@x.한국", options: [], compatibility: .asciiWithUnicodeExtension), "Unicode characters not properly encoded should be rejected")
    XCTAssertFalse(EmailSyntaxValidator.correctlyFormatted("한@x.한국", options: [.autoEncodeToRfc2047], compatibility: .ascii), "Option .autoEncodeToRfc2047 should be ignored in pure ASCII compatibility mode")
    XCTAssertTrue(EmailSyntaxValidator.correctlyFormatted("한@x.한국", options: [.autoEncodeToRfc2047], compatibility: .asciiWithUnicodeExtension), "Improperly encoded Unicode characters should be automatically RFC2047 encoded when .autoEncodeToRfc2047 option is specified")

    // Parse once and inspect each field of the same result
    let mailbox = EmailSyntaxValidator.mailbox(from: "한@x.한국", options: [.autoEncodeToRfc2047], compatibility: .asciiWithUnicodeExtension)
    XCTAssertEqual(mailbox?.email, "=?utf-8?b?7ZWcQHgu7ZWc6rWt?=", "Auto-encoded mailbox should expose the RFC2047 encoded email")
    // The local part of "한@x.한국" is "한", not an empty dot-atom
    XCTAssertEqual(mailbox?.localPart, .dotAtom("한"), "Auto-encoded mailbox should expose the decoded local part")
    XCTAssertEqual(mailbox?.host, .domain("x.한국"), "Auto-encoded mailbox should expose the decoded host")
}

/// Candidates that must stay rejected even though .autoEncodeToRfc2047 is requested.
func testAutoEncodeToRfc2047Guards() {
    // Every case below uses the same option set; only the candidate and compatibility vary
    func accepted(_ candidate: String, _ compatibility: EmailSyntaxValidator.Compatibility) -> Bool {
        EmailSyntaxValidator.correctlyFormatted(candidate, options: [.autoEncodeToRfc2047], compatibility: compatibility)
    }

    XCTAssertFalse(accepted("=?utf-8?b?7ZWcQHgu7ZWc6rWt?=", .ascii))
    XCTAssertFalse(accepted("\n[email protected]", .ascii))
    XCTAssertFalse(accepted("\n[email protected]", .unicode))
    XCTAssertFalse(accepted("1234567890123456789012345678901234567890123456789012345678901234567890@this.com", .asciiWithUnicodeExtension))
}
}

0 comments on commit 45d1d7b

Please sign in to comment.