From 45d1d7b9dbac9d5a6370783315920a755fb66c90 Mon Sep 17 00:00:00 2001 From: Dave Poirier Date: Wed, 2 Feb 2022 01:10:43 -0500 Subject: [PATCH] Option to auto RFC2047 encode --- .../xcschemes/SwiftEmailValidator.xcscheme | 4 +- README.md | 12 ++++ .../EmailSyntaxValidator.swift | 67 +++++++++++++++---- .../SwiftEmailValidator/RFC2047Coder.swift | 9 +++ .../EmailSyntaxValidatorTests.swift | 24 +++++-- 5 files changed, 98 insertions(+), 18 deletions(-) diff --git a/.swiftpm/xcode/xcshareddata/xcschemes/SwiftEmailValidator.xcscheme b/.swiftpm/xcode/xcshareddata/xcschemes/SwiftEmailValidator.xcscheme index d946023..fe576d5 100644 --- a/.swiftpm/xcode/xcshareddata/xcschemes/SwiftEmailValidator.xcscheme +++ b/.swiftpm/xcode/xcshareddata/xcschemes/SwiftEmailValidator.xcscheme @@ -52,8 +52,8 @@ diff --git a/README.md b/README.md index ce0add8..b9b15f8 100644 --- a/README.md +++ b/README.md @@ -29,11 +29,13 @@ Simple use-cases: } if let mailboxInfo = EmailSyntaxValidator.mailbox(from: "santa.claus@northpole.com") { + // mailboxInfo.email == "santa.claus@northpole.com" // mailboxInfo.localPart == .dotAtom("santa.claus") // mailboxInfo.host == .domain("northpole.com") } if let mailboxInfo = EmailSyntaxValidator.mailbox(from: "\"Santa Claus\"@northpole.com") { + // mailboxInfo.email == "\"Santa Claus\"@northpole.com" // mailboxInfo.localPart == .quotedString("Santa Claus") // mailboxInfo.host == .domain("northpole.com"") } @@ -45,6 +47,7 @@ Allowing IPv4/IPv6 addresses } if let mailboxInfo = EmailSyntaxValidator.mailbox(from: "email@[IPv6:fe80::1]", allowAddressLiteral: true) { + // mailboxInfo.email == "email@[IPv6:fe80::1]" // mailboxInfo.localPart == .dotAtom("email") // mailboxInfo.host == .addressLiteral("IPv6:fe80::1") } @@ -52,6 +55,15 @@ Allowing IPv4/IPv6 addresses Validating Unicode emails encoded into ASCII (RFC2047): if let mailboxInfo = EmailSyntaxValidator.mailbox(from: "=?utf-8?B?7ZWcQHgu7ZWc6rWt?=", compatibility: .asciiWithUnicodeExtension) { + // 
mailboxInfo.email == "=?utf-8?B?7ZWcQHgu7ZWc6rWt?=" + // mailboxInfo.localPart == .dotAtom("한") + // mailboxInfo.host == .domain("x.한국") + } + +Validating Unicode emails with auto-RFC2047 encoding: + + if let mailboxInfo = EmailSyntaxValidator.mailbox(from: "한@x.한국", options: [.autoEncodeToRfc2047], compatibility: .asciiWithUnicodeExtension) { + // mailboxInfo.email == "=?utf-8?b?7ZWcQHgu7ZWc6rWt?=" // mailboxInfo.localpart == .dotAtom("한") // mailboxInfo.host == .domain("x.한국") } diff --git a/Sources/SwiftEmailValidator/EmailSyntaxValidator.swift b/Sources/SwiftEmailValidator/EmailSyntaxValidator.swift index 2747a8d..603c0d9 100644 --- a/Sources/SwiftEmailValidator/EmailSyntaxValidator.swift +++ b/Sources/SwiftEmailValidator/EmailSyntaxValidator.swift @@ -18,6 +18,7 @@ import SwiftPublicSuffixList public final class EmailSyntaxValidator { public struct Mailbox { + let email: String let localPart: LocalPart let host: Host @@ -32,9 +33,8 @@ public final class EmailSyntaxValidator { } } - public enum ValidationStrategy { - case smtpHeader // will detect and decode =? encoded email addresses - case userInterface // will validate email can be encoded to desired smtp compatibility + public enum Options: Equatable { + case autoEncodeToRfc2047 // If using .asciiWithUnicodeExtension and string is in Unicode, will auto encode using RFC2047 } public enum Compatibility { @@ -46,19 +46,19 @@ public final class EmailSyntaxValidator { /// Verify if the email address is correctly formatted /// - Parameters: /// - candidate: String to validate - /// - strategy: (Optional) ValidationStrategy to use, use .smtpHeader for strict validation or use UI strategy for some auto-formatting flexibility, Uses .smtpHeader by default. + /// - options: (Optional) Validation options to apply; pass [.autoEncodeToRfc2047] to allow automatic RFC2047 encoding of Unicode input. Empty (strict validation) by default. 
/// - compatibility: (Optional) Compatibility required, one of .ascii (RFC822), .asciiWithUnicodeExtension (RFC2047) or .unicode (RFC6531). Uses .unicode by default. /// - allowAddressLiteral: (Optional) True to allow IPv4 & IPv6 instead of domains in email addresses, false otherwise. False by default. /// - domainValidator: Non-escaping closure that return true if the domain should be considered valid or false to be rejected /// - Returns: True if syntax is valid (.smtpHeader validation strategy) or could be adapted to be valid (.userInterface validation strategy) public static func correctlyFormatted(_ candidate: String, - strategy: ValidationStrategy = .smtpHeader, + options: [Options] = [], compatibility: Compatibility = .unicode, allowAddressLiteral: Bool = false, domainValidator: (String) -> Bool = { PublicSuffixList.isUnrestricted($0) }) -> Bool { mailbox(from: candidate, - strategy: strategy, + options: options, compatibility: compatibility, allowAddressLiteral: allowAddressLiteral, domainValidator: domainValidator) != nil @@ -73,42 +73,85 @@ public final class EmailSyntaxValidator { /// - domainValidator: Non-escaping closure that return true if the domain should be considered valid or false to be rejected /// - Returns: Mailbox struct on success, nil otherwise public static func mailbox(from candidate: String, - strategy: ValidationStrategy = .smtpHeader, + options: [Options] = [], compatibility: Compatibility = .unicode, allowAddressLiteral: Bool = false, domainValidator: (String) -> Bool = { PublicSuffixList.isUnrestricted($0) }) -> Mailbox? 
{ var smtpCandidate: String = candidate - if compatibility != .ascii, let decodedCandidate = RFC2047Coder.decode(candidate) { - smtpCandidate = decodedCandidate + var extractionCompatibility: Compatibility = compatibility + if compatibility != .ascii { + if let decodedCandidate = RFC2047Coder.decode(candidate) { + smtpCandidate = decodedCandidate + extractionCompatibility = .unicode + } else { + // Failed RFC2047 SMTP Unicode Extension decoding, fallback to ASCII or full Unicode + extractionCompatibility = (compatibility == .asciiWithUnicodeExtension ? .ascii : .unicode) + } } - if let dotAtom = extractDotAtom(smtpCandidate, compatibility: compatibility) { + if let dotAtom = extractDotAtom(smtpCandidate, compatibility: extractionCompatibility) { return mailbox( localPart: .dotAtom(dotAtom), + originalCandidate: candidate, hostCandidate: String(smtpCandidate.dropFirst(dotAtom.count + 1)), allowAddressLiteral: allowAddressLiteral, domainValidator: domainValidator) } - if let quotedString = extractQuotedString(smtpCandidate, compatibility: compatibility) { + if let quotedString = extractQuotedString(smtpCandidate, compatibility: extractionCompatibility) { return mailbox( localPart: .quotedString(String(quotedString.cleaned)), + originalCandidate: candidate, hostCandidate: String(smtpCandidate.dropFirst(quotedString.integral.count + 1)), allowAddressLiteral: allowAddressLiteral, domainValidator: domainValidator) } + if options.contains(.autoEncodeToRfc2047), let rfc2047candidate = candidateForRfc2047(candidate, compatibility: compatibility) { + return mailbox( + from: rfc2047candidate, + options: [], + compatibility: compatibility, + allowAddressLiteral: allowAddressLiteral, + domainValidator: domainValidator) + } + return nil } - private static func mailbox(localPart: Mailbox.LocalPart, hostCandidate: String, allowAddressLiteral: Bool, domainValidator: (String) -> Bool) -> Mailbox? 
{ + /// Attempt to repackage a Unicode email into an RFC2047 encoded email (will return nil if string doesn't contain Unicode characters) + /// - Parameters: + /// - candidate: String that originally failed SMTP validation that should be RFC2047 encoded if possible + /// - compatibility: Required compatibility level + /// - Returns: Repackaged email string (may still fail SMTP validation), or nil if the candidate cannot be RFC2047 encoded + private static func candidateForRfc2047(_ candidate: String, compatibility: Compatibility) -> String? { + + guard compatibility == .asciiWithUnicodeExtension, + !candidate.hasPrefix("=?"), + candidate.rangeOfCharacter(from: qtextUnicodeSMTPCharacterSet.inverted) == nil + else { + // Either RFC2047 encoding does not apply (other compatibility level, or candidate already starts with "=?"), or there are some unsupported ASCII characters which are invalid regardless of unicode or ASCII (newline, tabs, etc) + return nil + } + + guard candidate.rangeOfCharacter(from: CharacterSet(charactersIn: asciiRange).inverted) != nil else { + // There are no Unicode characters to encode, so the string was already validated to the maximum extent allowed + return nil + } + + // Some non-ASCII characters are present, and we can RFC2047 encode it + return RFC2047Coder.encode(candidate) + } + + private static func mailbox(localPart: Mailbox.LocalPart, originalCandidate: String, hostCandidate: String, allowAddressLiteral: Bool, domainValidator: (String) -> Bool) -> Mailbox? { guard let host = extractHost(from: hostCandidate, allowAddressLiteral: allowAddressLiteral, domainValidator: domainValidator) else { return nil } return Mailbox( + email: originalCandidate, localPart: localPart, host: host) } diff --git a/Sources/SwiftEmailValidator/RFC2047Coder.swift b/Sources/SwiftEmailValidator/RFC2047Coder.swift index 37dfce8..f026eb7 100644 --- a/Sources/SwiftEmailValidator/RFC2047Coder.swift +++ b/Sources/SwiftEmailValidator/RFC2047Coder.swift @@ -119,6 +119,15 @@ public final class RFC2047Coder { return decoded } + public static func encode(_ candidate: String) -> String? 
{ + guard let utf8data = candidate.data(using: .utf8) else { + return nil + } + let base64 = utf8data.base64EncodedString() + .replacingOccurrences(of: "=", with: "") + return "=?utf-8?b?\(base64)?=" + } + private static func match(regex: String, to value: String) -> [[String]] { let nsValue: NSString = value as NSString return (try? NSRegularExpression(pattern: regex, options: []))?.matches(in: value, options: [], range: NSMakeRange(0, nsValue.length)).map { match in diff --git a/Tests/SwiftEmailValidatorTests/EmailSyntaxValidatorTests.swift b/Tests/SwiftEmailValidatorTests/EmailSyntaxValidatorTests.swift index 54a5749..8fb9fe6 100644 --- a/Tests/SwiftEmailValidatorTests/EmailSyntaxValidatorTests.swift +++ b/Tests/SwiftEmailValidatorTests/EmailSyntaxValidatorTests.swift @@ -137,13 +137,13 @@ final class EmailSyntaxValidatorTests: XCTestCase { } func testAsciiRejectsUnicode() { - XCTAssertNil(EmailSyntaxValidator.mailbox(from: "한@x.한국", strategy: .smtpHeader, compatibility: .ascii), "Unicode in email addresses should not be allowed in ASCII compatibility mode") - XCTAssertNil(EmailSyntaxValidator.mailbox(from: "\"한\"@x.한국", strategy: .smtpHeader, compatibility: .ascii), "Unicode in email addresses should not be allowed in ASCII compatibility mode") + XCTAssertNil(EmailSyntaxValidator.mailbox(from: "한@x.한국", compatibility: .ascii), "Unicode in email addresses should not be allowed in ASCII compatibility mode") + XCTAssertNil(EmailSyntaxValidator.mailbox(from: "\"한\"@x.한국", compatibility: .ascii), "Unicode in email addresses should not be allowed in ASCII compatibility mode") } func testUnicodeCompatibility() { - XCTAssertEqual(EmailSyntaxValidator.mailbox(from: "한@x.한국", strategy: .smtpHeader, compatibility: .unicode)?.localPart, .dotAtom("한"), "Unicode email addresses should be allowed in Unicode compatibility") - XCTAssertEqual(EmailSyntaxValidator.mailbox(from: "한.భారత్@x.한국", strategy: .smtpHeader, compatibility: .unicode)?.localPart, .dotAtom("한.భారత్"), 
"Unicode email addresses should be allowed in Unicode compatibility") + XCTAssertEqual(EmailSyntaxValidator.mailbox(from: "한@x.한국", compatibility: .unicode)?.localPart, .dotAtom("한"), "Unicode email addresses should be allowed in Unicode compatibility") + XCTAssertEqual(EmailSyntaxValidator.mailbox(from: "한.భారత్@x.한국", compatibility: .unicode)?.localPart, .dotAtom("한.భారత్"), "Unicode email addresses should be allowed in Unicode compatibility") } func testLocalPartWithQEncoding() { @@ -176,4 +176,20 @@ final class EmailSyntaxValidatorTests: XCTestCase { XCTAssertFalse(EmailSyntaxValidator.correctlyFormatted("\"Test\"@\"northpole.com")) XCTAssertFalse(EmailSyntaxValidator.correctlyFormatted("\"Test\".hello\"@northpole.com")) } + + func testAsciiWithUnicodeExtension() { + XCTAssertFalse(EmailSyntaxValidator.correctlyFormatted("한@x.한국", options: [], compatibility: .asciiWithUnicodeExtension), "Unicode characters not properly encoded should be rejected") + XCTAssertFalse(EmailSyntaxValidator.correctlyFormatted("한@x.한국", options: [.autoEncodeToRfc2047], compatibility: .ascii), "Option .autoEncodeToRfc2047 should be ignored in pure ASCII compatibility mode") + XCTAssertTrue(EmailSyntaxValidator.correctlyFormatted("한@x.한국", options: [.autoEncodeToRfc2047], compatibility: .asciiWithUnicodeExtension), "Improperly encoded Unicode characters should be automatically RFC2047 encoded when .autoEncodeToRfc2047 option is specified") + XCTAssertEqual(EmailSyntaxValidator.mailbox(from: "한@x.한국", options: [.autoEncodeToRfc2047], compatibility: .asciiWithUnicodeExtension)?.email, "=?utf-8?b?7ZWcQHgu7ZWc6rWt?=") + XCTAssertEqual(EmailSyntaxValidator.mailbox(from: "한@x.한국", options: [.autoEncodeToRfc2047], compatibility: .asciiWithUnicodeExtension)?.localPart, .dotAtom("한")) + XCTAssertEqual(EmailSyntaxValidator.mailbox(from: "한@x.한국", options: [.autoEncodeToRfc2047], compatibility: .asciiWithUnicodeExtension)?.host, .domain("x.한국")) + } + + func testAutoEncodeToRfc2047Guards() { + 
XCTAssertFalse(EmailSyntaxValidator.correctlyFormatted("=?utf-8?b?7ZWcQHgu7ZWc6rWt?=", options: [.autoEncodeToRfc2047], compatibility: .ascii)) + XCTAssertFalse(EmailSyntaxValidator.correctlyFormatted("\nHello@this.com", options: [.autoEncodeToRfc2047], compatibility: .ascii)) + XCTAssertFalse(EmailSyntaxValidator.correctlyFormatted("\nHello@this.com", options: [.autoEncodeToRfc2047], compatibility: .unicode)) + XCTAssertFalse(EmailSyntaxValidator.correctlyFormatted("1234567890123456789012345678901234567890123456789012345678901234567890@this.com", options: [.autoEncodeToRfc2047], compatibility: .asciiWithUnicodeExtension)) + } }