From 9332369b1b20d84564f03d6919a18fcb2b426702 Mon Sep 17 00:00:00 2001 From: Brad Slayter Date: Thu, 21 Dec 2023 12:47:46 -0600 Subject: [PATCH 1/7] Update AMP extractor to check for loops and AMP indicators --- .../LinkProtection/AMPCanonicalExtractor.swift | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/Sources/BrowserServicesKit/LinkProtection/AMPCanonicalExtractor.swift b/Sources/BrowserServicesKit/LinkProtection/AMPCanonicalExtractor.swift index dba49856e..1b3aa4028 100644 --- a/Sources/BrowserServicesKit/LinkProtection/AMPCanonicalExtractor.swift +++ b/Sources/BrowserServicesKit/LinkProtection/AMPCanonicalExtractor.swift @@ -128,6 +128,21 @@ public final class AMPCanonicalExtractor: NSObject { (function() { document.addEventListener('DOMContentLoaded', (event) => { const canonicalLinks = document.querySelectorAll('[rel="canonical"]') + + let result = undefined + if (canonicalLinks.length > 0) { + result = canonicalLinks[0].href + } + + // Loop prevention + if (window.location.href === result) { + result = undefined + } + + if (!document.documentElement.hasAttribute('amp') && !document.documentElement.hasAttribute('⚡')) { + result = undefined + } + window.webkit.messageHandlers.\(Constants.sendCanonical).postMessage({ \(Constants.canonicalKey): canonicalLinks.length > 0 ? canonicalLinks[0].href : undefined }) From c9715339dd1efdbe931d2bed8d01eadb7255fcd5 Mon Sep 17 00:00:00 2001 From: Brad Slayter Date: Thu, 21 Dec 2023 12:53:26 -0600 Subject: [PATCH 2/7] use result variable --- .../LinkProtection/AMPCanonicalExtractor.swift | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Sources/BrowserServicesKit/LinkProtection/AMPCanonicalExtractor.swift b/Sources/BrowserServicesKit/LinkProtection/AMPCanonicalExtractor.swift index 1b3aa4028..88b588097 100644 --- a/Sources/BrowserServicesKit/LinkProtection/AMPCanonicalExtractor.swift +++ b/Sources/BrowserServicesKit/LinkProtection/AMPCanonicalExtractor.swift @@ -144,7 +144,7 @@ public final class AMPCanonicalExtractor: NSObject { } window.webkit.messageHandlers.\(Constants.sendCanonical).postMessage({ - \(Constants.canonicalKey): canonicalLinks.length > 0 ? canonicalLinks[0].href : undefined + \(Constants.canonicalKey): result }) }) })() From 6049341ed442d65cfd1bc3ed4d76790a3dcd8a0a Mon Sep 17 00:00:00 2001 From: Brad Slayter Date: Thu, 21 Dec 2023 13:41:34 -0600 Subject: [PATCH 3/7] Support deep extraction setting --- .../LinkProtection/AMPCanonicalExtractor.swift | 10 +++++++++- .../LinkProtection/TrackingLinkSettings.swift | 3 +++ 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/Sources/BrowserServicesKit/LinkProtection/AMPCanonicalExtractor.swift b/Sources/BrowserServicesKit/LinkProtection/AMPCanonicalExtractor.swift index 88b588097..880960d18 100644 --- a/Sources/BrowserServicesKit/LinkProtection/AMPCanonicalExtractor.swift +++ b/Sources/BrowserServicesKit/LinkProtection/AMPCanonicalExtractor.swift @@ -112,11 +112,15 @@ public final class AMPCanonicalExtractor: NSObject { public func urlContainsAMPKeyword(_ url: URL?) -> Bool { linkCleaner.lastAMPURLString = nil + + let settings = TrackingLinkSettings(fromConfig: privacyConfig) + guard privacyConfig.isEnabled(featureKey: .ampLinks) else { return false } + guard settings.deepExtractionEnabled else { return false } guard let url = url, !linkCleaner.isURLExcluded(url: url) else { return false } let urlStr = url.absoluteString - let ampKeywords = TrackingLinkSettings(fromConfig: privacyConfig).ampKeywords + let ampKeywords = settings.ampKeywords return ampKeywords.contains { keyword in return urlStr.contains(keyword) @@ -167,6 +171,10 @@ public final class AMPCanonicalExtractor: NSObject { completion(nil) return } + guard TrackingLinkSettings(fromConfig: privacyConfig).deepExtractionEnabled else { + completion(nil) + return + } guard let url = url, !linkCleaner.isURLExcluded(url: url) else { completion(nil) return diff --git a/Sources/BrowserServicesKit/LinkProtection/TrackingLinkSettings.swift b/Sources/BrowserServicesKit/LinkProtection/TrackingLinkSettings.swift index f832567df..86cf234e8 100644 --- a/Sources/BrowserServicesKit/LinkProtection/TrackingLinkSettings.swift +++ b/Sources/BrowserServicesKit/LinkProtection/TrackingLinkSettings.swift @@ -23,11 +23,13 @@ struct TrackingLinkSettings { let ampLinkFormats: [String] let ampKeywords: [String] let trackingParameters: [String] + let deepExtractionEnabled: Bool struct Constants { static let ampLinkFormats = "linkFormats" static let ampKeywords = "keywords" static let trackingParameters = "parameters" + static let deepExtractionEnabled = "deepExtractionEnabled" } init(fromConfig config: PrivacyConfiguration) { @@ -36,6 +38,7 @@ struct TrackingLinkSettings { ampLinkFormats = ampFeatureSettings[Constants.ampLinkFormats] as? [String] ?? [] ampKeywords = ampFeatureSettings[Constants.ampKeywords] as? [String] ?? [] + deepExtractionEnabled = ampFeatureSettings[Constants.deepExtractionEnabled] as? Bool ?? false trackingParameters = trackingParametersSettings[Constants.trackingParameters] as? [String] ?? [] } From 7fcbe4f41dde1f9efabcd2cb59d3ef73d83b1513 Mon Sep 17 00:00:00 2001 From: Brad Slayter Date: Thu, 21 Dec 2023 14:34:27 -0600 Subject: [PATCH 4/7] Update ref tests --- Tests/BrowserServicesKitTests/Resources/privacy-reference-tests | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Tests/BrowserServicesKitTests/Resources/privacy-reference-tests b/Tests/BrowserServicesKitTests/Resources/privacy-reference-tests index a3acc2194..7a91c5f3f 160000 --- a/Tests/BrowserServicesKitTests/Resources/privacy-reference-tests +++ b/Tests/BrowserServicesKitTests/Resources/privacy-reference-tests @@ -1 +1 @@ -Subproject commit a3acc2194758bec0f01f57dd0c5f106de01a354e +Subproject commit 7a91c5f3feb579a5cad9d2447d55772e1bf5e22a From 5a4128716c6882c2aecf1d50736aabc627b526cd Mon Sep 17 00:00:00 2001 From: Brad Slayter Date: Fri, 22 Dec 2023 08:46:07 -0600 Subject: [PATCH 5/7] Pin correct ref tests version --- Tests/BrowserServicesKitTests/Resources/privacy-reference-tests | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Tests/BrowserServicesKitTests/Resources/privacy-reference-tests b/Tests/BrowserServicesKitTests/Resources/privacy-reference-tests index 7a91c5f3f..6b7ad1e7f 160000 --- a/Tests/BrowserServicesKitTests/Resources/privacy-reference-tests +++ b/Tests/BrowserServicesKitTests/Resources/privacy-reference-tests @@ -1 +1 @@ -Subproject commit 7a91c5f3feb579a5cad9d2447d55772e1bf5e22a +Subproject commit 6b7ad1e7f15270f9dfeb58a272199f4d57c3eb22 From 23187f1d51cf7766ae72e1052a091f740b2149e4 Mon Sep 17 00:00:00 2001 From: Brad Slayter Date: Fri, 12 Jan 2024 09:37:35 -0600 Subject: [PATCH 6/7] Fix deep extraction loop prevention --- .../LinkProtection/AMPCanonicalExtractor.swift | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/Sources/BrowserServicesKit/LinkProtection/AMPCanonicalExtractor.swift b/Sources/BrowserServicesKit/LinkProtection/AMPCanonicalExtractor.swift index 880960d18..e4a1ff66a 100644 --- a/Sources/BrowserServicesKit/LinkProtection/AMPCanonicalExtractor.swift +++ b/Sources/BrowserServicesKit/LinkProtection/AMPCanonicalExtractor.swift @@ -187,6 +187,8 @@ public final class AMPCanonicalExtractor: NSObject { completionHandler.setCompletionHandler(completion: completion) + linkCleaner.lastAMPURLString = url.absoluteString + assert(Thread.isMainThread) webView = WKWebView(frame: .zero, configuration: makeConfiguration()) webView?.navigationDelegate = self @@ -234,12 +236,13 @@ extension AMPCanonicalExtractor: WKScriptMessageHandler { if let dict = message.body as? [String: AnyObject], let canonical = dict[Constants.canonicalKey] as? String { if let canonicalUrl = URL(string: canonical), !linkCleaner.isURLExcluded(url: canonicalUrl) { - linkCleaner.lastAMPURLString = canonicalUrl.absoluteString completionHandler.completeWithURL(canonicalUrl) } else { + linkCleaner.lastAMPURLString = nil completionHandler.completeWithURL(nil) } } else { + linkCleaner.lastAMPURLString = nil completionHandler.completeWithURL(nil) } } From 7522aa923521c26b6503db8754829e33c450bc68 Mon Sep 17 00:00:00 2001 From: Brad Slayter Date: Fri, 12 Jan 2024 10:13:44 -0600 Subject: [PATCH 7/7] Better loop prevention --- .../LinkProtection/AMPCanonicalExtractor.swift | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/Sources/BrowserServicesKit/LinkProtection/AMPCanonicalExtractor.swift b/Sources/BrowserServicesKit/LinkProtection/AMPCanonicalExtractor.swift index e4a1ff66a..aee2128a0 100644 --- a/Sources/BrowserServicesKit/LinkProtection/AMPCanonicalExtractor.swift +++ b/Sources/BrowserServicesKit/LinkProtection/AMPCanonicalExtractor.swift @@ -59,6 +59,9 @@ public final class AMPCanonicalExtractor: NSObject { private var privacyConfig: PrivacyConfiguration { privacyManager.privacyConfig } + private var currentUrl: URL? + private var lastPositiveAMPUrl: URL? + public init(linkCleaner: LinkCleaner, privacyManager: PrivacyConfigurationManaging, contentBlockingManager: CompiledRuleListsSource, @@ -111,6 +114,7 @@ public final class AMPCanonicalExtractor: NSObject { } public func urlContainsAMPKeyword(_ url: URL?) -> Bool { + guard url != lastPositiveAMPUrl else { return false } linkCleaner.lastAMPURLString = nil let settings = TrackingLinkSettings(fromConfig: privacyConfig) @@ -187,7 +191,7 @@ public final class AMPCanonicalExtractor: NSObject { completionHandler.setCompletionHandler(completion: completion) - linkCleaner.lastAMPURLString = url.absoluteString + currentUrl = url assert(Thread.isMainThread) webView = WKWebView(frame: .zero, configuration: makeConfiguration()) @@ -229,6 +233,10 @@ public final class AMPCanonicalExtractor: NSObject { extension AMPCanonicalExtractor: WKScriptMessageHandler { public func userContentController(_ userContentController: WKUserContentController, didReceive message: WKScriptMessage) { + defer { + currentUrl = nil + } + guard message.name == Constants.sendCanonical else { return } webView = nil @@ -236,13 +244,13 @@ extension AMPCanonicalExtractor: WKScriptMessageHandler { if let dict = message.body as? [String: AnyObject], let canonical = dict[Constants.canonicalKey] as? String { if let canonicalUrl = URL(string: canonical), !linkCleaner.isURLExcluded(url: canonicalUrl) { + linkCleaner.lastAMPURLString = currentUrl?.absoluteString + lastPositiveAMPUrl = currentUrl completionHandler.completeWithURL(canonicalUrl) } else { - linkCleaner.lastAMPURLString = nil completionHandler.completeWithURL(nil) } } else { - linkCleaner.lastAMPURLString = nil completionHandler.completeWithURL(nil) } }