Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Better detection of AMP pages #611

Merged
merged 8 commits into from
Jan 16, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,9 @@ public final class AMPCanonicalExtractor: NSObject {

private var privacyConfig: PrivacyConfiguration { privacyManager.privacyConfig }

private var currentUrl: URL?
private var lastPositiveAMPUrl: URL?

public init(linkCleaner: LinkCleaner,
privacyManager: PrivacyConfigurationManaging,
contentBlockingManager: CompiledRuleListsSource,
Expand Down Expand Up @@ -111,12 +114,17 @@ public final class AMPCanonicalExtractor: NSObject {
}

public func urlContainsAMPKeyword(_ url: URL?) -> Bool {
guard url != lastPositiveAMPUrl else { return false }
linkCleaner.lastAMPURLString = nil

let settings = TrackingLinkSettings(fromConfig: privacyConfig)

guard privacyConfig.isEnabled(featureKey: .ampLinks) else { return false }
guard settings.deepExtractionEnabled else { return false }
guard let url = url, !linkCleaner.isURLExcluded(url: url) else { return false }
let urlStr = url.absoluteString

let ampKeywords = TrackingLinkSettings(fromConfig: privacyConfig).ampKeywords
let ampKeywords = settings.ampKeywords

return ampKeywords.contains { keyword in
return urlStr.contains(keyword)
Expand All @@ -128,8 +136,23 @@ public final class AMPCanonicalExtractor: NSObject {
(function() {
document.addEventListener('DOMContentLoaded', (event) => {
const canonicalLinks = document.querySelectorAll('[rel="canonical"]')

let result = undefined
if (canonicalLinks.length > 0) {
result = canonicalLinks[0].href
}

// Loop prevention
if (window.location.href === result) {
result = undefined
}

if (!document.documentElement.hasAttribute('amp') && !document.documentElement.hasAttribute('⚡')) {
result = undefined
}

window.webkit.messageHandlers.\(Constants.sendCanonical).postMessage({
\(Constants.canonicalKey): canonicalLinks.length > 0 ? canonicalLinks[0].href : undefined
\(Constants.canonicalKey): result
})
})
})()
Expand All @@ -152,6 +175,10 @@ public final class AMPCanonicalExtractor: NSObject {
completion(nil)
return
}
guard TrackingLinkSettings(fromConfig: privacyConfig).deepExtractionEnabled else {
completion(nil)
return
}
guard let url = url, !linkCleaner.isURLExcluded(url: url) else {
completion(nil)
return
Expand All @@ -164,6 +191,8 @@ public final class AMPCanonicalExtractor: NSObject {

completionHandler.setCompletionHandler(completion: completion)

currentUrl = url

assert(Thread.isMainThread)
webView = WKWebView(frame: .zero, configuration: makeConfiguration())
webView?.navigationDelegate = self
Expand Down Expand Up @@ -204,14 +233,19 @@ public final class AMPCanonicalExtractor: NSObject {

extension AMPCanonicalExtractor: WKScriptMessageHandler {
public func userContentController(_ userContentController: WKUserContentController, didReceive message: WKScriptMessage) {
defer {
currentUrl = nil
}

guard message.name == Constants.sendCanonical else { return }

webView = nil

if let dict = message.body as? [String: AnyObject],
let canonical = dict[Constants.canonicalKey] as? String {
if let canonicalUrl = URL(string: canonical), !linkCleaner.isURLExcluded(url: canonicalUrl) {
linkCleaner.lastAMPURLString = canonicalUrl.absoluteString
linkCleaner.lastAMPURLString = currentUrl?.absoluteString
lastPositiveAMPUrl = currentUrl
completionHandler.completeWithURL(canonicalUrl)
} else {
completionHandler.completeWithURL(nil)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,11 +23,13 @@ struct TrackingLinkSettings {
let ampLinkFormats: [String]
let ampKeywords: [String]
let trackingParameters: [String]
let deepExtractionEnabled: Bool

/// String keys that `init(fromConfig:)` uses to look up individual values
/// in the settings dictionaries it reads from the privacy configuration.
struct Constants {
/// Key looked up in the AMP feature settings; the value is read as `[String]`.
static let ampLinkFormats = "linkFormats"
/// Key looked up in the AMP feature settings; the value is read as `[String]`.
static let ampKeywords = "keywords"
/// Key looked up in the tracking-parameters settings; the value is read as `[String]`.
static let trackingParameters = "parameters"
/// Key looked up in the AMP feature settings; the value is read as `Bool`,
/// and the initializer falls back to `false` when it is missing or not a `Bool`.
static let deepExtractionEnabled = "deepExtractionEnabled"
}

init(fromConfig config: PrivacyConfiguration) {
Expand All @@ -36,6 +38,7 @@ struct TrackingLinkSettings {

ampLinkFormats = ampFeatureSettings[Constants.ampLinkFormats] as? [String] ?? []
ampKeywords = ampFeatureSettings[Constants.ampKeywords] as? [String] ?? []
deepExtractionEnabled = ampFeatureSettings[Constants.deepExtractionEnabled] as? Bool ?? false
trackingParameters = trackingParametersSettings[Constants.trackingParameters] as? [String] ?? []
}

Expand Down
Loading