From 2cec3ecad082166bd81660f4725983531f8f1635 Mon Sep 17 00:00:00 2001 From: dbajpeyi Date: Mon, 16 Dec 2024 12:19:49 +0100 Subject: [PATCH 1/6] wip: scoring password hints towards signup --- dist/autofill-debug.js | 11 ++++++++-- dist/autofill.js | 11 ++++++++-- src/Form/FormAnalyzer.js | 20 ++++++++++++++++++- .../__generated__/compiled-matching-config.js | 2 +- src/Form/matching-config/selectors-css.js | 8 ++++---- src/Form/matching-types.d.ts | 2 +- .../Resources/assets/autofill-debug.js | 11 ++++++++-- swift-package/Resources/assets/autofill.js | 11 ++++++++-- 8 files changed, 61 insertions(+), 15 deletions(-) diff --git a/dist/autofill-debug.js b/dist/autofill-debug.js index 6a9fbe5cd..f12bfb8ac 100644 --- a/dist/autofill-debug.js +++ b/dist/autofill-debug.js @@ -11293,10 +11293,17 @@ class FormAnalyzer { } // A form with many fields is unlikely to be a login form - const relevantFields = this.form.querySelectorAll(this.matching.cssSelector('genericTextField')); + const relevantFields = this.form.querySelectorAll(this.matching.cssSelector('genericTextInputField')); if (relevantFields.length >= 4) { this.increaseSignalBy(relevantFields.length * 1.5, 'many fields: it is probably not a login'); } + const passwordHintRegex = /\b(?:password.*?(?:must|should|has to|needs to|can))?\b.*?(?:(at least|minimum|no fewer than)\s+\d+\s+(characters?|letters?|numbers?|special characters?)|(uppercase|lowercase|capital|digit|number|symbol|special character)|\b(no spaces|cannot contain your email|cannot repeat characters|must be unique|case sensitive)\b)/; + + // If the form contains password hints, it's highly likely a signup form. + const hasPasswordHints = Array.from(this.form.querySelectorAll('div, span')).filter(div => div.textContent != null && div.textContent.trim() !== '' && window.getComputedStyle(div).display !== 'none' && window.getComputedStyle(div).visibility !== 'hidden').some(div => div.textContent && (0, _autofillUtils.safeRegexTest)(passwordHintRegex, div.textContent)); + if (hasPasswordHints) { + this.increaseSignalBy(6, 'Password hints'); + } // If we can't decide at this point, try reading page headings if (this.autofillSignal === 0) { @@ -12847,7 +12854,7 @@ const matchingConfiguration = exports.matchingConfiguration = { strategies: { cssSelector: { selectors: { - genericTextField: 'input:not([type=button]):not([type=checkbox]):not([type=color]):not([type=file]):not([type=hidden]):not([type=radio]):not([type=range]):not([type=reset]):not([type=image]):not([type=search]):not([type=submit]):not([type=time]):not([type=url]):not([type=week]):not([name^=fake i]):not([data-description^=dummy i]):not([name*=otp]):not([autocomplete="fake"]):not([placeholder^=search i]):not([type=date]):not([type=datetime-local]):not([type=datetime]):not([type=month])', + genericTextInputField: 'input:not([type=button]):not([type=checkbox]):not([type=color]):not([type=file]):not([type=hidden]):not([type=radio]):not([type=range]):not([type=reset]):not([type=image]):not([type=search]):not([type=submit]):not([type=time]):not([type=url]):not([type=week]):not([name^=fake i]):not([data-description^=dummy i]):not([name*=otp]):not([autocomplete="fake"]):not([placeholder^=search i]):not([type=date]):not([type=datetime-local]):not([type=datetime]):not([type=month])', submitButtonSelector: 'input[type=submit], input[type=button], input[type=image], button:not([role=switch]):not([role=link]), [role=button], a[href="#"][id*=button i], a[href="#"][id*=btn i]', formInputsSelectorWithoutSelect: 'input:not([type=button]):not([type=checkbox]):not([type=color]):not([type=file]):not([type=hidden]):not([type=radio]):not([type=range]):not([type=reset]):not([type=image]):not([type=search]):not([type=submit]):not([type=time]):not([type=url]):not([type=week]):not([name^=fake i]):not([data-description^=dummy i]):not([name*=otp]):not([autocomplete="fake"]):not([placeholder^=search i]):not([type=date]):not([type=datetime-local]):not([type=datetime]):not([type=month]),[autocomplete=username]', formInputsSelector: 'input:not([type=button]):not([type=checkbox]):not([type=color]):not([type=file]):not([type=hidden]):not([type=radio]):not([type=range]):not([type=reset]):not([type=image]):not([type=search]):not([type=submit]):not([type=time]):not([type=url]):not([type=week]):not([name^=fake i]):not([data-description^=dummy i]):not([name*=otp]):not([autocomplete="fake"]):not([placeholder^=search i]):not([type=date]):not([type=datetime-local]):not([type=datetime]):not([type=month]),[autocomplete=username],select', diff --git a/dist/autofill.js b/dist/autofill.js index 04931d74e..9468f0d7c 100644 --- a/dist/autofill.js +++ b/dist/autofill.js @@ -6930,10 +6930,17 @@ class FormAnalyzer { } // A form with many fields is unlikely to be a login form - const relevantFields = this.form.querySelectorAll(this.matching.cssSelector('genericTextField')); + const relevantFields = this.form.querySelectorAll(this.matching.cssSelector('genericTextInputField')); if (relevantFields.length >= 4) { this.increaseSignalBy(relevantFields.length * 1.5, 'many fields: it is probably not a login'); } + const passwordHintRegex = /\b(?:password.*?(?:must|should|has to|needs to|can))?\b.*?(?:(at least|minimum|no fewer than)\s+\d+\s+(characters?|letters?|numbers?|special characters?)|(uppercase|lowercase|capital|digit|number|symbol|special character)|\b(no spaces|cannot contain your email|cannot repeat characters|must be unique|case sensitive)\b)/; + + // If the form contains password hints, it's highly likely a signup form. + const hasPasswordHints = Array.from(this.form.querySelectorAll('div, span')).filter(div => div.textContent != null && div.textContent.trim() !== '' && window.getComputedStyle(div).display !== 'none' && window.getComputedStyle(div).visibility !== 'hidden').some(div => div.textContent && (0, _autofillUtils.safeRegexTest)(passwordHintRegex, div.textContent)); + if (hasPasswordHints) { + this.increaseSignalBy(6, 'Password hints'); + } // If we can't decide at this point, try reading page headings if (this.autofillSignal === 0) { @@ -8484,7 +8491,7 @@ const matchingConfiguration = exports.matchingConfiguration = { strategies: { cssSelector: { selectors: { - genericTextField: 'input:not([type=button]):not([type=checkbox]):not([type=color]):not([type=file]):not([type=hidden]):not([type=radio]):not([type=range]):not([type=reset]):not([type=image]):not([type=search]):not([type=submit]):not([type=time]):not([type=url]):not([type=week]):not([name^=fake i]):not([data-description^=dummy i]):not([name*=otp]):not([autocomplete="fake"]):not([placeholder^=search i]):not([type=date]):not([type=datetime-local]):not([type=datetime]):not([type=month])', + genericTextInputField: 'input:not([type=button]):not([type=checkbox]):not([type=color]):not([type=file]):not([type=hidden]):not([type=radio]):not([type=range]):not([type=reset]):not([type=image]):not([type=search]):not([type=submit]):not([type=time]):not([type=url]):not([type=week]):not([name^=fake i]):not([data-description^=dummy i]):not([name*=otp]):not([autocomplete="fake"]):not([placeholder^=search i]):not([type=date]):not([type=datetime-local]):not([type=datetime]):not([type=month])', submitButtonSelector: 'input[type=submit], input[type=button], input[type=image], button:not([role=switch]):not([role=link]), [role=button], a[href="#"][id*=button i], a[href="#"][id*=btn i]', formInputsSelectorWithoutSelect: 'input:not([type=button]):not([type=checkbox]):not([type=color]):not([type=file]):not([type=hidden]):not([type=radio]):not([type=range]):not([type=reset]):not([type=image]):not([type=search]):not([type=submit]):not([type=time]):not([type=url]):not([type=week]):not([name^=fake i]):not([data-description^=dummy i]):not([name*=otp]):not([autocomplete="fake"]):not([placeholder^=search i]):not([type=date]):not([type=datetime-local]):not([type=datetime]):not([type=month]),[autocomplete=username]', formInputsSelector: 'input:not([type=button]):not([type=checkbox]):not([type=color]):not([type=file]):not([type=hidden]):not([type=radio]):not([type=range]):not([type=reset]):not([type=image]):not([type=search]):not([type=submit]):not([type=time]):not([type=url]):not([type=week]):not([name^=fake i]):not([data-description^=dummy i]):not([name*=otp]):not([autocomplete="fake"]):not([placeholder^=search i]):not([type=date]):not([type=datetime-local]):not([type=datetime]):not([type=month]),[autocomplete=username],select', diff --git a/src/Form/FormAnalyzer.js b/src/Form/FormAnalyzer.js index 77647b70d..ea2edd813 100644 --- a/src/Form/FormAnalyzer.js +++ b/src/Form/FormAnalyzer.js @@ -331,11 +331,29 @@ class FormAnalyzer { } // A form with many fields is unlikely to be a login form - const relevantFields = this.form.querySelectorAll(this.matching.cssSelector('genericTextField')); + const relevantFields = this.form.querySelectorAll(this.matching.cssSelector('genericTextInputField')); if (relevantFields.length >= 4) { this.increaseSignalBy(relevantFields.length * 1.5, 'many fields: it is probably not a login'); } + const passwordHintRegex = + /\b(?:password.*?(?:must|should|has to|needs to|can))?\b.*?(?:(at least|minimum|no fewer than)\s+\d+\s+(characters?|letters?|numbers?|special characters?)|(uppercase|lowercase|capital|digit|number|symbol|special character)|\b(no spaces|cannot contain your email|cannot repeat characters|must be unique|case sensitive)\b)/; + + // If the form contains password hints, it's highly likely a signup form. + const hasPasswordHints = Array.from(this.form.querySelectorAll('div, span')) + .filter( + (div) => + div.textContent != null && + div.textContent.trim() !== '' && + window.getComputedStyle(div).display !== 'none' && + window.getComputedStyle(div).visibility !== 'hidden', + ) + .some((div) => div.textContent && safeRegexTest(passwordHintRegex, div.textContent)); + + if (hasPasswordHints) { + this.increaseSignalBy(6, 'Password hints'); + } + // If we can't decide at this point, try reading page headings if (this.autofillSignal === 0) { this.evaluatePageHeadings(); diff --git a/src/Form/matching-config/__generated__/compiled-matching-config.js b/src/Form/matching-config/__generated__/compiled-matching-config.js index 19f5db6f4..8b7dba33e 100644 --- a/src/Form/matching-config/__generated__/compiled-matching-config.js +++ b/src/Form/matching-config/__generated__/compiled-matching-config.js @@ -203,7 +203,7 @@ const matchingConfiguration = { strategies: { cssSelector: { selectors: { - genericTextField: 'input:not([type=button]):not([type=checkbox]):not([type=color]):not([type=file]):not([type=hidden]):not([type=radio]):not([type=range]):not([type=reset]):not([type=image]):not([type=search]):not([type=submit]):not([type=time]):not([type=url]):not([type=week]):not([name^=fake i]):not([data-description^=dummy i]):not([name*=otp]):not([autocomplete="fake"]):not([placeholder^=search i]):not([type=date]):not([type=datetime-local]):not([type=datetime]):not([type=month])', + genericTextInputField: 'input:not([type=button]):not([type=checkbox]):not([type=color]):not([type=file]):not([type=hidden]):not([type=radio]):not([type=range]):not([type=reset]):not([type=image]):not([type=search]):not([type=submit]):not([type=time]):not([type=url]):not([type=week]):not([name^=fake i]):not([data-description^=dummy i]):not([name*=otp]):not([autocomplete="fake"]):not([placeholder^=search i]):not([type=date]):not([type=datetime-local]):not([type=datetime]):not([type=month])', submitButtonSelector: 'input[type=submit], input[type=button], input[type=image], button:not([role=switch]):not([role=link]), [role=button], a[href="#"][id*=button i], a[href="#"][id*=btn i]', formInputsSelectorWithoutSelect: 'input:not([type=button]):not([type=checkbox]):not([type=color]):not([type=file]):not([type=hidden]):not([type=radio]):not([type=range]):not([type=reset]):not([type=image]):not([type=search]):not([type=submit]):not([type=time]):not([type=url]):not([type=week]):not([name^=fake i]):not([data-description^=dummy i]):not([name*=otp]):not([autocomplete="fake"]):not([placeholder^=search i]):not([type=date]):not([type=datetime-local]):not([type=datetime]):not([type=month]),[autocomplete=username]', formInputsSelector: 'input:not([type=button]):not([type=checkbox]):not([type=color]):not([type=file]):not([type=hidden]):not([type=radio]):not([type=range]):not([type=reset]):not([type=image]):not([type=search]):not([type=submit]):not([type=time]):not([type=url]):not([type=week]):not([name^=fake i]):not([data-description^=dummy i]):not([name*=otp]):not([autocomplete="fake"]):not([placeholder^=search i]):not([type=date]):not([type=datetime-local]):not([type=datetime]):not([type=month]),[autocomplete=username],select', diff --git a/src/Form/matching-config/selectors-css.js b/src/Form/matching-config/selectors-css.js index e8f1ff084..3af7830d8 100644 --- a/src/Form/matching-config/selectors-css.js +++ b/src/Form/matching-config/selectors-css.js @@ -1,8 +1,8 @@ // We've seen non-standard types like 'user'. This selector should get them, too -const genericTextField = ` +const genericTextInputField = ` input:not([type=button]):not([type=checkbox]):not([type=color]):not([type=file]):not([type=hidden]):not([type=radio]):not([type=range]):not([type=reset]):not([type=image]):not([type=search]):not([type=submit]):not([type=time]):not([type=url]):not([type=week]):not([name^=fake i]):not([data-description^=dummy i]):not([name*=otp]):not([autocomplete="fake"]):not([placeholder^=search i]):not([type=date]):not([type=datetime-local]):not([type=datetime]):not([type=month])`; -const formInputsSelectorWithoutSelect = [genericTextField, '[autocomplete=username]']; +const formInputsSelectorWithoutSelect = [genericTextInputField, '[autocomplete=username]']; const formInputsSelector = [...formInputsSelectorWithoutSelect, 'select']; @@ -39,7 +39,7 @@ input[autocomplete=email]`, ]; const username = [ - `${genericTextField}[autocomplete^=user i]`, + `${genericTextInputField}[autocomplete^=user i]`, `input[name=username i]`, // fix for `aa.com` `input[name="loginId" i]`, @@ -274,7 +274,7 @@ const birthdayYear = ` const selectors = { // Generic - genericTextField, + genericTextInputField, submitButtonSelector, formInputsSelectorWithoutSelect, formInputsSelector, diff --git a/src/Form/matching-types.d.ts b/src/Form/matching-types.d.ts index 9560f6822..9e26a842e 100644 --- a/src/Form/matching-types.d.ts +++ b/src/Form/matching-types.d.ts @@ -159,7 +159,7 @@ type RequiredCssSelectors = | 'formInputsSelectorWithoutSelect' | 'formInputsSelector' | 'submitButtonSelector' - | 'genericTextField' + | 'genericTextInputField' | 'safeUniversalSelector'; /** diff --git a/swift-package/Resources/assets/autofill-debug.js b/swift-package/Resources/assets/autofill-debug.js index 6a9fbe5cd..f12bfb8ac 100644 --- a/swift-package/Resources/assets/autofill-debug.js +++ b/swift-package/Resources/assets/autofill-debug.js @@ -11293,10 +11293,17 @@ class FormAnalyzer { } // A form with many fields is unlikely to be a login form - const relevantFields = this.form.querySelectorAll(this.matching.cssSelector('genericTextField')); + const relevantFields = this.form.querySelectorAll(this.matching.cssSelector('genericTextInputField')); if (relevantFields.length >= 4) { this.increaseSignalBy(relevantFields.length * 1.5, 'many fields: it is probably not a login'); } + const passwordHintRegex = /\b(?:password.*?(?:must|should|has to|needs to|can))?\b.*?(?:(at least|minimum|no fewer than)\s+\d+\s+(characters?|letters?|numbers?|special characters?)|(uppercase|lowercase|capital|digit|number|symbol|special character)|\b(no spaces|cannot contain your email|cannot repeat characters|must be unique|case sensitive)\b)/; + + // If the form contains password hints, it's highly likely a signup form. + const hasPasswordHints = Array.from(this.form.querySelectorAll('div, span')).filter(div => div.textContent != null && div.textContent.trim() !== '' && window.getComputedStyle(div).display !== 'none' && window.getComputedStyle(div).visibility !== 'hidden').some(div => div.textContent && (0, _autofillUtils.safeRegexTest)(passwordHintRegex, div.textContent)); + if (hasPasswordHints) { + this.increaseSignalBy(6, 'Password hints'); + } // If we can't decide at this point, try reading page headings if (this.autofillSignal === 0) { @@ -12847,7 +12854,7 @@ const matchingConfiguration = exports.matchingConfiguration = { strategies: { cssSelector: { selectors: { - genericTextField: 'input:not([type=button]):not([type=checkbox]):not([type=color]):not([type=file]):not([type=hidden]):not([type=radio]):not([type=range]):not([type=reset]):not([type=image]):not([type=search]):not([type=submit]):not([type=time]):not([type=url]):not([type=week]):not([name^=fake i]):not([data-description^=dummy i]):not([name*=otp]):not([autocomplete="fake"]):not([placeholder^=search i]):not([type=date]):not([type=datetime-local]):not([type=datetime]):not([type=month])', + genericTextInputField: 'input:not([type=button]):not([type=checkbox]):not([type=color]):not([type=file]):not([type=hidden]):not([type=radio]):not([type=range]):not([type=reset]):not([type=image]):not([type=search]):not([type=submit]):not([type=time]):not([type=url]):not([type=week]):not([name^=fake i]):not([data-description^=dummy i]):not([name*=otp]):not([autocomplete="fake"]):not([placeholder^=search i]):not([type=date]):not([type=datetime-local]):not([type=datetime]):not([type=month])', submitButtonSelector: 'input[type=submit], input[type=button], input[type=image], button:not([role=switch]):not([role=link]), [role=button], a[href="#"][id*=button i], a[href="#"][id*=btn i]', formInputsSelectorWithoutSelect: 'input:not([type=button]):not([type=checkbox]):not([type=color]):not([type=file]):not([type=hidden]):not([type=radio]):not([type=range]):not([type=reset]):not([type=image]):not([type=search]):not([type=submit]):not([type=time]):not([type=url]):not([type=week]):not([name^=fake i]):not([data-description^=dummy i]):not([name*=otp]):not([autocomplete="fake"]):not([placeholder^=search i]):not([type=date]):not([type=datetime-local]):not([type=datetime]):not([type=month]),[autocomplete=username]', formInputsSelector: 'input:not([type=button]):not([type=checkbox]):not([type=color]):not([type=file]):not([type=hidden]):not([type=radio]):not([type=range]):not([type=reset]):not([type=image]):not([type=search]):not([type=submit]):not([type=time]):not([type=url]):not([type=week]):not([name^=fake i]):not([data-description^=dummy i]):not([name*=otp]):not([autocomplete="fake"]):not([placeholder^=search i]):not([type=date]):not([type=datetime-local]):not([type=datetime]):not([type=month]),[autocomplete=username],select', diff --git a/swift-package/Resources/assets/autofill.js b/swift-package/Resources/assets/autofill.js index 04931d74e..9468f0d7c 100644 --- a/swift-package/Resources/assets/autofill.js +++ b/swift-package/Resources/assets/autofill.js @@ -6930,10 +6930,17 @@ class FormAnalyzer { } // A form with many fields is unlikely to be a login form - const relevantFields = this.form.querySelectorAll(this.matching.cssSelector('genericTextField')); + const relevantFields = this.form.querySelectorAll(this.matching.cssSelector('genericTextInputField')); if (relevantFields.length >= 4) { this.increaseSignalBy(relevantFields.length * 1.5, 'many fields: it is probably not a login'); } + const passwordHintRegex = /\b(?:password.*?(?:must|should|has to|needs to|can))?\b.*?(?:(at least|minimum|no fewer than)\s+\d+\s+(characters?|letters?|numbers?|special characters?)|(uppercase|lowercase|capital|digit|number|symbol|special character)|\b(no spaces|cannot contain your email|cannot repeat characters|must be unique|case sensitive)\b)/; + + // If the form contains password hints, it's highly likely a signup form. + const hasPasswordHints = Array.from(this.form.querySelectorAll('div, span')).filter(div => div.textContent != null && div.textContent.trim() !== '' && window.getComputedStyle(div).display !== 'none' && window.getComputedStyle(div).visibility !== 'hidden').some(div => div.textContent && (0, _autofillUtils.safeRegexTest)(passwordHintRegex, div.textContent)); + if (hasPasswordHints) { + this.increaseSignalBy(6, 'Password hints'); + } // If we can't decide at this point, try reading page headings if (this.autofillSignal === 0) { @@ -8484,7 +8491,7 @@ const matchingConfiguration = exports.matchingConfiguration = { strategies: { cssSelector: { selectors: { - genericTextField: 'input:not([type=button]):not([type=checkbox]):not([type=color]):not([type=file]):not([type=hidden]):not([type=radio]):not([type=range]):not([type=reset]):not([type=image]):not([type=search]):not([type=submit]):not([type=time]):not([type=url]):not([type=week]):not([name^=fake i]):not([data-description^=dummy i]):not([name*=otp]):not([autocomplete="fake"]):not([placeholder^=search i]):not([type=date]):not([type=datetime-local]):not([type=datetime]):not([type=month])', + genericTextInputField: 'input:not([type=button]):not([type=checkbox]):not([type=color]):not([type=file]):not([type=hidden]):not([type=radio]):not([type=range]):not([type=reset]):not([type=image]):not([type=search]):not([type=submit]):not([type=time]):not([type=url]):not([type=week]):not([name^=fake i]):not([data-description^=dummy i]):not([name*=otp]):not([autocomplete="fake"]):not([placeholder^=search i]):not([type=date]):not([type=datetime-local]):not([type=datetime]):not([type=month])', submitButtonSelector: 'input[type=submit], input[type=button], input[type=image], button:not([role=switch]):not([role=link]), [role=button], a[href="#"][id*=button i], a[href="#"][id*=btn i]', formInputsSelectorWithoutSelect: 'input:not([type=button]):not([type=checkbox]):not([type=color]):not([type=file]):not([type=hidden]):not([type=radio]):not([type=range]):not([type=reset]):not([type=image]):not([type=search]):not([type=submit]):not([type=time]):not([type=url]):not([type=week]):not([name^=fake i]):not([data-description^=dummy i]):not([name*=otp]):not([autocomplete="fake"]):not([placeholder^=search i]):not([type=date]):not([type=datetime-local]):not([type=datetime]):not([type=month]),[autocomplete=username]', formInputsSelector: 'input:not([type=button]):not([type=checkbox]):not([type=color]):not([type=file]):not([type=hidden]):not([type=radio]):not([type=range]):not([type=reset]):not([type=image]):not([type=search]):not([type=submit]):not([type=time]):not([type=url]):not([type=week]):not([name^=fake i]):not([data-description^=dummy i]):not([name*=otp]):not([autocomplete="fake"]):not([placeholder^=search i]):not([type=date]):not([type=datetime-local]):not([type=datetime]):not([type=month]),[autocomplete=username],select', From 5a9532090bf6e1d8050d172afa10677cb49f6044 Mon Sep 17 00:00:00 2001 From: dbajpeyi Date: Thu, 19 Dec 2024 11:03:49 +0100 Subject: [PATCH 2/6] refactor: move regex to matcher --- dist/autofill-debug.js | 6 ++++-- dist/autofill.js | 6 ++++-- src/Form/FormAnalyzer.js | 5 +---- .../__generated__/compiled-matching-config.js | 3 +++ src/Form/matching-config/matching-config-source.js | 3 +++ src/Form/matching-types.d.ts | 2 +- swift-package/Resources/assets/autofill-debug.js | 6 ++++-- swift-package/Resources/assets/autofill.js | 6 ++++-- 8 files changed, 24 insertions(+), 13 deletions(-) diff --git a/dist/autofill-debug.js b/dist/autofill-debug.js index 3b1e7e8f3..f1385415c 100644 --- a/dist/autofill-debug.js +++ b/dist/autofill-debug.js @@ -11295,10 +11295,9 @@ class FormAnalyzer { if (relevantFields.length >= 4) { this.increaseSignalBy(relevantFields.length * 1.5, 'many fields: it is probably not a login'); } - const passwordHintRegex = /\b(?:password.*?(?:must|should|has to|needs to|can))?\b.*?(?:(at least|minimum|no fewer than)\s+\d+\s+(characters?|letters?|numbers?|special characters?)|(uppercase|lowercase|capital|digit|number|symbol|special character)|\b(no spaces|cannot contain your email|cannot repeat characters|must be unique|case sensitive)\b)/; // If the form contains password hints, it's highly likely a signup form. - const hasPasswordHints = Array.from(this.form.querySelectorAll('div, span')).filter(div => div.textContent != null && div.textContent.trim() !== '' && window.getComputedStyle(div).display !== 'none' && window.getComputedStyle(div).visibility !== 'hidden').some(div => div.textContent && (0, _autofillUtils.safeRegexTest)(passwordHintRegex, div.textContent)); + const hasPasswordHints = Array.from(this.form.querySelectorAll('div, span')).filter(div => div.textContent != null && div.textContent.trim() !== '' && window.getComputedStyle(div).display !== 'none' && window.getComputedStyle(div).visibility !== 'hidden').some(div => div.textContent && (0, _autofillUtils.safeRegexTest)(this.matching.getDDGMatcherRegex('passwordHintsRegex'), div.textContent)); if (hasPasswordHints) { this.increaseSignalBy(6, 'Password hints'); } @@ -13006,6 +13005,9 @@ const matchingConfiguration = exports.matchingConfiguration = { loginProvidersRegex: { match: / with | con | mit | met | avec /iu }, + passwordHintsRegex: { + match: /\b(?:password.*?(?:must|should|has to|needs to|can))?\b.*?(?:(at least|minimum|no fewer than)\s+\d+\s+(characters?|letters?|numbers?|special characters?)|(uppercase|lowercase|capital|digit|number|symbol|special character)|(no spaces|cannot contain your email|cannot repeat characters|must be unique|case sensitive)\b)/iu + }, submitButtonRegex: { match: /submit|send|confirm|save|continue|next|sign|log.?([io])n|buy|purchase|check.?out|subscribe|donate|update|\bset\b|invia|conferma|salva|continua|entra|acced|accesso|compra|paga|sottoscriv|registra|dona|senden|\bja\b|bestätigen|weiter|nächste|kaufen|bezahlen|spenden|versturen|verzenden|opslaan|volgende|koop|kopen|voeg toe|aanmelden|envoyer|confirmer|sauvegarder|continuer|suivant|signer|connexion|acheter|payer|s.abonner|donner|enviar|confirmar|registrarse|continuar|siguiente|comprar|donar|skicka|bekräfta|spara|fortsätt|nästa|logga in|köp|handla|till kassan|registrera|donera/iu }, diff --git a/dist/autofill.js b/dist/autofill.js index 3399a5f60..db4e900de 100644 --- a/dist/autofill.js +++ b/dist/autofill.js @@ -6932,10 +6932,9 @@ class FormAnalyzer { if (relevantFields.length >= 4) { this.increaseSignalBy(relevantFields.length * 1.5, 'many fields: it is probably not a login'); } - const passwordHintRegex = /\b(?:password.*?(?:must|should|has to|needs to|can))?\b.*?(?:(at least|minimum|no fewer than)\s+\d+\s+(characters?|letters?|numbers?|special characters?)|(uppercase|lowercase|capital|digit|number|symbol|special character)|\b(no spaces|cannot contain your email|cannot repeat characters|must be unique|case sensitive)\b)/; // If the form contains password hints, it's highly likely a signup form. - const hasPasswordHints = Array.from(this.form.querySelectorAll('div, span')).filter(div => div.textContent != null && div.textContent.trim() !== '' && window.getComputedStyle(div).display !== 'none' && window.getComputedStyle(div).visibility !== 'hidden').some(div => div.textContent && (0, _autofillUtils.safeRegexTest)(passwordHintRegex, div.textContent)); + const hasPasswordHints = Array.from(this.form.querySelectorAll('div, span')).filter(div => div.textContent != null && div.textContent.trim() !== '' && window.getComputedStyle(div).display !== 'none' && window.getComputedStyle(div).visibility !== 'hidden').some(div => div.textContent && (0, _autofillUtils.safeRegexTest)(this.matching.getDDGMatcherRegex('passwordHintsRegex'), div.textContent)); if (hasPasswordHints) { this.increaseSignalBy(6, 'Password hints'); } @@ -8643,6 +8642,9 @@ const matchingConfiguration = exports.matchingConfiguration = { loginProvidersRegex: { match: / with | con | mit | met | avec /iu }, + passwordHintsRegex: { + match: /\b(?:password.*?(?:must|should|has to|needs to|can))?\b.*?(?:(at least|minimum|no fewer than)\s+\d+\s+(characters?|letters?|numbers?|special characters?)|(uppercase|lowercase|capital|digit|number|symbol|special character)|(no spaces|cannot contain your email|cannot repeat characters|must be unique|case sensitive)\b)/iu + }, submitButtonRegex: { match: /submit|send|confirm|save|continue|next|sign|log.?([io])n|buy|purchase|check.?out|subscribe|donate|update|\bset\b|invia|conferma|salva|continua|entra|acced|accesso|compra|paga|sottoscriv|registra|dona|senden|\bja\b|bestätigen|weiter|nächste|kaufen|bezahlen|spenden|versturen|verzenden|opslaan|volgende|koop|kopen|voeg toe|aanmelden|envoyer|confirmer|sauvegarder|continuer|suivant|signer|connexion|acheter|payer|s.abonner|donner|enviar|confirmar|registrarse|continuar|siguiente|comprar|donar|skicka|bekräfta|spara|fortsätt|nästa|logga in|köp|handla|till kassan|registrera|donera/iu }, diff --git a/src/Form/FormAnalyzer.js b/src/Form/FormAnalyzer.js index ea2edd813..c61fc4fc8 100644 --- a/src/Form/FormAnalyzer.js +++ b/src/Form/FormAnalyzer.js @@ -336,9 +336,6 @@ class FormAnalyzer { this.increaseSignalBy(relevantFields.length * 1.5, 'many fields: it is probably not a login'); } - const passwordHintRegex = - /\b(?:password.*?(?:must|should|has to|needs to|can))?\b.*?(?:(at least|minimum|no fewer than)\s+\d+\s+(characters?|letters?|numbers?|special characters?)|(uppercase|lowercase|capital|digit|number|symbol|special character)|\b(no spaces|cannot contain your email|cannot repeat characters|must be unique|case sensitive)\b)/; - // If the form contains password hints, it's highly likely a signup form. const hasPasswordHints = Array.from(this.form.querySelectorAll('div, span')) .filter( @@ -348,7 +345,7 @@ class FormAnalyzer { window.getComputedStyle(div).display !== 'none' && window.getComputedStyle(div).visibility !== 'hidden', ) - .some((div) => div.textContent && safeRegexTest(passwordHintRegex, div.textContent)); + .some((div) => div.textContent && safeRegexTest(this.matching.getDDGMatcherRegex('passwordHintsRegex'), div.textContent)); if (hasPasswordHints) { this.increaseSignalBy(6, 'Password hints'); diff --git a/src/Form/matching-config/__generated__/compiled-matching-config.js b/src/Form/matching-config/__generated__/compiled-matching-config.js index 8b7dba33e..bec72cfe8 100644 --- a/src/Form/matching-config/__generated__/compiled-matching-config.js +++ b/src/Form/matching-config/__generated__/compiled-matching-config.js @@ -337,6 +337,9 @@ const matchingConfiguration = { match: /(forgot(ten)?|reset|don't remember).?(your )?password|password forgotten|password dimenticata|reset(?:ta) password|recuper[ao] password|(vergessen|verloren|verlegt|wiederherstellen) passwort|wachtwoord (vergeten|reset)|(oublié|récupérer) ((mon|ton|votre|le) )?mot de passe|mot de passe (oublié|perdu)|re(iniciar|cuperar) (contraseña|clave)|olvid(ó su|aste tu|é mi) (contraseña|clave)|recordar( su)? (contraseña|clave)|glömt lösenord|återställ lösenord/iu }, loginProvidersRegex: { match: / with | con | mit | met | avec /iu }, + passwordHintsRegex: { + match: /\b(?:password.*?(?:must|should|has to|needs to|can))?\b.*?(?:(at least|minimum|no fewer than)\s+\d+\s+(characters?|letters?|numbers?|special characters?)|(uppercase|lowercase|capital|digit|number|symbol|special character)|(no spaces|cannot contain your email|cannot repeat characters|must be unique|case sensitive)\b)/iu + }, submitButtonRegex: { match: /submit|send|confirm|save|continue|next|sign|log.?([io])n|buy|purchase|check.?out|subscribe|donate|update|\bset\b|invia|conferma|salva|continua|entra|acced|accesso|compra|paga|sottoscriv|registra|dona|senden|\bja\b|bestätigen|weiter|nächste|kaufen|bezahlen|spenden|versturen|verzenden|opslaan|volgende|koop|kopen|voeg toe|aanmelden|envoyer|confirmer|sauvegarder|continuer|suivant|signer|connexion|acheter|payer|s.abonner|donner|enviar|confirmar|registrarse|continuar|siguiente|comprar|donar|skicka|bekräfta|spara|fortsätt|nästa|logga in|köp|handla|till kassan|registrera|donera/iu }, diff --git a/src/Form/matching-config/matching-config-source.js b/src/Form/matching-config/matching-config-source.js index 615001515..dea7bd39c 100644 --- a/src/Form/matching-config/matching-config-source.js +++ b/src/Form/matching-config/matching-config-source.js @@ -442,6 +442,9 @@ const matchingConfiguration = { // French '| avec ', }, + passwordHintsRegex: { + match: '\\b(?:password.*?(?:must|should|has to|needs to|can))?\\b.*?(?:(at least|minimum|no fewer than)\\s+\\d+\\s+(characters?|letters?|numbers?|special characters?)|(uppercase|lowercase|capital|digit|number|symbol|special character)|\b(no spaces|cannot contain your email|cannot repeat characters|must be unique|case sensitive)\\b)', + }, submitButtonRegex: { match: 'submit|send|confirm|save|continue|next|sign|log.?([io])n|buy|purchase|check.?out|subscribe|donate|update|\\bset\\b' + diff --git a/src/Form/matching-types.d.ts b/src/Form/matching-types.d.ts index 9e26a842e..2b1da1d65 100644 --- a/src/Form/matching-types.d.ts +++ b/src/Form/matching-types.d.ts @@ -56,7 +56,7 @@ type MatcherTypeNames = | 'birthdayMonth' | 'birthdayYear'; -type FormMatcherNames = 'loginRegex' | 'signupRegex' | 'conservativeSignupRegex' | 'resetPasswordLink' | 'loginProvidersRegex'; +type FormMatcherNames = 'loginRegex' | 'signupRegex' | 'conservativeSignupRegex' | 'resetPasswordLink' | 'loginProvidersRegex' | 'passwordHintsRegex'; type ButtonMatcherNames = 'submitButtonRegex' | 'submitButtonUnlikelyRegex'; diff --git a/swift-package/Resources/assets/autofill-debug.js b/swift-package/Resources/assets/autofill-debug.js index 3b1e7e8f3..f1385415c 100644 --- a/swift-package/Resources/assets/autofill-debug.js +++ b/swift-package/Resources/assets/autofill-debug.js @@ -11295,10 +11295,9 @@ class FormAnalyzer { if (relevantFields.length >= 4) { this.increaseSignalBy(relevantFields.length * 1.5, 'many fields: it is probably not a login'); } - const passwordHintRegex = /\b(?:password.*?(?:must|should|has to|needs to|can))?\b.*?(?:(at least|minimum|no fewer than)\s+\d+\s+(characters?|letters?|numbers?|special characters?)|(uppercase|lowercase|capital|digit|number|symbol|special character)|\b(no spaces|cannot contain your email|cannot repeat characters|must be unique|case sensitive)\b)/; // If the form contains password hints, it's highly likely a signup form. - const hasPasswordHints = Array.from(this.form.querySelectorAll('div, span')).filter(div => div.textContent != null && div.textContent.trim() !== '' && window.getComputedStyle(div).display !== 'none' && window.getComputedStyle(div).visibility !== 'hidden').some(div => div.textContent && (0, _autofillUtils.safeRegexTest)(passwordHintRegex, div.textContent)); + const hasPasswordHints = Array.from(this.form.querySelectorAll('div, span')).filter(div => div.textContent != null && div.textContent.trim() !== '' && window.getComputedStyle(div).display !== 'none' && window.getComputedStyle(div).visibility !== 'hidden').some(div => div.textContent && (0, _autofillUtils.safeRegexTest)(this.matching.getDDGMatcherRegex('passwordHintsRegex'), div.textContent)); if (hasPasswordHints) { this.increaseSignalBy(6, 'Password hints'); } @@ -13006,6 +13005,9 @@ const matchingConfiguration = exports.matchingConfiguration = { loginProvidersRegex: { match: / with | con | mit | met | avec /iu }, + passwordHintsRegex: { + match: /\b(?:password.*?(?:must|should|has to|needs to|can))?\b.*?(?:(at least|minimum|no fewer than)\s+\d+\s+(characters?|letters?|numbers?|special characters?)|(uppercase|lowercase|capital|digit|number|symbol|special character)|(no spaces|cannot contain your email|cannot repeat characters|must be unique|case sensitive)\b)/iu + }, submitButtonRegex: { match: /submit|send|confirm|save|continue|next|sign|log.?([io])n|buy|purchase|check.?out|subscribe|donate|update|\bset\b|invia|conferma|salva|continua|entra|acced|accesso|compra|paga|sottoscriv|registra|dona|senden|\bja\b|bestätigen|weiter|nächste|kaufen|bezahlen|spenden|versturen|verzenden|opslaan|volgende|koop|kopen|voeg toe|aanmelden|envoyer|confirmer|sauvegarder|continuer|suivant|signer|connexion|acheter|payer|s.abonner|donner|enviar|confirmar|registrarse|continuar|siguiente|comprar|donar|skicka|bekräfta|spara|fortsätt|nästa|logga in|köp|handla|till kassan|registrera|donera/iu }, diff --git a/swift-package/Resources/assets/autofill.js b/swift-package/Resources/assets/autofill.js index 3399a5f60..db4e900de 100644 --- a/swift-package/Resources/assets/autofill.js +++ b/swift-package/Resources/assets/autofill.js @@ -6932,10 +6932,9 @@ class FormAnalyzer { if (relevantFields.length >= 4) { this.increaseSignalBy(relevantFields.length * 1.5, 'many fields: it is probably not a login'); } - const passwordHintRegex = /\b(?:password.*?(?:must|should|has to|needs to|can))?\b.*?(?:(at least|minimum|no fewer than)\s+\d+\s+(characters?|letters?|numbers?|special characters?)|(uppercase|lowercase|capital|digit|number|symbol|special character)|\b(no spaces|cannot contain your email|cannot repeat characters|must be unique|case sensitive)\b)/; // If the form contains password hints, it's highly likely a signup form. - const hasPasswordHints = Array.from(this.form.querySelectorAll('div, span')).filter(div => div.textContent != null && div.textContent.trim() !== '' && window.getComputedStyle(div).display !== 'none' && window.getComputedStyle(div).visibility !== 'hidden').some(div => div.textContent && (0, _autofillUtils.safeRegexTest)(passwordHintRegex, div.textContent)); + const hasPasswordHints = Array.from(this.form.querySelectorAll('div, span')).filter(div => div.textContent != null && div.textContent.trim() !== '' && window.getComputedStyle(div).display !== 'none' && window.getComputedStyle(div).visibility !== 'hidden').some(div => div.textContent && (0, _autofillUtils.safeRegexTest)(this.matching.getDDGMatcherRegex('passwordHintsRegex'), div.textContent)); if (hasPasswordHints) { this.increaseSignalBy(6, 'Password hints'); } @@ -8643,6 +8642,9 @@ const matchingConfiguration = exports.matchingConfiguration = { loginProvidersRegex: { match: / with | con | mit | met | avec /iu }, + passwordHintsRegex: { + match: /\b(?:password.*?(?:must|should|has to|needs to|can))?\b.*?(?:(at least|minimum|no fewer than)\s+\d+\s+(characters?|letters?|numbers?|special characters?)|(uppercase|lowercase|capital|digit|number|symbol|special character)|(no spaces|cannot contain your email|cannot repeat characters|must be unique|case sensitive)\b)/iu + }, submitButtonRegex: { match: /submit|send|confirm|save|continue|next|sign|log.?([io])n|buy|purchase|check.?out|subscribe|donate|update|\bset\b|invia|conferma|salva|continua|entra|acced|accesso|compra|paga|sottoscriv|registra|dona|senden|\bja\b|bestätigen|weiter|nächste|kaufen|bezahlen|spenden|versturen|verzenden|opslaan|volgende|koop|kopen|voeg toe|aanmelden|envoyer|confirmer|sauvegarder|continuer|suivant|signer|connexion|acheter|payer|s.abonner|donner|enviar|confirmar|registrarse|continuar|siguiente|comprar|donar|skicka|bekräfta|spara|fortsätt|nästa|logga in|köp|handla|till kassan|registrera|donera/iu }, From a08905cf058d34bac8a18f68b47eae42adde2698 Mon Sep 17 00:00:00 2001 From: dbajpeyi Date: Thu, 19 Dec 2024 12:33:28 +0100 Subject: [PATCH 3/6] wip: try out header signals --- dist/autofill-debug.js | 39 +++++++++-- dist/autofill.js | 39 +++++++++-- src/Form/FormAnalyzer.js | 64 +++++++++++++++---- .../Resources/assets/autofill-debug.js | 39 +++++++++-- swift-package/Resources/assets/autofill.js | 39 +++++++++-- 5 files changed, 188 insertions(+), 32 deletions(-) diff --git a/dist/autofill-debug.js b/dist/autofill-debug.js index f1385415c..21e061582 100644 --- a/dist/autofill-debug.js +++ b/dist/autofill-debug.js @@ -11182,6 +11182,37 @@ class FormAnalyzer { } }); } + updateFormHeaderSignals() { + const isVisuallyBeforeForm = el => el.getBoundingClientRect().top < this.form.getBoundingClientRect().top; + const isHeaderSized = el => { + if (el instanceof HTMLHeadingElement) { + return true; + } + const computedStyle = window.getComputedStyle(el); + const fontWeight = computedStyle.fontWeight; + const isRelativelyTall = parseFloat(computedStyle.height) / this.form.clientHeight > 0.1; + if (fontWeight === 'bold' || parseFloat(fontWeight) >= 700 || isRelativelyTall) { + return true; + } + }; + const allSiblings = Array.from(this.form.parentElement?.children ?? []).filter(element => element !== this.form); + if (allSiblings.length === 0) return false; + allSiblings.forEach(sibling => { + if (sibling instanceof HTMLElement && sibling.childElementCount === 1 && isVisuallyBeforeForm(sibling) && isHeaderSized(sibling)) { + const string = sibling.textContent?.trim(); + if (string) { + if ((0, _autofillUtils.safeRegexTest)(/^(sign[- ]?in|log[- ]?in)$/, string)) { + return this.decreaseSignalBy(3, 'Strong login header before form'); + } else if ((0, _autofillUtils.safeRegexTest)(/^(sign[- ]?up)$/, string)) { + return this.increaseSignalBy(3, 'Strong signup header before form'); + } + } + } + }); + } + hasPasswordHints() { + return Array.from(this.form.querySelectorAll('div, span')).filter(div => div.textContent != null && div.textContent.trim() !== '' && window.getComputedStyle(div).display !== 'none' && window.getComputedStyle(div).visibility !== 'hidden').some(div => div.textContent && (0, _autofillUtils.safeRegexTest)(this.matching.getDDGMatcherRegex('passwordHintsRegex'), div.textContent)); + } /** * Function that checks if the element is an external link or a custom web element that @@ -11295,11 +11326,9 @@ class FormAnalyzer { if (relevantFields.length >= 4) { this.increaseSignalBy(relevantFields.length * 1.5, 'many fields: it is probably not a login'); } - - // If the form contains password hints, it's highly likely a signup form. - const hasPasswordHints = Array.from(this.form.querySelectorAll('div, span')).filter(div => div.textContent != null && div.textContent.trim() !== '' && window.getComputedStyle(div).display !== 'none' && window.getComputedStyle(div).visibility !== 'hidden').some(div => div.textContent && (0, _autofillUtils.safeRegexTest)(this.matching.getDDGMatcherRegex('passwordHintsRegex'), div.textContent)); - if (hasPasswordHints) { - this.increaseSignalBy(6, 'Password hints'); + this.updateFormHeaderSignals(); + if (this.hasPasswordHints()) { + this.increaseSignalBy(3, 'Password hints'); } // If we can't decide at this point, try reading page headings diff --git a/dist/autofill.js b/dist/autofill.js index db4e900de..f2cbfc488 100644 --- a/dist/autofill.js +++ b/dist/autofill.js @@ -6819,6 +6819,37 @@ class FormAnalyzer { } }); } + updateFormHeaderSignals() { + const isVisuallyBeforeForm = el => el.getBoundingClientRect().top < this.form.getBoundingClientRect().top; + const isHeaderSized = el => { + if (el instanceof HTMLHeadingElement) { + return true; + } + const computedStyle = window.getComputedStyle(el); + const fontWeight = computedStyle.fontWeight; + const isRelativelyTall = parseFloat(computedStyle.height) / this.form.clientHeight > 0.1; + if (fontWeight === 'bold' || parseFloat(fontWeight) >= 700 || isRelativelyTall) { + return true; + } + }; + const allSiblings = Array.from(this.form.parentElement?.children ?? []).filter(element => element !== this.form); + if (allSiblings.length === 0) return false; + allSiblings.forEach(sibling => { + if (sibling instanceof HTMLElement && sibling.childElementCount === 1 && isVisuallyBeforeForm(sibling) && isHeaderSized(sibling)) { + const string = sibling.textContent?.trim(); + if (string) { + if ((0, _autofillUtils.safeRegexTest)(/^(sign[- ]?in|log[- ]?in)$/, string)) { + return this.decreaseSignalBy(3, 'Strong login header before form'); + } else if ((0, _autofillUtils.safeRegexTest)(/^(sign[- ]?up)$/, string)) { + return this.increaseSignalBy(3, 'Strong signup header before form'); + } + } + } + }); + } + hasPasswordHints() { + return Array.from(this.form.querySelectorAll('div, span')).filter(div => div.textContent != null && div.textContent.trim() !== '' && window.getComputedStyle(div).display !== 'none' && window.getComputedStyle(div).visibility !== 'hidden').some(div => div.textContent && (0, _autofillUtils.safeRegexTest)(this.matching.getDDGMatcherRegex('passwordHintsRegex'), div.textContent)); + } /** * Function that checks if the element is an external link or a custom web element that @@ -6932,11 +6963,9 @@ class FormAnalyzer { if (relevantFields.length >= 4) { this.increaseSignalBy(relevantFields.length * 1.5, 'many fields: it is probably not a login'); } - - // If the form contains password hints, it's highly likely a signup form. - const hasPasswordHints = Array.from(this.form.querySelectorAll('div, span')).filter(div => div.textContent != null && div.textContent.trim() !== '' && window.getComputedStyle(div).display !== 'none' && window.getComputedStyle(div).visibility !== 'hidden').some(div => div.textContent && (0, _autofillUtils.safeRegexTest)(this.matching.getDDGMatcherRegex('passwordHintsRegex'), div.textContent)); - if (hasPasswordHints) { - this.increaseSignalBy(6, 'Password hints'); + this.updateFormHeaderSignals(); + if (this.hasPasswordHints()) { + this.increaseSignalBy(3, 'Password hints'); } // If we can't decide at this point, try reading page headings diff --git a/src/Form/FormAnalyzer.js b/src/Form/FormAnalyzer.js index c61fc4fc8..473933fbb 100644 --- a/src/Form/FormAnalyzer.js +++ b/src/Form/FormAnalyzer.js @@ -229,6 +229,55 @@ class FormAnalyzer { }); } + + updateFormHeaderSignals() { + + const isVisuallyBeforeForm = (el) => el.getBoundingClientRect().top < this.form.getBoundingClientRect().top; + + const isHeaderSized = (el) => { + if (el instanceof HTMLHeadingElement) { + return true; + } + + const computedStyle = window.getComputedStyle(el); + const fontWeight = computedStyle.fontWeight; + const isRelativelyTall = parseFloat(computedStyle.height) / this.form.clientHeight > 0.1; + if (fontWeight === 'bold' || parseFloat(fontWeight) >= 700 || isRelativelyTall) { + return true + } + } + + const allSiblings = Array.from(this.form.parentElement?.children ?? []) + .filter((element) => element !== this.form) + if (allSiblings.length === 0) return false; + + + allSiblings.forEach((sibling) => { + if (sibling instanceof HTMLElement && sibling.childElementCount === 1 && isVisuallyBeforeForm(sibling) && isHeaderSized(sibling)) { + const string = sibling.textContent?.trim(); + if (string) { + if (safeRegexTest(/^(sign[- ]?in|log[- ]?in)$/, string)) { + return this.decreaseSignalBy(3, 'Strong login header before form'); + } else if (safeRegexTest(/^(sign[- ]?up)$/, string)) { + return this.increaseSignalBy(3, 'Strong signup header before form'); + } + } + } + }); + } + + hasPasswordHints() { + return Array.from(this.form.querySelectorAll('div, span')) + .filter( + (div) => + div.textContent != null && + div.textContent.trim() !== '' && + window.getComputedStyle(div).display !== 'none' && + window.getComputedStyle(div).visibility !== 'hidden', + ) + .some((div) => div.textContent && safeRegexTest(this.matching.getDDGMatcherRegex('passwordHintsRegex'), div.textContent)); + } + /** * Function that checks if the element is an external link or a custom web element that * encapsulates a link. @@ -336,19 +385,10 @@ class FormAnalyzer { this.increaseSignalBy(relevantFields.length * 1.5, 'many fields: it is probably not a login'); } - // If the form contains password hints, it's highly likely a signup form. - const hasPasswordHints = Array.from(this.form.querySelectorAll('div, span')) - .filter( - (div) => - div.textContent != null && - div.textContent.trim() !== '' && - window.getComputedStyle(div).display !== 'none' && - window.getComputedStyle(div).visibility !== 'hidden', - ) - .some((div) => div.textContent && safeRegexTest(this.matching.getDDGMatcherRegex('passwordHintsRegex'), div.textContent)); + this.updateFormHeaderSignals(); - if (hasPasswordHints) { - this.increaseSignalBy(6, 'Password hints'); + if (this.hasPasswordHints()) { + this.increaseSignalBy(3, 'Password hints'); } // If we can't decide at this point, try reading page headings diff --git a/swift-package/Resources/assets/autofill-debug.js b/swift-package/Resources/assets/autofill-debug.js index f1385415c..21e061582 100644 --- a/swift-package/Resources/assets/autofill-debug.js +++ b/swift-package/Resources/assets/autofill-debug.js @@ -11182,6 +11182,37 @@ class FormAnalyzer { } }); } + updateFormHeaderSignals() { + const isVisuallyBeforeForm = el => el.getBoundingClientRect().top < this.form.getBoundingClientRect().top; + const isHeaderSized = el => { + if (el instanceof HTMLHeadingElement) { + return true; + } + const computedStyle = window.getComputedStyle(el); + const fontWeight = computedStyle.fontWeight; + const isRelativelyTall = parseFloat(computedStyle.height) / this.form.clientHeight > 0.1; + if (fontWeight === 'bold' || parseFloat(fontWeight) >= 700 || isRelativelyTall) { + return true; + } + }; + const allSiblings = Array.from(this.form.parentElement?.children ?? []).filter(element => element !== this.form); + if (allSiblings.length === 0) return false; + allSiblings.forEach(sibling => { + if (sibling instanceof HTMLElement && sibling.childElementCount === 1 && isVisuallyBeforeForm(sibling) && isHeaderSized(sibling)) { + const string = sibling.textContent?.trim(); + if (string) { + if ((0, _autofillUtils.safeRegexTest)(/^(sign[- ]?in|log[- ]?in)$/, string)) { + return this.decreaseSignalBy(3, 'Strong login header before form'); + } else if ((0, _autofillUtils.safeRegexTest)(/^(sign[- ]?up)$/, string)) { + return this.increaseSignalBy(3, 'Strong signup header before form'); + } + } + } + }); + } + hasPasswordHints() { + return Array.from(this.form.querySelectorAll('div, span')).filter(div => div.textContent != null && div.textContent.trim() !== '' && window.getComputedStyle(div).display !== 'none' && window.getComputedStyle(div).visibility !== 'hidden').some(div => div.textContent && (0, _autofillUtils.safeRegexTest)(this.matching.getDDGMatcherRegex('passwordHintsRegex'), div.textContent)); + } /** * Function that checks if the element is an external link or a custom web element that @@ -11295,11 +11326,9 @@ class FormAnalyzer { if (relevantFields.length >= 4) { this.increaseSignalBy(relevantFields.length * 1.5, 'many fields: it is probably not a login'); } - - // If the form contains password hints, it's highly likely a signup form. - const hasPasswordHints = Array.from(this.form.querySelectorAll('div, span')).filter(div => div.textContent != null && div.textContent.trim() !== '' && window.getComputedStyle(div).display !== 'none' && window.getComputedStyle(div).visibility !== 'hidden').some(div => div.textContent && (0, _autofillUtils.safeRegexTest)(this.matching.getDDGMatcherRegex('passwordHintsRegex'), div.textContent)); - if (hasPasswordHints) { - this.increaseSignalBy(6, 'Password hints'); + this.updateFormHeaderSignals(); + if (this.hasPasswordHints()) { + this.increaseSignalBy(3, 'Password hints'); } // If we can't decide at this point, try reading page headings diff --git a/swift-package/Resources/assets/autofill.js b/swift-package/Resources/assets/autofill.js index db4e900de..f2cbfc488 100644 --- a/swift-package/Resources/assets/autofill.js +++ b/swift-package/Resources/assets/autofill.js @@ -6819,6 +6819,37 @@ class FormAnalyzer { } }); } + updateFormHeaderSignals() { + const isVisuallyBeforeForm = el => el.getBoundingClientRect().top < this.form.getBoundingClientRect().top; + const isHeaderSized = el => { + if (el instanceof HTMLHeadingElement) { + return true; + } + const computedStyle = window.getComputedStyle(el); + const fontWeight = computedStyle.fontWeight; + const isRelativelyTall = parseFloat(computedStyle.height) / this.form.clientHeight > 0.1; + if (fontWeight === 'bold' || parseFloat(fontWeight) >= 700 || isRelativelyTall) { + return true; + } + }; + const allSiblings = Array.from(this.form.parentElement?.children ?? []).filter(element => element !== this.form); + if (allSiblings.length === 0) return false; + allSiblings.forEach(sibling => { + if (sibling instanceof HTMLElement && sibling.childElementCount === 1 && isVisuallyBeforeForm(sibling) && isHeaderSized(sibling)) { + const string = sibling.textContent?.trim(); + if (string) { + if ((0, _autofillUtils.safeRegexTest)(/^(sign[- ]?in|log[- ]?in)$/, string)) { + return this.decreaseSignalBy(3, 'Strong login header before form'); + } else if ((0, _autofillUtils.safeRegexTest)(/^(sign[- ]?up)$/, string)) { + return this.increaseSignalBy(3, 'Strong signup header before form'); + } + } + } + }); + } + hasPasswordHints() { + return Array.from(this.form.querySelectorAll('div, span')).filter(div => div.textContent != null && div.textContent.trim() !== '' && window.getComputedStyle(div).display !== 'none' && window.getComputedStyle(div).visibility !== 'hidden').some(div => div.textContent && (0, _autofillUtils.safeRegexTest)(this.matching.getDDGMatcherRegex('passwordHintsRegex'), div.textContent)); + } /** * Function that checks if the element is an external link or a custom web element that @@ -6932,11 +6963,9 @@ class FormAnalyzer { if (relevantFields.length >= 4) { this.increaseSignalBy(relevantFields.length * 1.5, 'many fields: it is probably not a login'); } - - // If the form contains password hints, it's highly likely a signup form. - const hasPasswordHints = Array.from(this.form.querySelectorAll('div, span')).filter(div => div.textContent != null && div.textContent.trim() !== '' && window.getComputedStyle(div).display !== 'none' && window.getComputedStyle(div).visibility !== 'hidden').some(div => div.textContent && (0, _autofillUtils.safeRegexTest)(this.matching.getDDGMatcherRegex('passwordHintsRegex'), div.textContent)); - if (hasPasswordHints) { - this.increaseSignalBy(6, 'Password hints'); + this.updateFormHeaderSignals(); + if (this.hasPasswordHints()) { + this.increaseSignalBy(3, 'Password hints'); } // If we can't decide at this point, try reading page headings From 0640b6fb7dbaca77b993cc036f9d3c9d56c4cd32 Mon Sep 17 00:00:00 2001 From: dbajpeyi Date: Thu, 19 Dec 2024 14:47:51 +0100 Subject: [PATCH 4/6] feat: evaluate headers near form element --- dist/autofill-debug.js | 76 ++++++++++++++---- dist/autofill.js | 76 ++++++++++++++---- src/Form/FormAnalyzer.js | 80 ++++++++++++++----- .../Resources/assets/autofill-debug.js | 76 ++++++++++++++---- swift-package/Resources/assets/autofill.js | 76 ++++++++++++++---- 5 files changed, 296 insertions(+), 88 deletions(-) diff --git a/dist/autofill-debug.js b/dist/autofill-debug.js index 21e061582..516872a06 100644 --- a/dist/autofill-debug.js +++ b/dist/autofill-debug.js @@ -11015,6 +11015,9 @@ class FormAnalyzer { } return this; } + areLoginOrSignupSignalsWeak() { + return Math.abs(this.autofillSignal) < 10; + } /** * Hybrid forms can be used for both login and signup @@ -11022,8 +11025,8 @@ class FormAnalyzer { */ get isHybrid() { // When marking for hybrid we also want to ensure other signals are weak - const areOtherSignalsWeak = Math.abs(this.autofillSignal) < 10; - return this.hybridSignal > 0 && areOtherSignalsWeak; + + return this.hybridSignal > 0 && this.areLoginOrSignupSignalsWeak(); } get isLogin() { if (this.isHybrid) return false; @@ -11182,7 +11185,41 @@ class FormAnalyzer { } }); } - updateFormHeaderSignals() { + + /** + * Takes an element and returns all its children that are text-only nodes + * @param {HTMLElement|Element} element + * @param {number} maxDepth + * @param {number} currentDepth + * @returns {HTMLElement[]|Element[]} + */ + getElementsWithOnlyTextChild(element) { + let maxDepth = arguments.length > 1 && arguments[1] !== undefined ? arguments[1] : 2; + let currentDepth = arguments.length > 2 && arguments[2] !== undefined ? arguments[2] : 0; + // Array to collect elements with only text child nodes + const elementsWithTextChild = []; + + // If we've reached the max depth, stop traversing further + if (currentDepth > maxDepth) { + return elementsWithTextChild; + } + + // Check if the current element has only one text child node + if (element.nodeType === Node.ELEMENT_NODE) { + const childNodes = element.childNodes; + if (childNodes.length === 1 && childNodes[0].nodeType === Node.TEXT_NODE) { + elementsWithTextChild.push(element); + } + } + + // Recurse through each child element and collect matching elements + for (const child of element.children) { + // Recursively get elements from child elements, increasing depth by 1 + elementsWithTextChild.push(...this.getElementsWithOnlyTextChild(child, maxDepth, currentDepth + 1)); + } + return elementsWithTextChild; + } + evaluateFormHeaderSignals() { const isVisuallyBeforeForm = el => el.getBoundingClientRect().top < this.form.getBoundingClientRect().top; const isHeaderSized = el => { if (el instanceof HTMLHeadingElement) { @@ -11191,27 +11228,30 @@ class FormAnalyzer { const computedStyle = window.getComputedStyle(el); const fontWeight = computedStyle.fontWeight; const isRelativelyTall = parseFloat(computedStyle.height) / this.form.clientHeight > 0.1; - if (fontWeight === 'bold' || parseFloat(fontWeight) >= 700 || isRelativelyTall) { + if (isRelativelyTall && (fontWeight === 'bold' || parseFloat(fontWeight) >= 700)) { return true; } }; - const allSiblings = Array.from(this.form.parentElement?.children ?? []).filter(element => element !== this.form); + const allSiblings = Array.from(this.form.parentElement?.children ?? []).filter(element => element !== this.form).map(element => this.getElementsWithOnlyTextChild(element)).flat(); if (allSiblings.length === 0) return false; - allSiblings.forEach(sibling => { - if (sibling instanceof HTMLElement && sibling.childElementCount === 1 && isVisuallyBeforeForm(sibling) && isHeaderSized(sibling)) { - const string = sibling.textContent?.trim(); + allSiblings.forEach(element => { + if (element instanceof HTMLElement && isVisuallyBeforeForm(element) && isHeaderSized(element)) { + const string = element.textContent?.trim(); if (string) { - if ((0, _autofillUtils.safeRegexTest)(/^(sign[- ]?in|log[- ]?in)$/, string)) { - return this.decreaseSignalBy(3, 'Strong login header before form'); - } else if ((0, _autofillUtils.safeRegexTest)(/^(sign[- ]?up)$/, string)) { - return this.increaseSignalBy(3, 'Strong signup header before form'); + if ((0, _autofillUtils.safeRegexTest)(/^(sign[- ]?in|log[- ]?in)$/i, string)) { + return this.decreaseSignalBy(3, 'Strong login signal above form'); + } else if ((0, _autofillUtils.safeRegexTest)(/^(sign[- ]?up)$/i, string)) { + return this.increaseSignalBy(3, 'Strong signup signal above form'); } } } }); } - hasPasswordHints() { - return Array.from(this.form.querySelectorAll('div, span')).filter(div => div.textContent != null && div.textContent.trim() !== '' && window.getComputedStyle(div).display !== 'none' && window.getComputedStyle(div).visibility !== 'hidden').some(div => div.textContent && (0, _autofillUtils.safeRegexTest)(this.matching.getDDGMatcherRegex('passwordHintsRegex'), div.textContent)); + evaluatePasswordHints() { + const hasPasswordHints = Array.from(this.form.querySelectorAll('div, span')).filter(div => div.textContent != null && div.textContent.trim() !== '' && window.getComputedStyle(div).display !== 'none' && window.getComputedStyle(div).visibility !== 'hidden').some(div => div.textContent && (0, _autofillUtils.safeRegexTest)(this.matching.getDDGMatcherRegex('passwordHintsRegex'), div.textContent)); + if (hasPasswordHints) { + this.increaseSignalBy(3, 'Password hints'); + } } /** @@ -11326,9 +11366,11 @@ class FormAnalyzer { if (relevantFields.length >= 4) { this.increaseSignalBy(relevantFields.length * 1.5, 'many fields: it is probably not a login'); } - this.updateFormHeaderSignals(); - if (this.hasPasswordHints()) { - this.increaseSignalBy(3, 'Password hints'); + + // If we can't decide at this point, try reading form headers and password hints + if (this.areLoginOrSignupSignalsWeak()) { + this.evaluatePasswordHints(); + this.evaluateFormHeaderSignals(); } // If we can't decide at this point, try reading page headings diff --git a/dist/autofill.js b/dist/autofill.js index f2cbfc488..25c5d33e3 100644 --- a/dist/autofill.js +++ b/dist/autofill.js @@ -6652,6 +6652,9 @@ class FormAnalyzer { } return this; } + areLoginOrSignupSignalsWeak() { + return Math.abs(this.autofillSignal) < 10; + } /** * Hybrid forms can be used for both login and signup @@ -6659,8 +6662,8 @@ class FormAnalyzer { */ get isHybrid() { // When marking for hybrid we also want to ensure other signals are weak - const areOtherSignalsWeak = Math.abs(this.autofillSignal) < 10; - return this.hybridSignal > 0 && areOtherSignalsWeak; + + return this.hybridSignal > 0 && this.areLoginOrSignupSignalsWeak(); } get isLogin() { if (this.isHybrid) return false; @@ -6819,7 +6822,41 @@ class FormAnalyzer { } }); } - updateFormHeaderSignals() { + + /** + * Takes an element and returns all its children that are text-only nodes + * @param {HTMLElement|Element} element + * @param {number} maxDepth + * @param {number} currentDepth + * @returns {HTMLElement[]|Element[]} + */ + getElementsWithOnlyTextChild(element) { + let maxDepth = arguments.length > 1 && arguments[1] !== undefined ? arguments[1] : 2; + let currentDepth = arguments.length > 2 && arguments[2] !== undefined ? arguments[2] : 0; + // Array to collect elements with only text child nodes + const elementsWithTextChild = []; + + // If we've reached the max depth, stop traversing further + if (currentDepth > maxDepth) { + return elementsWithTextChild; + } + + // Check if the current element has only one text child node + if (element.nodeType === Node.ELEMENT_NODE) { + const childNodes = element.childNodes; + if (childNodes.length === 1 && childNodes[0].nodeType === Node.TEXT_NODE) { + elementsWithTextChild.push(element); + } + } + + // Recurse through each child element and collect matching elements + for (const child of element.children) { + // Recursively get elements from child elements, increasing depth by 1 + elementsWithTextChild.push(...this.getElementsWithOnlyTextChild(child, maxDepth, currentDepth + 1)); + } + return elementsWithTextChild; + } + evaluateFormHeaderSignals() { const isVisuallyBeforeForm = el => el.getBoundingClientRect().top < this.form.getBoundingClientRect().top; const isHeaderSized = el => { if (el instanceof HTMLHeadingElement) { @@ -6828,27 +6865,30 @@ class FormAnalyzer { const computedStyle = window.getComputedStyle(el); const fontWeight = computedStyle.fontWeight; const isRelativelyTall = parseFloat(computedStyle.height) / this.form.clientHeight > 0.1; - if (fontWeight === 'bold' || parseFloat(fontWeight) >= 700 || isRelativelyTall) { + if (isRelativelyTall && (fontWeight === 'bold' || parseFloat(fontWeight) >= 700)) { return true; } }; - const allSiblings = Array.from(this.form.parentElement?.children ?? []).filter(element => element !== this.form); + const allSiblings = Array.from(this.form.parentElement?.children ?? []).filter(element => element !== this.form).map(element => this.getElementsWithOnlyTextChild(element)).flat(); if (allSiblings.length === 0) return false; - allSiblings.forEach(sibling => { - if (sibling instanceof HTMLElement && sibling.childElementCount === 1 && isVisuallyBeforeForm(sibling) && isHeaderSized(sibling)) { - const string = sibling.textContent?.trim(); + allSiblings.forEach(element => { + if (element instanceof HTMLElement && isVisuallyBeforeForm(element) && isHeaderSized(element)) { + const string = element.textContent?.trim(); if (string) { - if ((0, _autofillUtils.safeRegexTest)(/^(sign[- ]?in|log[- ]?in)$/, string)) { - return this.decreaseSignalBy(3, 'Strong login header before form'); - } else if ((0, _autofillUtils.safeRegexTest)(/^(sign[- ]?up)$/, string)) { - return this.increaseSignalBy(3, 'Strong signup header before form'); + if ((0, _autofillUtils.safeRegexTest)(/^(sign[- ]?in|log[- ]?in)$/i, string)) { + return this.decreaseSignalBy(3, 'Strong login signal above form'); + } else if ((0, _autofillUtils.safeRegexTest)(/^(sign[- ]?up)$/i, string)) { + return this.increaseSignalBy(3, 'Strong signup signal above form'); } } } }); } - hasPasswordHints() { - return Array.from(this.form.querySelectorAll('div, span')).filter(div => div.textContent != null && div.textContent.trim() !== '' && window.getComputedStyle(div).display !== 'none' && window.getComputedStyle(div).visibility !== 'hidden').some(div => div.textContent && (0, _autofillUtils.safeRegexTest)(this.matching.getDDGMatcherRegex('passwordHintsRegex'), div.textContent)); + evaluatePasswordHints() { + const hasPasswordHints = Array.from(this.form.querySelectorAll('div, span')).filter(div => div.textContent != null && div.textContent.trim() !== '' && window.getComputedStyle(div).display !== 'none' && window.getComputedStyle(div).visibility !== 'hidden').some(div => div.textContent && (0, _autofillUtils.safeRegexTest)(this.matching.getDDGMatcherRegex('passwordHintsRegex'), div.textContent)); + if (hasPasswordHints) { + this.increaseSignalBy(3, 'Password hints'); + } } /** @@ -6963,9 +7003,11 @@ class FormAnalyzer { if (relevantFields.length >= 4) { this.increaseSignalBy(relevantFields.length * 1.5, 'many fields: it is probably not a login'); } - this.updateFormHeaderSignals(); - if (this.hasPasswordHints()) { - this.increaseSignalBy(3, 'Password hints'); + + // If we can't decide at this point, try reading form headers and password hints + if (this.areLoginOrSignupSignalsWeak()) { + this.evaluatePasswordHints(); + this.evaluateFormHeaderSignals(); } // If we can't decide at this point, try reading page headings diff --git a/src/Form/FormAnalyzer.js b/src/Form/FormAnalyzer.js index 473933fbb..003038353 100644 --- a/src/Form/FormAnalyzer.js +++ b/src/Form/FormAnalyzer.js @@ -53,15 +53,18 @@ class FormAnalyzer { return this; } + areLoginOrSignupSignalsWeak() { + return Math.abs(this.autofillSignal) < 10; + } + /** * Hybrid forms can be used for both login and signup * @returns {boolean} */ get isHybrid() { // When marking for hybrid we also want to ensure other signals are weak - const areOtherSignalsWeak = Math.abs(this.autofillSignal) < 10; - return this.hybridSignal > 0 && areOtherSignalsWeak; + return this.hybridSignal > 0 && this.areLoginOrSignupSignalsWeak(); } get isLogin() { @@ -229,9 +232,41 @@ class FormAnalyzer { }); } + /** + * Takes an element and returns all its children that are text-only nodes + * @param {HTMLElement|Element} element + * @param {number} maxDepth + * @param {number} currentDepth + * @returns {HTMLElement[]|Element[]} + */ + getElementsWithOnlyTextChild(element, maxDepth = 2, currentDepth = 0) { + // Array to collect elements with only text child nodes + const elementsWithTextChild = []; + + // If we've reached the max depth, stop traversing further + if (currentDepth > maxDepth) { + return elementsWithTextChild; + } - updateFormHeaderSignals() { + // Check if the current element has only one text child node + if (element.nodeType === Node.ELEMENT_NODE) { + const childNodes = element.childNodes; + + if (childNodes.length === 1 && childNodes[0].nodeType === Node.TEXT_NODE) { + elementsWithTextChild.push(element); + } + } + + // Recurse through each child element and collect matching elements + for (const child of element.children) { + // Recursively get elements from child elements, increasing depth by 1 + elementsWithTextChild.push(...this.getElementsWithOnlyTextChild(child, maxDepth, currentDepth + 1)); + } + return elementsWithTextChild; + } + + evaluateFormHeaderSignals() { const isVisuallyBeforeForm = (el) => el.getBoundingClientRect().top < this.form.getBoundingClientRect().top; const isHeaderSized = (el) => { @@ -242,32 +277,34 @@ class FormAnalyzer { const computedStyle = window.getComputedStyle(el); const fontWeight = computedStyle.fontWeight; const isRelativelyTall = parseFloat(computedStyle.height) / this.form.clientHeight > 0.1; - if (fontWeight === 'bold' || parseFloat(fontWeight) >= 700 || isRelativelyTall) { - return true + if (isRelativelyTall && (fontWeight === 'bold' || parseFloat(fontWeight) >= 700)) { + return true; } - } + }; const allSiblings = Array.from(this.form.parentElement?.children ?? []) .filter((element) => element !== this.form) - if (allSiblings.length === 0) return false; + .map((element) => this.getElementsWithOnlyTextChild(element)) + .flat(); + if (allSiblings.length === 0) return false; - allSiblings.forEach((sibling) => { - if (sibling instanceof HTMLElement && sibling.childElementCount === 1 && isVisuallyBeforeForm(sibling) && isHeaderSized(sibling)) { - const string = sibling.textContent?.trim(); + allSiblings.forEach((element) => { + if (element instanceof HTMLElement && isVisuallyBeforeForm(element) && isHeaderSized(element)) { + const string = element.textContent?.trim(); if (string) { - if (safeRegexTest(/^(sign[- ]?in|log[- ]?in)$/, string)) { - return this.decreaseSignalBy(3, 'Strong login header before form'); - } else if (safeRegexTest(/^(sign[- ]?up)$/, string)) { - return this.increaseSignalBy(3, 'Strong signup header before form'); + if (safeRegexTest(/^(sign[- ]?in|log[- ]?in)$/i, string)) { + return this.decreaseSignalBy(3, 'Strong login signal above form'); + } else if (safeRegexTest(/^(sign[- ]?up)$/i, string)) { + return this.increaseSignalBy(3, 'Strong signup signal above form'); } } } }); } - hasPasswordHints() { - return Array.from(this.form.querySelectorAll('div, span')) + evaluatePasswordHints() { + const hasPasswordHints = Array.from(this.form.querySelectorAll('div, span')) .filter( (div) => div.textContent != null && @@ -276,6 +313,9 @@ class FormAnalyzer { window.getComputedStyle(div).visibility !== 'hidden', ) .some((div) => div.textContent && safeRegexTest(this.matching.getDDGMatcherRegex('passwordHintsRegex'), div.textContent)); + if (hasPasswordHints) { + this.increaseSignalBy(3, 'Password hints'); + } } /** @@ -385,10 +425,10 @@ class FormAnalyzer { this.increaseSignalBy(relevantFields.length * 1.5, 'many fields: it is probably not a login'); } - this.updateFormHeaderSignals(); - - if (this.hasPasswordHints()) { - this.increaseSignalBy(3, 'Password hints'); + // If we can't decide at this point, try reading form headers and password hints + if (this.areLoginOrSignupSignalsWeak()) { + this.evaluatePasswordHints(); + this.evaluateFormHeaderSignals(); } // If we can't decide at this point, try reading page headings diff --git a/swift-package/Resources/assets/autofill-debug.js b/swift-package/Resources/assets/autofill-debug.js index 21e061582..516872a06 100644 --- a/swift-package/Resources/assets/autofill-debug.js +++ b/swift-package/Resources/assets/autofill-debug.js @@ -11015,6 +11015,9 @@ class FormAnalyzer { } return this; } + areLoginOrSignupSignalsWeak() { + return Math.abs(this.autofillSignal) < 10; + } /** * Hybrid forms can be used for both login and signup @@ -11022,8 +11025,8 @@ class FormAnalyzer { */ get isHybrid() { // When marking for hybrid we also want to ensure other signals are weak - const areOtherSignalsWeak = Math.abs(this.autofillSignal) < 10; - return this.hybridSignal > 0 && areOtherSignalsWeak; + + return this.hybridSignal > 0 && this.areLoginOrSignupSignalsWeak(); } get isLogin() { if (this.isHybrid) return false; @@ -11182,7 +11185,41 @@ class FormAnalyzer { } }); } - updateFormHeaderSignals() { + + /** + * Takes an element and returns all its children that are text-only nodes + * @param {HTMLElement|Element} element + * @param {number} maxDepth + * @param {number} currentDepth + * @returns {HTMLElement[]|Element[]} + */ + getElementsWithOnlyTextChild(element) { + let maxDepth = arguments.length > 1 && arguments[1] !== undefined ? arguments[1] : 2; + let currentDepth = arguments.length > 2 && arguments[2] !== undefined ? arguments[2] : 0; + // Array to collect elements with only text child nodes + const elementsWithTextChild = []; + + // If we've reached the max depth, stop traversing further + if (currentDepth > maxDepth) { + return elementsWithTextChild; + } + + // Check if the current element has only one text child node + if (element.nodeType === Node.ELEMENT_NODE) { + const childNodes = element.childNodes; + if (childNodes.length === 1 && childNodes[0].nodeType === Node.TEXT_NODE) { + elementsWithTextChild.push(element); + } + } + + // Recurse through each child element and collect matching elements + for (const child of element.children) { + // Recursively get elements from child elements, increasing depth by 1 + elementsWithTextChild.push(...this.getElementsWithOnlyTextChild(child, maxDepth, currentDepth + 1)); + } + return elementsWithTextChild; + } + evaluateFormHeaderSignals() { const isVisuallyBeforeForm = el => el.getBoundingClientRect().top < this.form.getBoundingClientRect().top; const isHeaderSized = el => { if (el instanceof HTMLHeadingElement) { @@ -11191,27 +11228,30 @@ class FormAnalyzer { const computedStyle = window.getComputedStyle(el); const fontWeight = computedStyle.fontWeight; const isRelativelyTall = parseFloat(computedStyle.height) / this.form.clientHeight > 0.1; - if (fontWeight === 'bold' || parseFloat(fontWeight) >= 700 || isRelativelyTall) { + if (isRelativelyTall && (fontWeight === 'bold' || parseFloat(fontWeight) >= 700)) { return true; } }; - const allSiblings = Array.from(this.form.parentElement?.children ?? []).filter(element => element !== this.form); + const allSiblings = Array.from(this.form.parentElement?.children ?? []).filter(element => element !== this.form).map(element => this.getElementsWithOnlyTextChild(element)).flat(); if (allSiblings.length === 0) return false; - allSiblings.forEach(sibling => { - if (sibling instanceof HTMLElement && sibling.childElementCount === 1 && isVisuallyBeforeForm(sibling) && isHeaderSized(sibling)) { - const string = sibling.textContent?.trim(); + allSiblings.forEach(element => { + if (element instanceof HTMLElement && isVisuallyBeforeForm(element) && isHeaderSized(element)) { + const string = element.textContent?.trim(); if (string) { - if ((0, _autofillUtils.safeRegexTest)(/^(sign[- ]?in|log[- ]?in)$/, string)) { - return this.decreaseSignalBy(3, 'Strong login header before form'); - } else if ((0, _autofillUtils.safeRegexTest)(/^(sign[- ]?up)$/, string)) { - return this.increaseSignalBy(3, 'Strong signup header before form'); + if ((0, _autofillUtils.safeRegexTest)(/^(sign[- ]?in|log[- ]?in)$/i, string)) { + return this.decreaseSignalBy(3, 'Strong login signal above form'); + } else if ((0, _autofillUtils.safeRegexTest)(/^(sign[- ]?up)$/i, string)) { + return this.increaseSignalBy(3, 'Strong signup signal above form'); } } } }); } - hasPasswordHints() { - return Array.from(this.form.querySelectorAll('div, span')).filter(div => div.textContent != null && div.textContent.trim() !== '' && window.getComputedStyle(div).display !== 'none' && window.getComputedStyle(div).visibility !== 'hidden').some(div => div.textContent && (0, _autofillUtils.safeRegexTest)(this.matching.getDDGMatcherRegex('passwordHintsRegex'), div.textContent)); + evaluatePasswordHints() { + const hasPasswordHints = Array.from(this.form.querySelectorAll('div, span')).filter(div => div.textContent != null && div.textContent.trim() !== '' && window.getComputedStyle(div).display !== 'none' && window.getComputedStyle(div).visibility !== 'hidden').some(div => div.textContent && (0, _autofillUtils.safeRegexTest)(this.matching.getDDGMatcherRegex('passwordHintsRegex'), div.textContent)); + if (hasPasswordHints) { + this.increaseSignalBy(3, 'Password hints'); + } } /** @@ -11326,9 +11366,11 @@ class FormAnalyzer { if (relevantFields.length >= 4) { this.increaseSignalBy(relevantFields.length * 1.5, 'many fields: it is probably not a login'); } - this.updateFormHeaderSignals(); - if (this.hasPasswordHints()) { - this.increaseSignalBy(3, 'Password hints'); + + // If we can't decide at this point, try reading form headers and password hints + if (this.areLoginOrSignupSignalsWeak()) { + this.evaluatePasswordHints(); + this.evaluateFormHeaderSignals(); } // If we can't decide at this point, try reading page headings diff --git a/swift-package/Resources/assets/autofill.js b/swift-package/Resources/assets/autofill.js index f2cbfc488..25c5d33e3 100644 --- a/swift-package/Resources/assets/autofill.js +++ b/swift-package/Resources/assets/autofill.js @@ -6652,6 +6652,9 @@ class FormAnalyzer { } return this; } + areLoginOrSignupSignalsWeak() { + return Math.abs(this.autofillSignal) < 10; + } /** * Hybrid forms can be used for both login and signup @@ -6659,8 +6662,8 @@ class FormAnalyzer { */ get isHybrid() { // When marking for hybrid we also want to ensure other signals are weak - const areOtherSignalsWeak = Math.abs(this.autofillSignal) < 10; - return this.hybridSignal > 0 && areOtherSignalsWeak; + + return this.hybridSignal > 0 && this.areLoginOrSignupSignalsWeak(); } get isLogin() { if (this.isHybrid) return false; @@ -6819,7 +6822,41 @@ class FormAnalyzer { } }); } - updateFormHeaderSignals() { + + /** + * Takes an element and returns all its children that are text-only nodes + * @param {HTMLElement|Element} element + * @param {number} maxDepth + * @param {number} currentDepth + * @returns {HTMLElement[]|Element[]} + */ + getElementsWithOnlyTextChild(element) { + let maxDepth = arguments.length > 1 && arguments[1] !== undefined ? arguments[1] : 2; + let currentDepth = arguments.length > 2 && arguments[2] !== undefined ? arguments[2] : 0; + // Array to collect elements with only text child nodes + const elementsWithTextChild = []; + + // If we've reached the max depth, stop traversing further + if (currentDepth > maxDepth) { + return elementsWithTextChild; + } + + // Check if the current element has only one text child node + if (element.nodeType === Node.ELEMENT_NODE) { + const childNodes = element.childNodes; + if (childNodes.length === 1 && childNodes[0].nodeType === Node.TEXT_NODE) { + elementsWithTextChild.push(element); + } + } + + // Recurse through each child element and collect matching elements + for (const child of element.children) { + // Recursively get elements from child elements, increasing depth by 1 + elementsWithTextChild.push(...this.getElementsWithOnlyTextChild(child, maxDepth, currentDepth + 1)); + } + return elementsWithTextChild; + } + evaluateFormHeaderSignals() { const isVisuallyBeforeForm = el => el.getBoundingClientRect().top < this.form.getBoundingClientRect().top; const isHeaderSized = el => { if (el instanceof HTMLHeadingElement) { @@ -6828,27 +6865,30 @@ class FormAnalyzer { const computedStyle = window.getComputedStyle(el); const fontWeight = computedStyle.fontWeight; const isRelativelyTall = parseFloat(computedStyle.height) / this.form.clientHeight > 0.1; - if (fontWeight === 'bold' || parseFloat(fontWeight) >= 700 || isRelativelyTall) { + if (isRelativelyTall && (fontWeight === 'bold' || parseFloat(fontWeight) >= 700)) { return true; } }; - const allSiblings = Array.from(this.form.parentElement?.children ?? []).filter(element => element !== this.form); + const allSiblings = Array.from(this.form.parentElement?.children ?? []).filter(element => element !== this.form).map(element => this.getElementsWithOnlyTextChild(element)).flat(); if (allSiblings.length === 0) return false; - allSiblings.forEach(sibling => { - if (sibling instanceof HTMLElement && sibling.childElementCount === 1 && isVisuallyBeforeForm(sibling) && isHeaderSized(sibling)) { - const string = sibling.textContent?.trim(); + allSiblings.forEach(element => { + if (element instanceof HTMLElement && isVisuallyBeforeForm(element) && isHeaderSized(element)) { + const string = element.textContent?.trim(); if (string) { - if ((0, _autofillUtils.safeRegexTest)(/^(sign[- ]?in|log[- ]?in)$/, string)) { - return this.decreaseSignalBy(3, 'Strong login header before form'); - } else if ((0, _autofillUtils.safeRegexTest)(/^(sign[- ]?up)$/, string)) { - return this.increaseSignalBy(3, 'Strong signup header before form'); + if ((0, _autofillUtils.safeRegexTest)(/^(sign[- ]?in|log[- ]?in)$/i, string)) { + return this.decreaseSignalBy(3, 'Strong login signal above form'); + } else if ((0, _autofillUtils.safeRegexTest)(/^(sign[- ]?up)$/i, string)) { + return this.increaseSignalBy(3, 'Strong signup signal above form'); } } } }); } - hasPasswordHints() { - return Array.from(this.form.querySelectorAll('div, span')).filter(div => div.textContent != null && div.textContent.trim() !== '' && window.getComputedStyle(div).display !== 'none' && window.getComputedStyle(div).visibility !== 'hidden').some(div => div.textContent && (0, _autofillUtils.safeRegexTest)(this.matching.getDDGMatcherRegex('passwordHintsRegex'), div.textContent)); + evaluatePasswordHints() { + const hasPasswordHints = Array.from(this.form.querySelectorAll('div, span')).filter(div => div.textContent != null && div.textContent.trim() !== '' && window.getComputedStyle(div).display !== 'none' && window.getComputedStyle(div).visibility !== 'hidden').some(div => div.textContent && (0, _autofillUtils.safeRegexTest)(this.matching.getDDGMatcherRegex('passwordHintsRegex'), div.textContent)); + if (hasPasswordHints) { + this.increaseSignalBy(3, 'Password hints'); + } } /** @@ -6963,9 +7003,11 @@ class FormAnalyzer { if (relevantFields.length >= 4) { this.increaseSignalBy(relevantFields.length * 1.5, 'many fields: it is probably not a login'); } - this.updateFormHeaderSignals(); - if (this.hasPasswordHints()) { - this.increaseSignalBy(3, 'Password hints'); + + // If we can't decide at this point, try reading form headers and password hints + if (this.areLoginOrSignupSignalsWeak()) { + this.evaluatePasswordHints(); + this.evaluateFormHeaderSignals(); } // If we can't decide at this point, try reading page headings From 7402c0d1276559b71f58ee613863fa541eca867b Mon Sep 17 00:00:00 2001 From: dbajpeyi Date: Fri, 20 Dec 2024 16:04:18 +0100 Subject: [PATCH 5/6] fix: remove header evaluation --- dist/autofill-debug.js | 86 +++------------ dist/autofill.js | 86 +++------------ src/Form/FormAnalyzer.js | 100 +++--------------- src/autofill-utils.js | 4 +- .../Resources/assets/autofill-debug.js | 86 +++------------ swift-package/Resources/assets/autofill.js | 86 +++------------ test-forms/index.json | 2 +- 7 files changed, 71 insertions(+), 379 deletions(-) diff --git a/dist/autofill-debug.js b/dist/autofill-debug.js index 516872a06..6fed19157 100644 --- a/dist/autofill-debug.js +++ b/dist/autofill-debug.js @@ -11004,9 +11004,6 @@ class FormAnalyzer { */ this.signals = []; - // Analyse the input that was passed. This is pretty arbitrary, but historically it's been working nicely. - this.evaluateElAttributes(input, 1, true); - // If we have a meaningful container (a form), check that, otherwise check the whole page if (form !== input) { this.evaluateForm(); @@ -11103,7 +11100,7 @@ class FormAnalyzer { return this; } const signupRegexToUse = this.matching.getDDGMatcherRegex(shouldBeConservative ? 'conservativeSignupRegex' : 'signupRegex'); - const matchesSignup = (0, _autofillUtils.safeRegexTest)(/new.?password/i, string) || (0, _autofillUtils.safeRegexTest)(signupRegexToUse, string); + const matchesSignup = (0, _autofillUtils.safeRegexTest)(/new.?(password|username)/i, string) || (0, _autofillUtils.safeRegexTest)(signupRegexToUse, string); // In some cases a login match means the login is somewhere else, i.e. when a link points outside if (shouldFlip) { @@ -11185,72 +11182,12 @@ class FormAnalyzer { } }); } - - /** - * Takes an element and returns all its children that are text-only nodes - * @param {HTMLElement|Element} element - * @param {number} maxDepth - * @param {number} currentDepth - * @returns {HTMLElement[]|Element[]} - */ - getElementsWithOnlyTextChild(element) { - let maxDepth = arguments.length > 1 && arguments[1] !== undefined ? arguments[1] : 2; - let currentDepth = arguments.length > 2 && arguments[2] !== undefined ? arguments[2] : 0; - // Array to collect elements with only text child nodes - const elementsWithTextChild = []; - - // If we've reached the max depth, stop traversing further - if (currentDepth > maxDepth) { - return elementsWithTextChild; - } - - // Check if the current element has only one text child node - if (element.nodeType === Node.ELEMENT_NODE) { - const childNodes = element.childNodes; - if (childNodes.length === 1 && childNodes[0].nodeType === Node.TEXT_NODE) { - elementsWithTextChild.push(element); - } - } - - // Recurse through each child element and collect matching elements - for (const child of element.children) { - // Recursively get elements from child elements, increasing depth by 1 - elementsWithTextChild.push(...this.getElementsWithOnlyTextChild(child, maxDepth, currentDepth + 1)); - } - return elementsWithTextChild; - } - evaluateFormHeaderSignals() { - const isVisuallyBeforeForm = el => el.getBoundingClientRect().top < this.form.getBoundingClientRect().top; - const isHeaderSized = el => { - if (el instanceof HTMLHeadingElement) { - return true; - } - const computedStyle = window.getComputedStyle(el); - const fontWeight = computedStyle.fontWeight; - const isRelativelyTall = parseFloat(computedStyle.height) / this.form.clientHeight > 0.1; - if (isRelativelyTall && (fontWeight === 'bold' || parseFloat(fontWeight) >= 700)) { - return true; - } - }; - const allSiblings = Array.from(this.form.parentElement?.children ?? []).filter(element => element !== this.form).map(element => this.getElementsWithOnlyTextChild(element)).flat(); - if (allSiblings.length === 0) return false; - allSiblings.forEach(element => { - if (element instanceof HTMLElement && isVisuallyBeforeForm(element) && isHeaderSized(element)) { - const string = element.textContent?.trim(); - if (string) { - if ((0, _autofillUtils.safeRegexTest)(/^(sign[- ]?in|log[- ]?in)$/i, string)) { - return this.decreaseSignalBy(3, 'Strong login signal above form'); - } else if ((0, _autofillUtils.safeRegexTest)(/^(sign[- ]?up)$/i, string)) { - return this.increaseSignalBy(3, 'Strong signup signal above form'); - } - } - } - }); - } evaluatePasswordHints() { - const hasPasswordHints = Array.from(this.form.querySelectorAll('div, span')).filter(div => div.textContent != null && div.textContent.trim() !== '' && window.getComputedStyle(div).display !== 'none' && window.getComputedStyle(div).visibility !== 'hidden').some(div => div.textContent && (0, _autofillUtils.safeRegexTest)(this.matching.getDDGMatcherRegex('passwordHintsRegex'), div.textContent)); - if (hasPasswordHints) { - this.increaseSignalBy(3, 'Password hints'); + if (this.form.textContent) { + const hasPasswordHints = (0, _autofillUtils.safeRegexTest)(this.matching.getDDGMatcherRegex('passwordHintsRegex'), this.form.textContent, 200); + if (hasPasswordHints) { + this.increaseSignalBy(5, 'Password hints'); + } } } @@ -11344,6 +11281,11 @@ class FormAnalyzer { // Check page title this.evaluatePageTitle(); + // Evaluate form's input elements + this.form.querySelectorAll(this.matching.cssSelector('formInputsSelector')).forEach(input => { + this.evaluateElAttributes(input, 1, true); + }); + // Check form attributes this.evaluateElAttributes(this.form); @@ -11367,10 +11309,9 @@ class FormAnalyzer { this.increaseSignalBy(relevantFields.length * 1.5, 'many fields: it is probably not a login'); } - // If we can't decide at this point, try reading form headers and password hints + // If we can't decide at this point, try reading password hints if (this.areLoginOrSignupSignalsWeak()) { this.evaluatePasswordHints(); - this.evaluateFormHeaderSignals(); } // If we can't decide at this point, try reading page headings @@ -17800,7 +17741,8 @@ function isFormLikelyToBeUsedAsPageWrapper(form) { * @returns {boolean} */ function safeRegexTest(regex, string) { - if (!string || !regex || string.length > _constants.constants.TEXT_LENGTH_CUTOFF) return false; + let textLengthCutoff = arguments.length > 2 && arguments[2] !== undefined ? arguments[2] : _constants.constants.TEXT_LENGTH_CUTOFF; + if (!string || !regex || string.length > textLengthCutoff) return false; return regex.test(string); } diff --git a/dist/autofill.js b/dist/autofill.js index 25c5d33e3..f9bf11ec9 100644 --- a/dist/autofill.js +++ b/dist/autofill.js @@ -6641,9 +6641,6 @@ class FormAnalyzer { */ this.signals = []; - // Analyse the input that was passed. This is pretty arbitrary, but historically it's been working nicely. - this.evaluateElAttributes(input, 1, true); - // If we have a meaningful container (a form), check that, otherwise check the whole page if (form !== input) { this.evaluateForm(); @@ -6740,7 +6737,7 @@ class FormAnalyzer { return this; } const signupRegexToUse = this.matching.getDDGMatcherRegex(shouldBeConservative ? 'conservativeSignupRegex' : 'signupRegex'); - const matchesSignup = (0, _autofillUtils.safeRegexTest)(/new.?password/i, string) || (0, _autofillUtils.safeRegexTest)(signupRegexToUse, string); + const matchesSignup = (0, _autofillUtils.safeRegexTest)(/new.?(password|username)/i, string) || (0, _autofillUtils.safeRegexTest)(signupRegexToUse, string); // In some cases a login match means the login is somewhere else, i.e. when a link points outside if (shouldFlip) { @@ -6822,72 +6819,12 @@ class FormAnalyzer { } }); } - - /** - * Takes an element and returns all its children that are text-only nodes - * @param {HTMLElement|Element} element - * @param {number} maxDepth - * @param {number} currentDepth - * @returns {HTMLElement[]|Element[]} - */ - getElementsWithOnlyTextChild(element) { - let maxDepth = arguments.length > 1 && arguments[1] !== undefined ? arguments[1] : 2; - let currentDepth = arguments.length > 2 && arguments[2] !== undefined ? arguments[2] : 0; - // Array to collect elements with only text child nodes - const elementsWithTextChild = []; - - // If we've reached the max depth, stop traversing further - if (currentDepth > maxDepth) { - return elementsWithTextChild; - } - - // Check if the current element has only one text child node - if (element.nodeType === Node.ELEMENT_NODE) { - const childNodes = element.childNodes; - if (childNodes.length === 1 && childNodes[0].nodeType === Node.TEXT_NODE) { - elementsWithTextChild.push(element); - } - } - - // Recurse through each child element and collect matching elements - for (const child of element.children) { - // Recursively get elements from child elements, increasing depth by 1 - elementsWithTextChild.push(...this.getElementsWithOnlyTextChild(child, maxDepth, currentDepth + 1)); - } - return elementsWithTextChild; - } - evaluateFormHeaderSignals() { - const isVisuallyBeforeForm = el => el.getBoundingClientRect().top < this.form.getBoundingClientRect().top; - const isHeaderSized = el => { - if (el instanceof HTMLHeadingElement) { - return true; - } - const computedStyle = window.getComputedStyle(el); - const fontWeight = computedStyle.fontWeight; - const isRelativelyTall = parseFloat(computedStyle.height) / this.form.clientHeight > 0.1; - if (isRelativelyTall && (fontWeight === 'bold' || parseFloat(fontWeight) >= 700)) { - return true; - } - }; - const allSiblings = Array.from(this.form.parentElement?.children ?? []).filter(element => element !== this.form).map(element => this.getElementsWithOnlyTextChild(element)).flat(); - if (allSiblings.length === 0) return false; - allSiblings.forEach(element => { - if (element instanceof HTMLElement && isVisuallyBeforeForm(element) && isHeaderSized(element)) { - const string = element.textContent?.trim(); - if (string) { - if ((0, _autofillUtils.safeRegexTest)(/^(sign[- ]?in|log[- ]?in)$/i, string)) { - return this.decreaseSignalBy(3, 'Strong login signal above form'); - } else if ((0, _autofillUtils.safeRegexTest)(/^(sign[- ]?up)$/i, string)) { - return this.increaseSignalBy(3, 'Strong signup signal above form'); - } - } - } - }); - } evaluatePasswordHints() { - const hasPasswordHints = Array.from(this.form.querySelectorAll('div, span')).filter(div => div.textContent != null && div.textContent.trim() !== '' && window.getComputedStyle(div).display !== 'none' && window.getComputedStyle(div).visibility !== 'hidden').some(div => div.textContent && (0, _autofillUtils.safeRegexTest)(this.matching.getDDGMatcherRegex('passwordHintsRegex'), div.textContent)); - if (hasPasswordHints) { - this.increaseSignalBy(3, 'Password hints'); + if (this.form.textContent) { + const hasPasswordHints = (0, _autofillUtils.safeRegexTest)(this.matching.getDDGMatcherRegex('passwordHintsRegex'), this.form.textContent, 200); + if (hasPasswordHints) { + this.increaseSignalBy(5, 'Password hints'); + } } } @@ -6981,6 +6918,11 @@ class FormAnalyzer { // Check page title this.evaluatePageTitle(); + // Evaluate form's input elements + this.form.querySelectorAll(this.matching.cssSelector('formInputsSelector')).forEach(input => { + this.evaluateElAttributes(input, 1, true); + }); + // Check form attributes this.evaluateElAttributes(this.form); @@ -7004,10 +6946,9 @@ class FormAnalyzer { this.increaseSignalBy(relevantFields.length * 1.5, 'many fields: it is probably not a login'); } - // If we can't decide at this point, try reading form headers and password hints + // If we can't decide at this point, try reading password hints if (this.areLoginOrSignupSignalsWeak()) { this.evaluatePasswordHints(); - this.evaluateFormHeaderSignals(); } // If we can't decide at this point, try reading page headings @@ -13437,7 +13378,8 @@ function isFormLikelyToBeUsedAsPageWrapper(form) { * @returns {boolean} */ function safeRegexTest(regex, string) { - if (!string || !regex || string.length > _constants.constants.TEXT_LENGTH_CUTOFF) return false; + let textLengthCutoff = arguments.length > 2 && arguments[2] !== undefined ? arguments[2] : _constants.constants.TEXT_LENGTH_CUTOFF; + if (!string || !regex || string.length > textLengthCutoff) return false; return regex.test(string); } diff --git a/src/Form/FormAnalyzer.js b/src/Form/FormAnalyzer.js index 003038353..6814afe6c 100644 --- a/src/Form/FormAnalyzer.js +++ b/src/Form/FormAnalyzer.js @@ -40,9 +40,6 @@ class FormAnalyzer { */ this.signals = []; - // Analyse the input that was passed. This is pretty arbitrary, but historically it's been working nicely. - this.evaluateElAttributes(input, 1, true); - // If we have a meaningful container (a form), check that, otherwise check the whole page if (form !== input) { this.evaluateForm(); @@ -149,7 +146,7 @@ class FormAnalyzer { } const signupRegexToUse = this.matching.getDDGMatcherRegex(shouldBeConservative ? 'conservativeSignupRegex' : 'signupRegex'); - const matchesSignup = safeRegexTest(/new.?password/i, string) || safeRegexTest(signupRegexToUse, string); + const matchesSignup = safeRegexTest(/new.?(password|username)/i, string) || safeRegexTest(signupRegexToUse, string); // In some cases a login match means the login is somewhere else, i.e. when a link points outside if (shouldFlip) { @@ -232,89 +229,12 @@ class FormAnalyzer { }); } - /** - * Takes an element and returns all its children that are text-only nodes - * @param {HTMLElement|Element} element - * @param {number} maxDepth - * @param {number} currentDepth - * @returns {HTMLElement[]|Element[]} - */ - getElementsWithOnlyTextChild(element, maxDepth = 2, currentDepth = 0) { - // Array to collect elements with only text child nodes - const elementsWithTextChild = []; - - // If we've reached the max depth, stop traversing further - if (currentDepth > maxDepth) { - return elementsWithTextChild; - } - - // Check if the current element has only one text child node - if (element.nodeType === Node.ELEMENT_NODE) { - const childNodes = element.childNodes; - - if (childNodes.length === 1 && childNodes[0].nodeType === Node.TEXT_NODE) { - elementsWithTextChild.push(element); - } - } - - // Recurse through each child element and collect matching elements - for (const child of element.children) { - // Recursively get elements from child elements, increasing depth by 1 - elementsWithTextChild.push(...this.getElementsWithOnlyTextChild(child, maxDepth, currentDepth + 1)); - } - - return elementsWithTextChild; - } - - evaluateFormHeaderSignals() { - const isVisuallyBeforeForm = (el) => el.getBoundingClientRect().top < this.form.getBoundingClientRect().top; - - const isHeaderSized = (el) => { - if (el instanceof HTMLHeadingElement) { - return true; - } - - const computedStyle = window.getComputedStyle(el); - const fontWeight = computedStyle.fontWeight; - const isRelativelyTall = parseFloat(computedStyle.height) / this.form.clientHeight > 0.1; - if (isRelativelyTall && (fontWeight === 'bold' || parseFloat(fontWeight) >= 700)) { - return true; - } - }; - - const allSiblings = Array.from(this.form.parentElement?.children ?? []) - .filter((element) => element !== this.form) - .map((element) => this.getElementsWithOnlyTextChild(element)) - .flat(); - - if (allSiblings.length === 0) return false; - - allSiblings.forEach((element) => { - if (element instanceof HTMLElement && isVisuallyBeforeForm(element) && isHeaderSized(element)) { - const string = element.textContent?.trim(); - if (string) { - if (safeRegexTest(/^(sign[- ]?in|log[- ]?in)$/i, string)) { - return this.decreaseSignalBy(3, 'Strong login signal above form'); - } else if (safeRegexTest(/^(sign[- ]?up)$/i, string)) { - return this.increaseSignalBy(3, 'Strong signup signal above form'); - } - } - } - }); - } - evaluatePasswordHints() { - const hasPasswordHints = Array.from(this.form.querySelectorAll('div, span')) - .filter( - (div) => - div.textContent != null && - div.textContent.trim() !== '' && - window.getComputedStyle(div).display !== 'none' && - window.getComputedStyle(div).visibility !== 'hidden', - ) - .some((div) => div.textContent && safeRegexTest(this.matching.getDDGMatcherRegex('passwordHintsRegex'), div.textContent)); - if (hasPasswordHints) { - this.increaseSignalBy(3, 'Password hints'); + if (this.form.textContent) { + const hasPasswordHints = safeRegexTest(this.matching.getDDGMatcherRegex('passwordHintsRegex'), this.form.textContent, 200); + if (hasPasswordHints) { + this.increaseSignalBy(5, 'Password hints'); + } } } @@ -401,6 +321,11 @@ class FormAnalyzer { // Check page title this.evaluatePageTitle(); + // Evaluate form's input elements + this.form.querySelectorAll(this.matching.cssSelector('formInputsSelector')).forEach((input) => { + this.evaluateElAttributes(input, 1, true); + }); + // Check form attributes this.evaluateElAttributes(this.form); @@ -425,10 +350,9 @@ class FormAnalyzer { this.increaseSignalBy(relevantFields.length * 1.5, 'many fields: it is probably not a login'); } - // If we can't decide at this point, try reading form headers and password hints + // If we can't decide at this point, try reading password hints if (this.areLoginOrSignupSignalsWeak()) { this.evaluatePasswordHints(); - this.evaluateFormHeaderSignals(); } // If we can't decide at this point, try reading page headings diff --git a/src/autofill-utils.js b/src/autofill-utils.js index 99a11fdc4..019abe639 100644 --- a/src/autofill-utils.js +++ b/src/autofill-utils.js @@ -518,8 +518,8 @@ function isFormLikelyToBeUsedAsPageWrapper(form) { * @param {String} string * @returns {boolean} */ -function safeRegexTest(regex, string) { - if (!string || !regex || string.length > constants.TEXT_LENGTH_CUTOFF) return false; +function safeRegexTest(regex, string, textLengthCutoff = constants.TEXT_LENGTH_CUTOFF) { + if (!string || !regex || string.length > textLengthCutoff) return false; return regex.test(string); } diff --git a/swift-package/Resources/assets/autofill-debug.js b/swift-package/Resources/assets/autofill-debug.js index 516872a06..6fed19157 100644 --- a/swift-package/Resources/assets/autofill-debug.js +++ b/swift-package/Resources/assets/autofill-debug.js @@ -11004,9 +11004,6 @@ class FormAnalyzer { */ this.signals = []; - // Analyse the input that was passed. This is pretty arbitrary, but historically it's been working nicely. - this.evaluateElAttributes(input, 1, true); - // If we have a meaningful container (a form), check that, otherwise check the whole page if (form !== input) { this.evaluateForm(); @@ -11103,7 +11100,7 @@ class FormAnalyzer { return this; } const signupRegexToUse = this.matching.getDDGMatcherRegex(shouldBeConservative ? 'conservativeSignupRegex' : 'signupRegex'); - const matchesSignup = (0, _autofillUtils.safeRegexTest)(/new.?password/i, string) || (0, _autofillUtils.safeRegexTest)(signupRegexToUse, string); + const matchesSignup = (0, _autofillUtils.safeRegexTest)(/new.?(password|username)/i, string) || (0, _autofillUtils.safeRegexTest)(signupRegexToUse, string); // In some cases a login match means the login is somewhere else, i.e. when a link points outside if (shouldFlip) { @@ -11185,72 +11182,12 @@ class FormAnalyzer { } }); } - - /** - * Takes an element and returns all its children that are text-only nodes - * @param {HTMLElement|Element} element - * @param {number} maxDepth - * @param {number} currentDepth - * @returns {HTMLElement[]|Element[]} - */ - getElementsWithOnlyTextChild(element) { - let maxDepth = arguments.length > 1 && arguments[1] !== undefined ? arguments[1] : 2; - let currentDepth = arguments.length > 2 && arguments[2] !== undefined ? arguments[2] : 0; - // Array to collect elements with only text child nodes - const elementsWithTextChild = []; - - // If we've reached the max depth, stop traversing further - if (currentDepth > maxDepth) { - return elementsWithTextChild; - } - - // Check if the current element has only one text child node - if (element.nodeType === Node.ELEMENT_NODE) { - const childNodes = element.childNodes; - if (childNodes.length === 1 && childNodes[0].nodeType === Node.TEXT_NODE) { - elementsWithTextChild.push(element); - } - } - - // Recurse through each child element and collect matching elements - for (const child of element.children) { - // Recursively get elements from child elements, increasing depth by 1 - elementsWithTextChild.push(...this.getElementsWithOnlyTextChild(child, maxDepth, currentDepth + 1)); - } - return elementsWithTextChild; - } - evaluateFormHeaderSignals() { - const isVisuallyBeforeForm = el => el.getBoundingClientRect().top < this.form.getBoundingClientRect().top; - const isHeaderSized = el => { - if (el instanceof HTMLHeadingElement) { - return true; - } - const computedStyle = window.getComputedStyle(el); - const fontWeight = computedStyle.fontWeight; - const isRelativelyTall = parseFloat(computedStyle.height) / this.form.clientHeight > 0.1; - if (isRelativelyTall && (fontWeight === 'bold' || parseFloat(fontWeight) >= 700)) { - return true; - } - }; - const allSiblings = Array.from(this.form.parentElement?.children ?? []).filter(element => element !== this.form).map(element => this.getElementsWithOnlyTextChild(element)).flat(); - if (allSiblings.length === 0) return false; - allSiblings.forEach(element => { - if (element instanceof HTMLElement && isVisuallyBeforeForm(element) && isHeaderSized(element)) { - const string = element.textContent?.trim(); - if (string) { - if ((0, _autofillUtils.safeRegexTest)(/^(sign[- ]?in|log[- ]?in)$/i, string)) { - return this.decreaseSignalBy(3, 'Strong login signal above form'); - } else if ((0, _autofillUtils.safeRegexTest)(/^(sign[- ]?up)$/i, string)) { - return this.increaseSignalBy(3, 'Strong signup signal above form'); - } - } - } - }); - } evaluatePasswordHints() { - const hasPasswordHints = Array.from(this.form.querySelectorAll('div, span')).filter(div => div.textContent != null && div.textContent.trim() !== '' && window.getComputedStyle(div).display !== 'none' && window.getComputedStyle(div).visibility !== 'hidden').some(div => div.textContent && (0, _autofillUtils.safeRegexTest)(this.matching.getDDGMatcherRegex('passwordHintsRegex'), div.textContent)); - if (hasPasswordHints) { - this.increaseSignalBy(3, 'Password hints'); + if (this.form.textContent) { + const hasPasswordHints = (0, _autofillUtils.safeRegexTest)(this.matching.getDDGMatcherRegex('passwordHintsRegex'), this.form.textContent, 200); + if (hasPasswordHints) { + this.increaseSignalBy(5, 'Password hints'); + } } } @@ -11344,6 +11281,11 @@ class FormAnalyzer { // Check page title this.evaluatePageTitle(); + // Evaluate form's input elements + this.form.querySelectorAll(this.matching.cssSelector('formInputsSelector')).forEach(input => { + this.evaluateElAttributes(input, 1, true); + }); + // Check form attributes this.evaluateElAttributes(this.form); @@ -11367,10 +11309,9 @@ class FormAnalyzer { this.increaseSignalBy(relevantFields.length * 1.5, 'many fields: it is probably not a login'); } - // If we can't decide at this point, try reading form headers and password hints + // If we can't decide at this point, try reading password hints if (this.areLoginOrSignupSignalsWeak()) { this.evaluatePasswordHints(); - this.evaluateFormHeaderSignals(); } // If we can't decide at this point, try reading page headings @@ -17800,7 +17741,8 @@ function isFormLikelyToBeUsedAsPageWrapper(form) { * @returns {boolean} */ function safeRegexTest(regex, string) { - if (!string || !regex || string.length > _constants.constants.TEXT_LENGTH_CUTOFF) return false; + let textLengthCutoff = arguments.length > 2 && arguments[2] !== undefined ? arguments[2] : _constants.constants.TEXT_LENGTH_CUTOFF; + if (!string || !regex || string.length > textLengthCutoff) return false; return regex.test(string); } diff --git a/swift-package/Resources/assets/autofill.js b/swift-package/Resources/assets/autofill.js index 25c5d33e3..f9bf11ec9 100644 --- a/swift-package/Resources/assets/autofill.js +++ b/swift-package/Resources/assets/autofill.js @@ -6641,9 +6641,6 @@ class FormAnalyzer { */ this.signals = []; - // Analyse the input that was passed. This is pretty arbitrary, but historically it's been working nicely. - this.evaluateElAttributes(input, 1, true); - // If we have a meaningful container (a form), check that, otherwise check the whole page if (form !== input) { this.evaluateForm(); @@ -6740,7 +6737,7 @@ class FormAnalyzer { return this; } const signupRegexToUse = this.matching.getDDGMatcherRegex(shouldBeConservative ? 'conservativeSignupRegex' : 'signupRegex'); - const matchesSignup = (0, _autofillUtils.safeRegexTest)(/new.?password/i, string) || (0, _autofillUtils.safeRegexTest)(signupRegexToUse, string); + const matchesSignup = (0, _autofillUtils.safeRegexTest)(/new.?(password|username)/i, string) || (0, _autofillUtils.safeRegexTest)(signupRegexToUse, string); // In some cases a login match means the login is somewhere else, i.e. when a link points outside if (shouldFlip) { @@ -6822,72 +6819,12 @@ class FormAnalyzer { } }); } - - /** - * Takes an element and returns all its children that are text-only nodes - * @param {HTMLElement|Element} element - * @param {number} maxDepth - * @param {number} currentDepth - * @returns {HTMLElement[]|Element[]} - */ - getElementsWithOnlyTextChild(element) { - let maxDepth = arguments.length > 1 && arguments[1] !== undefined ? arguments[1] : 2; - let currentDepth = arguments.length > 2 && arguments[2] !== undefined ? arguments[2] : 0; - // Array to collect elements with only text child nodes - const elementsWithTextChild = []; - - // If we've reached the max depth, stop traversing further - if (currentDepth > maxDepth) { - return elementsWithTextChild; - } - - // Check if the current element has only one text child node - if (element.nodeType === Node.ELEMENT_NODE) { - const childNodes = element.childNodes; - if (childNodes.length === 1 && childNodes[0].nodeType === Node.TEXT_NODE) { - elementsWithTextChild.push(element); - } - } - - // Recurse through each child element and collect matching elements - for (const child of element.children) { - // Recursively get elements from child elements, increasing depth by 1 - elementsWithTextChild.push(...this.getElementsWithOnlyTextChild(child, maxDepth, currentDepth + 1)); - } - return elementsWithTextChild; - } - evaluateFormHeaderSignals() { - const isVisuallyBeforeForm = el => el.getBoundingClientRect().top < this.form.getBoundingClientRect().top; - const isHeaderSized = el => { - if (el instanceof HTMLHeadingElement) { - return true; - } - const computedStyle = window.getComputedStyle(el); - const fontWeight = computedStyle.fontWeight; - const isRelativelyTall = parseFloat(computedStyle.height) / this.form.clientHeight > 0.1; - if (isRelativelyTall && (fontWeight === 'bold' || parseFloat(fontWeight) >= 700)) { - return true; - } - }; - const allSiblings = Array.from(this.form.parentElement?.children ?? []).filter(element => element !== this.form).map(element => this.getElementsWithOnlyTextChild(element)).flat(); - if (allSiblings.length === 0) return false; - allSiblings.forEach(element => { - if (element instanceof HTMLElement && isVisuallyBeforeForm(element) && isHeaderSized(element)) { - const string = element.textContent?.trim(); - if (string) { - if ((0, _autofillUtils.safeRegexTest)(/^(sign[- ]?in|log[- ]?in)$/i, string)) { - return this.decreaseSignalBy(3, 'Strong login signal above form'); - } else if ((0, _autofillUtils.safeRegexTest)(/^(sign[- ]?up)$/i, string)) { - return this.increaseSignalBy(3, 'Strong signup signal above form'); - } - } - } - }); - } evaluatePasswordHints() { - const hasPasswordHints = Array.from(this.form.querySelectorAll('div, span')).filter(div => div.textContent != null && div.textContent.trim() !== '' && window.getComputedStyle(div).display !== 'none' && window.getComputedStyle(div).visibility !== 'hidden').some(div => div.textContent && (0, _autofillUtils.safeRegexTest)(this.matching.getDDGMatcherRegex('passwordHintsRegex'), div.textContent)); - if (hasPasswordHints) { - this.increaseSignalBy(3, 'Password hints'); + if (this.form.textContent) { + const hasPasswordHints = (0, _autofillUtils.safeRegexTest)(this.matching.getDDGMatcherRegex('passwordHintsRegex'), this.form.textContent, 200); + if (hasPasswordHints) { + this.increaseSignalBy(5, 'Password hints'); + } } } @@ -6981,6 +6918,11 @@ class FormAnalyzer { // Check page title this.evaluatePageTitle(); + // Evaluate form's input elements + this.form.querySelectorAll(this.matching.cssSelector('formInputsSelector')).forEach(input => { + this.evaluateElAttributes(input, 1, true); + }); + // Check form attributes this.evaluateElAttributes(this.form); @@ -7004,10 +6946,9 @@ class FormAnalyzer { this.increaseSignalBy(relevantFields.length * 1.5, 'many fields: it is probably not a login'); } - // If we can't decide at this point, try reading form headers and password hints + // If we can't decide at this point, try reading password hints if (this.areLoginOrSignupSignalsWeak()) { this.evaluatePasswordHints(); - this.evaluateFormHeaderSignals(); } // If we can't decide at this point, try reading page headings @@ -13437,7 +13378,8 @@ function isFormLikelyToBeUsedAsPageWrapper(form) { * @returns {boolean} */ function safeRegexTest(regex, string) { - if (!string || !regex || string.length > _constants.constants.TEXT_LENGTH_CUTOFF) return false; + let textLengthCutoff = arguments.length > 2 && arguments[2] !== undefined ? arguments[2] : _constants.constants.TEXT_LENGTH_CUTOFF; + if (!string || !regex || string.length > textLengthCutoff) return false; return regex.test(string); } diff --git a/test-forms/index.json b/test-forms/index.json index 02daabde9..b7f6ee7aa 100644 --- a/test-forms/index.json +++ b/test-forms/index.json @@ -308,7 +308,7 @@ { "html": "weblogin_utoronto_ca_login.html", "generated": true, "title": "weblogin | University of Toronto", "comment": "rank: 1280" }, { "html": "customerportal_mastercard_com_login.html", "generated": true, "title": "Sign in - Mastercard", "comment": "rank: 1362" }, { "html": "www_khanacademy_org_login.html", "generated": true, "title": "Khan Academy", "comment": "rank: 1379" }, - { "html": "accounts_hindustantimes_com_login.html", "generated": true, "title": "Livemint Subscription Plan and Pricing", "comment": "rank: 1467" }, + { "html": "accounts_hindustantimes_com_login.html", "generated": true, "title": "Livemint Subscription Plan and Pricing", "comment": "rank: 1467", "expectedFailures": ["emailAddress"] }, { "html": "rule34_us_signup.html", "generated": true, "title": "Rule34 - If it exists, there is porn of it", "comment": "rank: 1479" }, { "html": "gsw_gda_pl_login.html", "generated": true, "title": "Log in | Gdańska Szkoła Wyższa", "comment": "rank: 1504" }, { "html": "secure_xserver_ne_jp_login.html", "generated": true, "title": "Xserverアカウント - ログイン | レンタルサーバーならエックスサーバー", "comment": "rank: 1523" }, From cb29187e7b023284f1cb12ff0f3ffd58a6cf4f18 Mon Sep 17 00:00:00 2001 From: dbajpeyi Date: Fri, 20 Dec 2024 16:48:32 +0100 Subject: [PATCH 6/6] test: add form --- dist/autofill-debug.js | 2 +- dist/autofill.js | 2 +- src/Form/FormAnalyzer.js | 2 +- .../Resources/assets/autofill-debug.js | 2 +- swift-package/Resources/assets/autofill.js | 2 +- test-forms/index.json | 3 +- test-forms/sleeper_com_signup.html | 53 +++++++++++++++++++ 7 files changed, 60 insertions(+), 6 deletions(-) create mode 100644 test-forms/sleeper_com_signup.html diff --git a/dist/autofill-debug.js b/dist/autofill-debug.js index baf651b43..eace5e36a 100644 --- a/dist/autofill-debug.js +++ b/dist/autofill-debug.js @@ -11281,7 +11281,7 @@ class FormAnalyzer { // Check page title this.evaluatePageTitle(); - // Evaluate form's input elements + // Evaluate attributes of form's input elements this.form.querySelectorAll(this.matching.cssSelector('formInputsSelector')).forEach(input => { this.evaluateElAttributes(input, 1, true); }); diff --git a/dist/autofill.js b/dist/autofill.js index e3844c5d9..595e6dbbf 100644 --- a/dist/autofill.js +++ b/dist/autofill.js @@ -6918,7 +6918,7 @@ class FormAnalyzer { // Check page title this.evaluatePageTitle(); - // Evaluate form's input elements + // Evaluate attributes of form's input elements this.form.querySelectorAll(this.matching.cssSelector('formInputsSelector')).forEach(input => { this.evaluateElAttributes(input, 1, true); }); diff --git a/src/Form/FormAnalyzer.js b/src/Form/FormAnalyzer.js index 6814afe6c..69213362d 100644 --- a/src/Form/FormAnalyzer.js +++ b/src/Form/FormAnalyzer.js @@ -321,7 +321,7 @@ class FormAnalyzer { // Check page title this.evaluatePageTitle(); - // Evaluate form's input elements + // Evaluate attributes of form's input elements this.form.querySelectorAll(this.matching.cssSelector('formInputsSelector')).forEach((input) => { this.evaluateElAttributes(input, 1, true); }); diff --git a/swift-package/Resources/assets/autofill-debug.js b/swift-package/Resources/assets/autofill-debug.js index baf651b43..eace5e36a 100644 --- a/swift-package/Resources/assets/autofill-debug.js +++ b/swift-package/Resources/assets/autofill-debug.js @@ -11281,7 +11281,7 @@ class FormAnalyzer { // Check page title this.evaluatePageTitle(); - // Evaluate form's input elements + // Evaluate attributes of form's input elements this.form.querySelectorAll(this.matching.cssSelector('formInputsSelector')).forEach(input => { this.evaluateElAttributes(input, 1, true); }); diff --git a/swift-package/Resources/assets/autofill.js b/swift-package/Resources/assets/autofill.js index e3844c5d9..595e6dbbf 100644 --- a/swift-package/Resources/assets/autofill.js +++ b/swift-package/Resources/assets/autofill.js @@ -6918,7 +6918,7 @@ class FormAnalyzer { // Check page title this.evaluatePageTitle(); - // Evaluate form's input elements + // Evaluate attributes of form's input elements this.form.querySelectorAll(this.matching.cssSelector('formInputsSelector')).forEach(input => { this.evaluateElAttributes(input, 1, true); }); diff --git a/test-forms/index.json b/test-forms/index.json index 84be0a657..f6354c7c8 100644 --- a/test-forms/index.json +++ b/test-forms/index.json @@ -548,5 +548,6 @@ { "html": "accounts_oneplus_login.html"}, { "html": "google_password_manager_search.html"}, { "html": "paperlesspost_login.html"}, - { "html": "deltamath_reset_password.html"} + { "html": "deltamath_reset_password.html"}, + { "html": "sleeper_com_signup.html", "expectedSubmitFalseNegatives": 1 } ] diff --git a/test-forms/sleeper_com_signup.html b/test-forms/sleeper_com_signup.html new file mode 100644 index 000000000..a9ef2f645 --- /dev/null +++ b/test-forms/sleeper_com_signup.html @@ -0,0 +1,53 @@ +