gecko-dev/toolkit/components/passwordmgr/SignUpFormRuleset.jsm
jneuberger 319666faa8 Bug 1815446 - Integrating SignUpScenarioRuleset r=credential-management-reviewers,dimi
Integration of the SignUpFormRuleset (Fathom)

New preferences: signon.signupDetection.enabled and signon.signupDetection.confidenceThreshold

Differential Revision: https://phabricator.services.mozilla.com/D169503
2023-02-24 19:53:00 +00:00

566 lines
20 KiB
JavaScript
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
/**
* Machine learning model for identifying sign up scenario forms
*/
"use strict";
const EXPORTED_SYMBOLS = ["SignUpFormRuleset"];
const {
dom,
out,
rule,
ruleset,
score,
type,
element,
utils: { isVisible, attributesMatch, min, setDefault },
clusters: { euclidean },
} = ChromeUtils.importESModule(
"resource://gre/modules/third_party/fathom/fathom.mjs"
);
// Development switch read by the first rule in createRuleset: when true,
// candidate forms are selected with dom("form") filtered by isVisible; when
// false, they are selected with element("form").
const DEVELOPMENT = false;
/**
* --- START OF RULESET ---
*/
// Per-rule weights of the "signup" model. Each key corresponds to the `name`
// given to a scoring rule inside createRuleset; the entries of this Map are
// passed to ruleset() as its coefficients argument.
const coefficients = {
signup: new Map([
["formMethodIsPost", -0.7543129920959473],
["formAttributesMatchRegisterRegex", 1.7264184951782227],
["formAttributesMatchLoginRegex", -0.9329696297645569],
["formAttributesMatchNewsletterRegex", -2.206372022628784],
["formHasAcNewPassword", 1.5736613273620605],
["formHasAcCurrentPassword", -0.12390841543674469],
["formHasAcEmail", 0.6157014966011047],
["formHasAcUsername", -0.7274730801582336],
["formHasAcTel", -1.1380716562271118],
["formHasEmailField", 1.7712397575378418],
["formHasUsernameField", 1.311736822128296],
["formHasPasswordField", 1.3901746273040771],
["formHasEmailAndExtraNameField", 1.0147123336791992],
["formHasFirstOrLastNameFields", 0.6080058217048645],
["formHasBirthdayFields", 1.2841497659683228],
["formHasPhoneField", 0.8433054685592651],
["formHasRegisterButton", 1.1531426906585693],
["formHasLoginButton", -1.1045818328857422],
["formHasSubscribeButton", -2.290463447570801],
["formHasContinueButton", 2.2985446453094482],
["closestElementIsEmailLabelLike", 0.7776230573654175],
["closestElementIsNewPasswordLabelLike", 0.814133882522583],
["formHasTermsAndConditionsCheckbox", 0.6034160852432251],
["formHasRememberMeCheckbox", -2.623626470565796],
// NOTE: "Subcription" (sic) — the spelling matches the rule name registered
// in createRuleset, so the key and the rule name must be changed together.
["formHasSubcriptionCheckbox", 0.5114848613739014],
["docTitleMatchesRegisterRegex", 1.2882719039916992],
["docTitleMatchesEditProfileRegex", -2.381089687347412],
["docHasRegisterOrPasswordForgottenHyperlink", -1.792033076286316],
["docHasLoginHyperlink", -0.621324896812439],
["closestHeaderMatchesRegisterRegex", 1.826246738433838],
["closestHeaderMatchesLoginRegex", -0.7161067128181458],
["closestHeaderMatchesNewsletterRegex", -1.6708611249923706],
]),
};
// [name, bias] pairs passed to ruleset() as its biases argument.
const biases = [["signup", -2.210782766342163]];
// Login / sign-in vocabulary in several languages. Checked against form
// attributes, button text, header text and hyperlink text.
// The "log on" family is spelled out as logon, log-on, log_on and "log on",
// mirroring the "log in" and "sign on" families.
const loginRegex = /login|log-in|log_in|log in|signon|sign-on|sign_on|sign on|signin|sign-in|sign_in|sign in|einloggen|anmelden|logon|log-on|log_on|log on|Войти|ورود|登录|Přihlásit se|Přihlaste|Авторизоваться|Авторизация|entrar|ログイン|로그인|inloggen|Συνδέσου|accedi|ログオン|Giriş Yap|登入|connecter|connectez-vous|Connexion|Вход|inicia/gi;
// NOTE: the vocabulary regexes below carry the `g` flag, so
// RegExp.prototype.test() is stateful on them (it resumes from `lastIndex`).

// Sign-up / account-creation vocabulary in several languages. Checked against
// form attributes, button text, header text, hyperlink text and the document
// title.
const registerRegex = /create|regist[a-z]|sign up|signup|sign-up|sign_up|join|new|登録|neu|erstellen|choose|設定|신규|Créer|Nouveau|baru|nouă|nieuw|create[a-zA-Z\s]+account|activate[a-zA-Z\s]+account|Zugang anlegen|Angaben prüfen|Konto erstellen|ثبت نام|登録|注册|cadastr|Зарегистрироваться|Регистрация|Bellige alynmak|تسجيل|ΕΓΓΡΑΦΗΣ|Εγγραφή|Créer mon compte|Créer un compte|Mendaftar|가입하기|inschrijving|Zarejestruj się|Deschideți un cont|Создать аккаунт|ร่วม|Üye Ol|ساخت حساب کاربری|Schrijf je|S'inscrire/gi;
// Marks an input attribute or label text as e-mail related.
const emailRegex = /mail/gi;
// Marks an input attribute as a username-like field.
const usernameRegex = /user|name|member/gi;
// Checked against the label text next to new-password inputs.
const newPasswordRegex = /new|create/gi;
// Not in use: vocabulary for password confirmation fields, kept commented out.
//const confirmAttrRegex = /confirm|retype|again|bevestigen|wiederhol|repeat|confirmation|verify|retype|repite|確認|の確認|تکرار|re-enter|확인|bevestigen|Повторите|tassyklamak|再次输入|ještě jednou|gentag|re-type|Répéter|conferma|Repetaţi|reenter|再入力|재입력|Ulangi|Bekræft/gi;
// Marks an input attribute as a first/last/middle name field.
const nameRegex = /first|last|middle/gi;
// Marks an input or select attribute as a birthday field.
const birthdateRegex = /birth|year|yyyy/gi;
// Marks an input attribute as a phone number field.
const phoneRegex = /phone|mobile|tel|number/gi;
// Newsletter / subscription vocabulary. Checked against form attributes,
// checkbox attributes, button text and header text.
const newsletterRegex = /subscri|newsletter|trial|offer|information|angebote|probe|ニュースレター|abonn[a-z]/gi;
// Vocabulary for "I accept the terms and conditions"-style checkboxes; checked
// against checkbox attributes.
// The pattern must not end with "|": a trailing empty alternative matches
// every string, which would make this check succeed for any attribute value.
const termsAndConditionsRegex = /accept|agree|read|terms|condition|rules|policy|privacy|akzeptier|gelesen|nutzungbedingungen|AGB|términos|condiciones/gi;
// "Forgot / reset password" vocabulary; checked against hyperlink text.
const pwForgottenRegex = /forgot|reset|set password|vergessen|vergeten|oublié|dimenticata|Esqueceu|esqueci|Забыли|忘记|找回|Zapomenuté|lost|忘れた|忘れられた|忘れの方|재설정|찾기|help|فراموشی| را فراموش کرده اید|Восстановить|Unuttu|perdus|重新設定|recover|remind|request|restore|trouble|olvidada/gi;
// "Continue / next" vocabulary; checked against button text.
const continueRegex = /continue|go on|weiter|fortfahren|ga verder|next|continuar/gi;
// "Remember me / stay signed in" vocabulary; checked against checkbox
// attributes.
const rememberMeRegex = /remember|stay|speichern|merken|bleiben|auto_login|auto-login|auto login|ricordami|manter|mantenha|savelogin|keep me logged in|keep me signed in|save email address|save id|stay signed in|次回からログオンIDの入力を省略する|メールアドレスを保存する|を保存|아이디저장|아이디 저장|로그인 상태 유지|lembrar|mantenha-me conectado|Запомни меня|запомнить меня|Запомните меня|Не спрашивать в следующий раз|下次自动登录|记住我|recordar/gi;
// "Already have an account" vocabulary; checked against hyperlink text.
const alreadySignedUpRegex = /already|bereits|schon|ya tienes cuenta/gi;
// "Edit profile" vocabulary; checked against the document title.
const editProfile = /edit|profile/gi;
function createRuleset(coeffs, biases) {
let elementToSelectors;
/**
* Check document characteristics
*/
/**
 * Score callback: does the form's document contain a hyperlink whose text
 * matches the login vocabulary or the "already signed up" vocabulary?
 *
 * @param {object} fnode - Fathom node; `fnode.element` is the candidate form.
 * @returns {boolean} True as soon as one `<a>` in the owner document matches.
 */
function docHasLoginHyperlink(fnode) {
  const anchors = getElementDescendants(fnode.element.ownerDocument, "a");
  for (const anchor of anchors) {
    if (
      checkValueAgainstRegex(anchor.innerText, loginRegex) ||
      checkValueAgainstRegex(anchor.innerText, alreadySignedUpRegex)
    ) {
      return true;
    }
  }
  return false;
}
/**
 * Score callback: does the form's document contain a hyperlink whose text
 * matches the register vocabulary or the "forgot password" vocabulary?
 *
 * @param {object} fnode - Fathom node; `fnode.element` is the candidate form.
 * @returns {boolean} True if at least one `<a>` in the owner document matches.
 */
function docHasRegisterOrPasswordForgottenHyperlink(fnode) {
  const { ownerDocument } = fnode.element;
  const isRegisterOrForgottenLink = anchor =>
    checkValueAgainstRegex(anchor.innerText, registerRegex) ||
    checkValueAgainstRegex(anchor.innerText, pwForgottenRegex);
  return getElementDescendants(ownerDocument, "a").some(
    isRegisterOrForgottenLink
  );
}
/**
 * Score callback: does the title of the form's document match the register
 * vocabulary?
 *
 * @param {object} fnode - Fathom node; `fnode.element` is the candidate form.
 * @returns {boolean}
 */
function docTitleMatchesRegisterRegex(fnode) {
  const { title } = fnode.element.ownerDocument;
  return checkValueAgainstRegex(title, registerRegex);
}
/**
 * Score callback: does the title of the form's document match the
 * "edit profile" vocabulary?
 *
 * @param {object} fnode - Fathom node; `fnode.element` is the candidate form.
 * @returns {boolean}
 */
function docTitleMatchesEditProfileRegex(fnode) {
  const { title } = fnode.element.ownerDocument;
  return checkValueAgainstRegex(title, editProfile);
}
/**
* Check header
*/
/**
 * Score callback: does a header inside the form, or else the closest header
 * preceding the form, match the login vocabulary?
 *
 * @param {object} fnode - Fathom node; `fnode.element` is the candidate form.
 * @returns {boolean}
 */
function closestHeaderMatchesLoginRegex(fnode) {
  // Headers inside the form are checked first; the header above the form is
  // only consulted when none of them matches.
  if (headerInFormMatchesRegex(fnode.element, loginRegex)) {
    return true;
  }
  return closestHeaderAboveMatchesRegex(fnode.element, loginRegex);
}
/**
 * Score callback: does a header inside the form, or else the closest header
 * preceding the form, match the register vocabulary?
 *
 * @param {object} fnode - Fathom node; `fnode.element` is the candidate form.
 * @returns {boolean}
 */
function closestHeaderMatchesRegisterRegex(fnode) {
  // Headers inside the form are checked first; the header above the form is
  // only consulted when none of them matches.
  if (headerInFormMatchesRegex(fnode.element, registerRegex)) {
    return true;
  }
  return closestHeaderAboveMatchesRegex(fnode.element, registerRegex);
}
/**
 * Score callback: does a header inside the form, or else the closest header
 * preceding the form, match the newsletter vocabulary?
 *
 * @param {object} fnode - Fathom node; `fnode.element` is the candidate form.
 * @returns {boolean}
 */
function closestHeaderMatchesNewsletterRegex(fnode) {
  // Headers inside the form are checked first; the header above the form is
  // only consulted when none of them matches.
  if (headerInFormMatchesRegex(fnode.element, newsletterRegex)) {
    return true;
  }
  return closestHeaderAboveMatchesRegex(fnode.element, newsletterRegex);
}
/**
* Check Checkboxes
*/
/**
 * Score callback: does the form contain a checkbox whose attributes match the
 * "remember me" vocabulary?
 *
 * @param {object} fnode - Fathom node; `fnode.element` is the candidate form.
 * @returns {boolean}
 */
function formHasRememberMeCheckbox(fnode) {
  const { element: form } = fnode;
  return checkboxInFormMatchesRegex(form, rememberMeRegex);
}
/**
 * Score callback: does the form contain a checkbox whose attributes match the
 * newsletter/subscription vocabulary?
 *
 * @param {object} fnode - Fathom node; `fnode.element` is the candidate form.
 * @returns {boolean}
 */
function formHasSubcriptionCheckbox(fnode) {
  const { element: form } = fnode;
  return checkboxInFormMatchesRegex(form, newsletterRegex);
}
/**
 * Score callback: does the form contain a checkbox whose attributes match the
 * terms-and-conditions vocabulary?
 *
 * @param {object} fnode - Fathom node; `fnode.element` is the candidate form.
 * @returns {boolean}
 */
function formHasTermsAndConditionsCheckbox(fnode) {
  const { element: form } = fnode;
  return checkboxInFormMatchesRegex(form, termsAndConditionsRegex);
}
/**
* Check input fields
*/
/**
 * Score callback: does the form contain an `<input>` with an attribute
 * matching the phone vocabulary?
 *
 * @param {object} fnode - Fathom node; `fnode.element` is the candidate form.
 * @returns {boolean}
 */
function formHasPhoneField(fnode) {
  const { element: form } = fnode;
  return formContainsRegexMatchingElement(form, "input", phoneRegex);
}
/**
 * Score callback: does the form contain an `<input>` or `<select>` with an
 * attribute matching the birthdate vocabulary?
 *
 * @param {object} fnode - Fathom node; `fnode.element` is the candidate form.
 * @returns {boolean}
 */
function formHasBirthdayFields(fnode) {
  const { element: form } = fnode;
  return formContainsRegexMatchingElement(form, "input,select", birthdateRegex);
}
/**
 * Score callback: does the form contain an `<input>` with an attribute
 * matching the first/last/middle name vocabulary?
 *
 * @param {object} fnode - Fathom node; `fnode.element` is the candidate form.
 * @returns {boolean}
 */
function formHasFirstOrLastNameFields(fnode) {
  const { element: form } = fnode;
  return formContainsRegexMatchingElement(form, "input", nameRegex);
}
/**
 * Score callback: does the form contain both an e-mail-like field and a
 * separate name-like field (username or first/last/middle name)?
 *
 * Only `input[type=email]` and `input[type=text]` descendants are considered.
 * Each field is classified as e-mail first; it is only tested against the
 * name vocabularies when it is not e-mail-like, so a single field never
 * satisfies both halves.
 *
 * @param {object} fnode - Fathom node; `fnode.element` is the candidate form.
 * @returns {boolean} True once one field of each kind has been seen.
 */
function formHasEmailAndExtraNameField(fnode) {
  const candidates = getElementDescendants(
    fnode.element,
    "input[type=email],input[type=text]"
  );
  const emailAttributes = ["id", "name", "className", "placeholder"];
  const isEmailValue = attr => checkValueAgainstRegex(attr, emailRegex);
  const isNameValue = attr =>
    checkValueAgainstRegex(attr, usernameRegex) ||
    checkValueAgainstRegex(attr, nameRegex);

  let sawEmail = false;
  let sawName = false;
  for (const input of candidates) {
    if (attributesMatch(input, isEmailValue, emailAttributes)) {
      if (sawName) {
        return true;
      }
      sawEmail = true;
      continue;
    }
    // No attribute list is passed for the name check.
    if (attributesMatch(input, isNameValue)) {
      if (sawEmail) {
        return true;
      }
      sawName = true;
    }
  }
  return false;
}
/**
 * Score callback: does the form contain at least one e-mail-like `<input>`?
 *
 * @param {object} fnode - Fathom node; `fnode.element` is the candidate form.
 * @returns {boolean}
 */
function formHasEmailField(fnode) {
  const emailInputs = getEmailInputElements(fnode.element);
  return atLeastOne(emailInputs);
}
/**
 * Score callback: does the form contain an `<input>` with an attribute
 * matching the username vocabulary?
 *
 * @param {object} fnode - Fathom node; `fnode.element` is the candidate form.
 * @returns {boolean}
 */
function formHasUsernameField(fnode) {
  const { element: form } = fnode;
  return formContainsRegexMatchingElement(form, "input", usernameRegex);
}
/**
 * Score callback: does the form contain an `input[type=password]`?
 *
 * @param {object} fnode - Fathom node; `fnode.element` is the candidate form.
 * @returns {boolean}
 */
function formHasPasswordField(fnode) {
  const { element: form } = fnode;
  return checkInputFieldsForAttr(form, "type=password");
}
/**
* Check autocomplete values
*/
/**
 * Score callback: does the form contain an `input[autocomplete=username]`?
 *
 * @param {object} fnode - Fathom node; `fnode.element` is the candidate form.
 * @returns {boolean}
 */
function formHasAcUsername(fnode) {
  const { element: form } = fnode;
  return checkInputFieldsForAttr(form, "autocomplete=username");
}
/**
 * Score callback: does the form contain an `input[autocomplete=email]`?
 *
 * @param {object} fnode - Fathom node; `fnode.element` is the candidate form.
 * @returns {boolean}
 */
function formHasAcEmail(fnode) {
  const { element: form } = fnode;
  return checkInputFieldsForAttr(form, "autocomplete=email");
}
/**
 * Score callback: does the form contain an
 * `input[autocomplete=current-password]`?
 *
 * @param {object} fnode - Fathom node; `fnode.element` is the candidate form.
 * @returns {boolean}
 */
function formHasAcCurrentPassword(fnode) {
  const { element: form } = fnode;
  return checkInputFieldsForAttr(form, "autocomplete=current-password");
}
/**
 * Score callback: does the form contain an `input[autocomplete=new-password]`?
 *
 * @param {object} fnode - Fathom node; `fnode.element` is the candidate form.
 * @returns {boolean}
 */
function formHasAcNewPassword(fnode) {
  const { element: form } = fnode;
  return checkInputFieldsForAttr(form, "autocomplete=new-password");
}
/**
 * Score callback: does the form contain an `<input>` whose `autocomplete`
 * attribute contains the substring "tel" (CSS `*=` match)?
 *
 * @param {object} fnode - Fathom node; `fnode.element` is the candidate form.
 * @returns {boolean}
 */
function formHasAcTel(fnode) {
  const { element: form } = fnode;
  return checkInputFieldsForAttr(form, "autocomplete*=tel");
}
/**
* Check labels
*/
/**
 * Score callback: is there a new-password input in the form whose closest
 * label (searched in its previous sibling) matches the new-password
 * vocabulary?
 *
 * @param {object} fnode - Fathom node; `fnode.element` is the candidate form.
 * @returns {boolean}
 */
function closestElementIsNewPasswordLabelLike(fnode) {
  const newPasswordInputs = getElementDescendants(
    fnode.element,
    "input[type=password][autocomplete=new-password]"
  );
  return closestElementIsRegexMatchingLabel(
    newPasswordInputs,
    newPasswordRegex
  );
}
/**
 * Score callback: is there an e-mail-like input in the form whose closest
 * label (searched in its previous sibling) matches the e-mail vocabulary?
 *
 * @param {object} fnode - Fathom node; `fnode.element` is the candidate form.
 * @returns {boolean}
 */
function closestElementIsEmailLabelLike(fnode) {
  return closestElementIsRegexMatchingLabel(
    getEmailInputElements(fnode.element),
    emailRegex
  );
}
/**
* Check buttons
*/
/**
 * Score callback: does any button in the form have text matching the register
 * vocabulary?
 *
 * @param {object} fnode - Fathom node; `fnode.element` is the candidate form.
 * @returns {boolean}
 */
function formHasRegisterButton(fnode) {
  for (const button of getButtons(fnode.element)) {
    if (checkValueAgainstRegex(button.innerText, registerRegex)) {
      return true;
    }
  }
  return false;
}
/**
 * Score callback: does any button in the form have text matching the login
 * vocabulary?
 *
 * @param {object} fnode - Fathom node; `fnode.element` is the candidate form.
 * @returns {boolean}
 */
function formHasLoginButton(fnode) {
  for (const button of getButtons(fnode.element)) {
    if (checkValueAgainstRegex(button.innerText, loginRegex)) {
      return true;
    }
  }
  return false;
}
/**
 * Score callback: does any button in the form have text matching the
 * "continue" vocabulary?
 *
 * @param {object} fnode - Fathom node; `fnode.element` is the candidate form.
 * @returns {boolean}
 */
function formHasContinueButton(fnode) {
  for (const button of getButtons(fnode.element)) {
    if (checkValueAgainstRegex(button.innerText, continueRegex)) {
      return true;
    }
  }
  return false;
}
/**
 * Score callback: does any button in the form have text matching the
 * newsletter/subscription vocabulary?
 *
 * @param {object} fnode - Fathom node; `fnode.element` is the candidate form.
 * @returns {boolean}
 */
function formHasSubscribeButton(fnode) {
  for (const button of getButtons(fnode.element)) {
    if (checkValueAgainstRegex(button.innerText, newsletterRegex)) {
      return true;
    }
  }
  return false;
}
/**
* Check form attributes
*/
/**
 * Score callback: does one of the form's "action", "id", "name" or
 * "className" values match the register vocabulary?
 *
 * @param {object} fnode - Fathom node; `fnode.element` is the candidate form.
 * @returns {boolean}
 */
function formAttributesMatchRegisterRegex(fnode) {
  const looksLikeRegister = attr => checkValueAgainstRegex(attr, registerRegex);
  const inspectedAttributes = ["action", "id", "name", "className"];
  return attributesMatch(fnode.element, looksLikeRegister, inspectedAttributes);
}
/**
 * Score callback: does one of the form's "action", "id", "name" or
 * "className" values match the login vocabulary?
 *
 * @param {object} fnode - Fathom node; `fnode.element` is the candidate form.
 * @returns {boolean}
 */
function formAttributesMatchLoginRegex(fnode) {
  const looksLikeLogin = attr => checkValueAgainstRegex(attr, loginRegex);
  const inspectedAttributes = ["action", "id", "name", "className"];
  return attributesMatch(fnode.element, looksLikeLogin, inspectedAttributes);
}
/**
 * Score callback: does an attribute of the form match the newsletter
 * vocabulary? Unlike the register/login variants, no attribute list is
 * passed to `attributesMatch` here.
 *
 * @param {object} fnode - Fathom node; `fnode.element` is the candidate form.
 * @returns {boolean}
 */
function formAttributesMatchNewsletterRegex(fnode) {
  const looksLikeNewsletter = attr =>
    checkValueAgainstRegex(attr, newsletterRegex);
  return attributesMatch(fnode.element, looksLikeNewsletter);
}
/**
 * Score callback: is the form's `method` property exactly "post"?
 *
 * @param {object} fnode - Fathom node; `fnode.element` is the candidate form.
 * @returns {boolean}
 */
function formMethodIsPost(fnode) {
  const { method } = fnode.element;
  return method === "post";
}
/**
* HELPER FUNCTIONS
*/
/**
 * Check whether any of the given elements has a nearby `<label>` whose text
 * matches the regex. The label is searched among the descendants of the
 * element's previous sibling only.
 *
 * @param {Element[]} elements - Input elements to inspect.
 * @param {RegExp} regexExp - Vocabulary the label text is tested against.
 * @returns {boolean}
 */
function closestElementIsRegexMatchingLabel(elements, regexExp) {
  for (const field of elements) {
    const nearestLabel = closestSelectorElementWithinElement(
      field,
      field.previousElementSibling,
      "label"
    );
    if (
      nearestLabel &&
      checkValueAgainstRegex(nearestLabel.innerText, regexExp)
    ) {
      return true;
    }
  }
  return false;
}
/**
 * Check whether any descendant of `element` selected by `selector` has an
 * attribute value matching the regex (no attribute list is passed to
 * `attributesMatch`).
 *
 * @param {Element} element - Root to search in.
 * @param {string} selector - CSS selector for the candidate descendants.
 * @param {RegExp} regexExp - Vocabulary the attribute values are tested against.
 * @returns {boolean}
 */
function formContainsRegexMatchingElement(element, selector, regexExp) {
  const valueMatches = attr => checkValueAgainstRegex(attr, regexExp);
  for (const candidate of getElementDescendants(element, selector)) {
    if (attributesMatch(candidate, valueMatches)) {
      return true;
    }
  }
  return false;
}
/**
 * Check whether `element` has at least one `<input>` descendant matching the
 * attribute selector fragment `attr` (e.g. "type=password").
 *
 * @param {Element} element - Root to search in.
 * @param {string} attr - Content placed inside `input[...]`.
 * @returns {boolean}
 */
function checkInputFieldsForAttr(element, attr) {
  const selector = `input[${attr}]`;
  const matchingInputs = getElementDescendants(element, selector);
  return atLeastOne(matchingInputs);
}
/**
 * Check whether any header-like descendant of `element` (h1-h6, `<header>`,
 * or a div whose class contains "heading", "header" or "title") has text
 * matching the regex.
 *
 * @param {Element} element - Root to search in.
 * @param {RegExp} regexExp - Vocabulary the header text is tested against.
 * @returns {boolean}
 */
function headerInFormMatchesRegex(element, regexExp) {
  const headerSelector =
    "h1,h2,h3,h4,h5,h6,div[class*=heading],div[class*=header],div[class*=title],header";
  for (const header of getElementDescendants(element, headerSelector)) {
    if (checkValueAgainstRegex(header.innerText, regexExp)) {
      return true;
    }
  }
  return false;
}
/**
 * Check whether any `input[type=checkbox]` descendant of `element` has an
 * attribute value matching the regex.
 *
 * @param {Element} element - Root to search in.
 * @param {RegExp} regexExp - Vocabulary the attribute values are tested against.
 * @returns {boolean}
 */
function checkboxInFormMatchesRegex(element, regexExp) {
  const valueMatches = attr => checkValueAgainstRegex(attr, regexExp);
  const checkboxMatches = checkbox => attributesMatch(checkbox, valueMatches);
  return getElementDescendants(element, "input[type=checkbox]").some(
    checkboxMatches
  );
}
/**
 * Collect the `<input>` descendants of `element` that look e-mail related,
 * i.e. one of the listed attribute values matches the e-mail vocabulary.
 *
 * @param {Element} element - Root to search in.
 * @returns {Element[]} The matching inputs, in the order they were selected.
 */
function getEmailInputElements(element) {
  // NOTE(review): "className" and "innerText" are DOM property names rather
  // than content attribute names — confirm attributesMatch resolves them as
  // intended.
  const inspectedAttributes = [
    "type",
    "id",
    "name",
    "className",
    "placeholder",
    "innerText",
  ];
  const isEmailValue = attr => checkValueAgainstRegex(attr, emailRegex);
  const inputs = getElementDescendants(element, "input");
  return inputs.filter(input =>
    attributesMatch(input, isEmailValue, inspectedAttributes)
  );
}
/**
 * Collect the button-like descendants of `element`: `<button>`,
 * `input[type=submit]` and `input[type=button]`.
 *
 * @param {Element} element - Root to search in.
 * @returns {Element[]}
 */
function getButtons(element) {
  const buttonSelector = "button,input[type=submit],input[type=button]";
  return getElementDescendants(element, buttonSelector);
}
/**
 * Cached `querySelectorAll`: returns the descendants of `element` matching
 * `selector` as an array. Results are stored in `elementToSelectors`, keyed
 * first by element and then by selector string, so the same query is only
 * run once until the cache is replaced by `clearCache`.
 *
 * @param {Element|Document} element - Root to query.
 * @param {string} selector - CSS selector.
 * @returns {Element[]} The cached array (shared between callers).
 */
function getElementDescendants(element, selector) {
  const createSelectorCache = () => new Map();
  const queryDescendants = () =>
    Array.from(element.querySelectorAll(selector));
  const selectorCache = setDefault(
    elementToSelectors,
    element,
    createSelectorCache
  );
  return setDefault(selectorCache, selector, queryDescendants);
}
/**
 * Replace the descendant-query cache with a fresh, empty WeakMap. Passed to
 * `.note()` on the rule that assigns the "form" type.
 */
function clearCache() {
  const emptyCache = new WeakMap();
  elementToSelectors = emptyCache;
}
/**
 * Among the descendants of `withinElement` matching `querySelector`, return
 * the one with the smallest `euclidean` distance to `toElement`.
 *
 * @param {Element} toElement - Element the distance is measured to.
 * @param {Element|null} withinElement - Root to search in; may be null.
 * @param {string} querySelector - CSS selector for the candidates.
 * @returns {Element|null} The closest match, or null when `withinElement` is
 *   falsy or nothing matches.
 */
function closestSelectorElementWithinElement(
  toElement,
  withinElement,
  querySelector
) {
  if (!withinElement) {
    return null;
  }
  const candidates = Array.from(withinElement.querySelectorAll(querySelector));
  if (candidates.length === 0) {
    return null;
  }
  const distanceToTarget = candidate => euclidean(candidate, toElement);
  return min(candidates, distanceToTarget);
}
/**
 * Check whether the closest header-like element preceding `element` in the
 * document has text matching the regex.
 *
 * @param {Element} element - Element to look above.
 * @param {RegExp} regex - Vocabulary the header text is tested against.
 * @returns {boolean} False when no preceding header exists.
 */
function closestHeaderAboveMatchesRegex(element, regex) {
  const headerSelector =
    "h1,h2,h3,h4,h5,h6,div[class*=heading],div[class*=header],div[class*=title],header";
  const nearestHeader = closestElementAbove(element, headerSelector);
  return nearestHeader == null
    ? false
    : checkValueAgainstRegex(nearestHeader.innerText, regex);
}
/**
 * Find the last element in the document matching `selector` that precedes
 * `element` according to `compareDocumentPosition`.
 *
 * @param {Element} element - Reference element.
 * @param {string} selector - CSS selector for the candidates.
 * @returns {Element|null} The closest preceding match, or null if none.
 */
function closestElementAbove(element, selector) {
  // Work on a copy; the array returned by getElementDescendants is cached.
  const candidates = [...getElementDescendants(element.ownerDocument, selector)];
  // Walk backwards so the first preceding hit is the one nearest to `element`.
  let index = candidates.length;
  while (index-- > 0) {
    const candidate = candidates[index];
    const relation = element.compareDocumentPosition(candidate);
    if (relation & Node.DOCUMENT_POSITION_PRECEDING) {
      return candidate;
    }
  }
  return null;
}
/**
 * Test whether `value`, lower-cased, contains a match for `regexExp`.
 *
 * @param {string|null|undefined} value - Text to test; falsy values are
 *   treated as the empty string.
 * @param {RegExp} regexExp - Pattern to test against.
 * @returns {boolean} True if the pattern matches anywhere in the value.
 */
function checkValueAgainstRegex(value, regexExp) {
  const lowerCaseValue = value ? value.toLowerCase() : "";
  // The vocabulary regexes are declared with the `g` flag, which makes
  // RegExp.prototype.test() resume from `lastIndex` left by the previous
  // successful call. Without this reset, a match on one value could cause a
  // false negative on the next value tested with the same regex.
  regexExp.lastIndex = 0;
  return regexExp.test(lowerCaseValue);
}
/**
 * Check whether an array-like has at least one entry.
 *
 * @param {{length: number}} iter - Anything exposing a `length`.
 * @returns {boolean} True if `iter.length` is 1 or more.
 */
function atLeastOne(iter) {
  const { length } = iter;
  return 1 <= length;
}
// Scoring callbacks in registration order. Each one is registered as a rule
// on the "form" type whose `name` is the function's own name, which is the
// key used to look up its coefficient.
const formScoringFunctions = [
  // Check form attributes
  formAttributesMatchRegisterRegex,
  formMethodIsPost,
  formAttributesMatchLoginRegex,
  formAttributesMatchNewsletterRegex,
  // Check autocomplete attributes
  formHasAcCurrentPassword,
  formHasAcNewPassword,
  formHasAcTel,
  formHasAcUsername,
  formHasAcEmail,
  // Check input fields
  formHasEmailField,
  formHasUsernameField,
  formHasEmailAndExtraNameField,
  formHasPasswordField,
  formHasFirstOrLastNameFields,
  formHasBirthdayFields,
  formHasPhoneField,
  // Check buttons
  formHasRegisterButton,
  formHasLoginButton,
  formHasContinueButton,
  formHasSubscribeButton,
  // Check labels
  closestElementIsEmailLabelLike,
  closestElementIsNewPasswordLabelLike,
  // Check checkboxes
  formHasTermsAndConditionsCheckbox,
  formHasRememberMeCheckbox,
  formHasSubcriptionCheckbox,
  // Check header
  closestHeaderMatchesRegisterRegex,
  closestHeaderMatchesLoginRegex,
  closestHeaderMatchesNewsletterRegex,
  // Check doc characteristics
  docTitleMatchesRegisterRegex,
  docTitleMatchesEditProfileRegex,
  docHasLoginHyperlink,
  docHasRegisterOrPasswordForgottenHyperlink,
];
const rules = ruleset(
  [
    // Select the candidate forms, give them the "form" type and reset the
    // descendant-query cache through the note callback.
    rule(
      DEVELOPMENT ? dom("form").when(isVisible) : element("form"),
      type("form").note(clearCache)
    ),
    ...formScoringFunctions.map(scoringFunction =>
      rule(type("form"), score(scoringFunction), {
        name: scoringFunction.name,
      })
    ),
    rule(type("form"), out("form")),
  ],
  coeffs,
  biases
);
return rules;
}
/**
* --- END OF RULESET ---
*/
/**
 * Exported model description: the type name "form" plus the ruleset built by
 * createRuleset from the "signup" coefficient entries and the biases.
 */
const SignUpFormRuleset = {
  type: "form",
  rules: createRuleset(Array.from(coefficients.signup), biases),
};