/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

/*
 * Form Autofill field heuristics.
 */

"use strict";

this.EXPORTED_SYMBOLS = ["FormAutofillHeuristics", "LabelUtils"];

const {classes: Cc, interfaces: Ci, utils: Cu, results: Cr} = Components;

Cu.import("resource://gre/modules/Services.jsm");
Cu.import("resource://gre/modules/XPCOMUtils.jsm");
Cu.import("resource://formautofill/FormAutofillUtils.jsm");

this.log = null;
FormAutofillUtils.defineLazyLogGetter(this, this.EXPORTED_SYMBOLS[0]);

const PREF_HEURISTICS_ENABLED = "extensions.formautofill.heuristics.enabled";

/**
 * A scanner for traversing all elements in a form and retrieving the field
 * detail with FormAutofillHeuristics.getInfo function. It also provides a
 * cursor (parsingIndex) to indicate which element is waiting for parsing.
 */
class FieldScanner {
  /**
   * Create a FieldScanner based on form elements with the existing
   * fieldDetails.
   *
   * @param {Array.<DOMElement>} elements
   *        The elements from a form for each parser. Only a weak reference
   *        to this collection is kept.
   */
  constructor(elements) {
    this._elementsWeakRef = Cu.getWeakReference(elements);
    this.fieldDetails = [];
    this._parsingIndex = 0;
  }

  /**
   * The element collection behind the weak reference taken in the
   * constructor.
   */
  get _elements() {
    return this._elementsWeakRef.get();
  }

  /**
   * This cursor means the index of the element which is waiting for parsing.
   *
   * @returns {number}
   *          The index of the element which is waiting for parsing.
   */
  get parsingIndex() {
    return this._parsingIndex;
  }

  /**
   * Move the parsingIndex to the next elements. Any elements behind this index
   * means the parsing tasks are finished.
   *
   * @param {number} index
   *        The latest index of elements waiting for parsing.
   * @throws {Error}
   *         When index is greater than the number of collected field details.
   */
  set parsingIndex(index) {
    if (index > this.fieldDetails.length) {
      throw new Error("The parsing index is out of range.");
    }
    this._parsingIndex = index;
  }

  /**
   * Retrieve the field detail by the index. If the field detail is not ready,
   * the elements will be traversed until matching the index.
   *
   * @param {number} index
   *        The index of the element that you want to retrieve.
   * @returns {Object}
   *          The field detail at the specific index.
   * @throws {Error}
   *         When index is not smaller than the number of elements.
   */
  getFieldDetailByIndex(index) {
    if (index >= this._elements.length) {
      throw new Error(`The index ${index} is out of range.(${this._elements.length})`);
    }

    if (index < this.fieldDetails.length) {
      return this.fieldDetails[index];
    }

    // Lazily collect details for every element up to and including `index`.
    for (let i = this.fieldDetails.length; i < (index + 1); i++) {
      this.pushDetail();
    }

    return this.fieldDetails[index];
  }

  /**
   * True once the parsing cursor has moved past the last element.
   */
  get parsingFinished() {
    return this.parsingIndex >= this._elements.length;
  }

  /**
   * This function will prepare an autocomplete info object with getInfo
   * function and push the detail to fieldDetails property. Any duplicated
   * detail will be marked as _duplicated = true for the parser.
   *
   * Any element without the related detail will be used for adding the detail
   * to the end of field details.
   *
   * @throws {Error}
   *         When a detail already exists for every element.
   */
  pushDetail() {
    let elementIndex = this.fieldDetails.length;
    if (elementIndex >= this._elements.length) {
      throw new Error("Try to push the non-existing element info.");
    }
    let element = this._elements[elementIndex];
    let info = FormAutofillHeuristics.getInfo(element);
    if (!info) {
      // Keep a placeholder detail so fieldDetails stays index-aligned with
      // the elements.
      info = {};
    }
    let fieldInfo = {
      section: info.section,
      addressType: info.addressType,
      contactType: info.contactType,
      fieldName: info.fieldName,
      elementWeakRef: Cu.getWeakReference(element),
    };

    if (info._reason) {
      fieldInfo._reason = info._reason;
    }

    // Store the association between the field metadata and the element.
    if (this.findSameField(info) != -1) {
      // A field with the same identifier already exists.
      log.debug("Not collecting a field matching another with the same info:", info);
      fieldInfo._duplicated = true;
    }

    this.fieldDetails.push(fieldInfo);
  }

  /**
   * When a field detail should be changed its fieldName after parsing, use
   * this function to update the fieldName which is at a specific index.
   *
   * @param {number} index
   *        The index indicates a field detail to be updated.
   * @param {string} fieldName
   *        The new fieldName
   * @throws {Error}
   *         When no field detail exists at index.
   */
  updateFieldName(index, fieldName) {
    if (index >= this.fieldDetails.length) {
      throw new Error("Try to update the non-existing field detail.");
    }
    this.fieldDetails[index].fieldName = fieldName;

    // Re-evaluate the duplicated flag against the new name.
    delete this.fieldDetails[index]._duplicated;
    let indexSame = this.findSameField(this.fieldDetails[index]);
    if (indexSame != index && indexSame != -1) {
      this.fieldDetails[index]._duplicated = true;
    }
  }

  /**
   * Find the first collected field detail whose section, addressType,
   * contactType and fieldName all equal those of the given info.
   *
   * @param {Object} info
   *        The info (or field detail) to compare against.
   * @returns {number}
   *          The index of the first matching field detail, or -1.
   */
  findSameField(info) {
    return this.fieldDetails.findIndex(f => f.section == info.section &&
                                       f.addressType == info.addressType &&
                                       f.contactType == info.contactType &&
                                       f.fieldName == info.fieldName);
  }

  /**
   * Provide the field details without invalid field name and duplicated fields.
   *
   * @returns {Array<Object>}
   *          The array with the field details without invalid field name and
   *          duplicated fields.
   */
  get trimmedFieldDetail() {
    return this.fieldDetails.filter(f => f.fieldName && !f._duplicated);
  }

  /**
   * @param {number} index
   *        An element index.
   * @returns {boolean}
   *          True when index is within the element collection.
   */
  elementExisting(index) {
    return index < this._elements.length;
  }
}

this.LabelUtils = {
  // The tag name list is from Chromium except for "STYLE":
  // eslint-disable-next-line max-len
  // https://cs.chromium.org/chromium/src/components/autofill/content/renderer/form_autofill_util.cc?l=216&rcl=d33a171b7c308a64dc3372fac3da2179c63b419e
  EXCLUDED_TAGS: ["SCRIPT", "NOSCRIPT", "OPTION", "STYLE"],

  // A map object, whose keys are the id's of form fields and each value is an
  // array consisting of label elements corresponding to the id.
  // @type {Map<string, Array<HTMLLabelElement>>}
  _mappedLabels: null,

  // An array consisting of label elements whose corresponding form field
  // doesn't have an id attribute.
  // @type {Array<HTMLLabelElement>}
  _unmappedLabels: null,

  /**
   * Extract all strings of an element's children to an array.
   * "element.textContent" is a string which is merged of all children nodes,
   * and this function provides an array of the strings contains in an element.
   *
   * @param  {Object} element
   *         A DOM element to be extracted.
   * @returns {Array}
   *          All strings in an element.
   */
  extractLabelStrings(element) {
    let strings = [];
    let _extractLabelStrings = (el) => {
      if (this.EXCLUDED_TAGS.includes(el.tagName)) {
        return;
      }

      // Leaf: a text node, or a node without children.
      if (el.nodeType == Ci.nsIDOMNode.TEXT_NODE ||
          el.childNodes.length == 0) {
        let trimmedText = el.textContent.trim();
        if (trimmedText) {
          strings.push(trimmedText);
        }
        return;
      }

      for (let node of el.childNodes) {
        let nodeType = node.nodeType;
        // Only descend into element and text nodes.
        if (nodeType != Ci.nsIDOMNode.ELEMENT_NODE &&
            nodeType != Ci.nsIDOMNode.TEXT_NODE) {
          continue;
        }
        _extractLabelStrings(node);
      }
    };
    _extractLabelStrings(element);
    return strings;
  },

  /**
   * Scan every <label> in the document and fill _mappedLabels and
   * _unmappedLabels. A label's target id is its "htmlFor", falling back to
   * the id of its labeled control; labels with neither "htmlFor" nor a
   * control are ignored.
   *
   * @param {Document} doc
   *        The document whose labels are collected.
   */
  generateLabelMap(doc) {
    let mappedLabels = new Map();
    let unmappedLabels = [];

    for (let label of doc.querySelectorAll("label")) {
      let id = label.htmlFor;
      if (!id) {
        let control = label.control;
        if (!control) {
          continue;
        }
        id = control.id;
      }
      if (id) {
        let labels = mappedLabels.get(id);
        if (labels) {
          labels.push(label);
        } else {
          mappedLabels.set(id, [label]);
        }
      } else {
        unmappedLabels.push(label);
      }
    }

    this._mappedLabels = mappedLabels;
    this._unmappedLabels = unmappedLabels;
  },

  /**
   * Drop the cached label collections so the next findLabelElements call
   * rebuilds them.
   */
  clearLabelMap() {
    this._mappedLabels = null;
    this._unmappedLabels = null;
  },

  /**
   * Find the label elements associated with a form field, building the label
   * cache from the element's owner document when no cache exists yet.
   *
   * @param {Object} element
   *        The form field.
   * @returns {Array}
   *          The labels mapped to the element's id, or, for an element
   *          without an id, the unmapped labels whose control is the element.
   */
  findLabelElements(element) {
    if (!this._mappedLabels) {
      this.generateLabelMap(element.ownerDocument);
    }

    let id = element.id;
    if (!id) {
      return this._unmappedLabels.filter(label => label.control == element);
    }
    return this._mappedLabels.get(id) || [];
  },
};

/**
 * Returns the autocomplete information of fields according to heuristics.
 */
this.FormAutofillHeuristics = {
  // Replaced by a lazy getter at the bottom of this file.
  RULES: null,

  /**
   * Try to match the telephone related fields to the grammar
   * list to see if there is any valid telephone set and correct their
   * field names.
   *
   * @param {FieldScanner} fieldScanner
   *        The current parsing status for all elements
   * @returns {boolean}
   *          Return true if there is any field can be recognized in the parser,
   *          otherwise false.
   */
  _parsePhoneFields(fieldScanner) {
    let matchingResult;

    const GRAMMARS = this.PHONE_FIELD_GRAMMARS;

    for (let i = 0; i < GRAMMARS.length; i++) {
      let detailStart = fieldScanner.parsingIndex;
      let ruleStart = i;
      // Walk one grammar (entries up to the [null, null, 0] separator) in
      // lockstep with the fields starting at the parsing cursor.
      for (; i < GRAMMARS.length && GRAMMARS[i][0] && fieldScanner.elementExisting(detailStart); i++, detailStart++) {
        let detail = fieldScanner.getFieldDetailByIndex(detailStart);
        if (!detail || GRAMMARS[i][0] != detail.fieldName || detail._reason == "autocomplete") {
          break;
        }
        let element = detail.elementWeakRef.get();
        if (!element) {
          break;
        }
        // NOTE(review): the grammar limit is meant to reject fields that are
        // unlimited or longer than the limit. If element.maxLength is -1 when
        // the attribute is absent (as the HTML spec describes), such fields
        // are NOT rejected here -- confirm whether that is intended.
        if (GRAMMARS[i][2] && (!element.maxLength || GRAMMARS[i][2] < element.maxLength)) {
          break;
        }
      }
      if (i >= GRAMMARS.length) {
        break;
      }

      if (!GRAMMARS[i][0]) {
        // Reached the separator: every entry of this grammar matched.
        matchingResult = {
          ruleFrom: ruleStart,
          ruleTo: i,
        };
        break;
      }

      // Fast rewinding to the next rule.
      for (; i < GRAMMARS.length; i++) {
        if (!GRAMMARS[i][0]) {
          break;
        }
      }
    }

    let parsedField = false;
    if (matchingResult) {
      let {ruleFrom, ruleTo} = matchingResult;
      let detailStart = fieldScanner.parsingIndex;
      for (let i = ruleFrom; i < ruleTo; i++) {
        fieldScanner.updateFieldName(detailStart, GRAMMARS[i][1]);
        fieldScanner.parsingIndex++;
        detailStart++;
        parsedField = true;
      }
    }

    if (fieldScanner.parsingFinished) {
      return parsedField;
    }

    // A "tel-extension" field right after the cursor is consumed as well.
    let nextField = fieldScanner.getFieldDetailByIndex(fieldScanner.parsingIndex);
    if (nextField && nextField.fieldName == "tel-extension") {
      fieldScanner.parsingIndex++;
      parsedField = true;
    }

    return parsedField;
  },

  /**
   * Try to find the correct address-line[1-3] sequence and correct their field
   * names.
   *
   * @param {FieldScanner} fieldScanner
   *        The current parsing status for all elements
   * @returns {boolean}
   *          Return true if there is any field can be recognized in the parser,
   *          otherwise false.
   */
  _parseAddressFields(fieldScanner) {
    let parsedFields = false;
    let addressLines = ["address-line1", "address-line2", "address-line3"];
    for (let i = 0; !fieldScanner.parsingFinished && i < addressLines.length; i++) {
      let detail = fieldScanner.getFieldDetailByIndex(fieldScanner.parsingIndex);
      if (!detail || !addressLines.includes(detail.fieldName)) {
        // When the field is not related to any address-line[1-3] fields, it
        // means the parsing process can be terminated.
        break;
      }
      fieldScanner.updateFieldName(fieldScanner.parsingIndex, addressLines[i]);
      fieldScanner.parsingIndex++;
      parsedFields = true;
    }

    return parsedFields;
  },

  /**
   * This function should provide all field details of a form. The details
   * contain the autocomplete info (e.g. fieldName, section, etc).
   *
   * `allowDuplicates` is used for the xpcshell-test purpose currently because
   * the heuristics should be verified that some duplicated elements still can
   * be predicted correctly.
   *
   * @param {HTMLFormElement} form
   *        the elements in this form to be predicted the field info.
   * @param {boolean} allowDuplicates
   *        true to remain any duplicated field details otherwise to remove the
   *        duplicated ones.
   * @returns {Array<Object>}
   *        all field details in the form.
   */
  getFormInfo(form, allowDuplicates = false) {
    if (form.elements.length <= 0) {
      return [];
    }

    let fieldScanner = new FieldScanner(form.elements);
    try {
      while (!fieldScanner.parsingFinished) {
        let parsedPhoneFields = this._parsePhoneFields(fieldScanner);
        let parsedAddressFields = this._parseAddressFields(fieldScanner);

        // If there is no any field parsed, the parsing cursor can be moved
        // forward to the next one.
        if (!parsedPhoneFields && !parsedAddressFields) {
          fieldScanner.parsingIndex++;
        }
      }
    } finally {
      // The label cache is built from a single document and is only rebuilt
      // when empty, so it must be dropped even when parsing throws; otherwise
      // a later form in another document would be matched against stale
      // labels.
      LabelUtils.clearLabelMap();
    }

    if (allowDuplicates) {
      return fieldScanner.fieldDetails;
    }

    return fieldScanner.trimmedFieldDetail;
  },

  /**
   * Determine the autocomplete info of a single element, from its
   * autocomplete attribute when that names a field, otherwise from the
   * heuristics (input type, then the RULES regexps applied to the element's
   * id, name and label strings).
   *
   * @param {Object} element
   *        The form field to inspect.
   * @returns {Object|null}
   *          An info object with fieldName, section, addressType and
   *          contactType (plus _reason = "autocomplete" when it comes from
   *          the attribute), or null when the element is not eligible, the
   *          heuristics are disabled, or nothing matches.
   */
  getInfo(element) {
    if (!FormAutofillUtils.isFieldEligibleForAutofill(element)) {
      return null;
    }

    let info = element.getAutocompleteInfo();
    // An input[autocomplete="on"] will not be early return here since it still
    // needs to find the field name.
    if (info && info.fieldName && info.fieldName != "on" && info.fieldName != "off") {
      info._reason = "autocomplete";
      return info;
    }

    if (!this._prefEnabled) {
      return null;
    }

    let isAutoCompleteOff = element.autocomplete == "off" ||
      (element.form && element.form.autocomplete == "off");

    // "email" type of input is accurate for heuristics to determine its Email
    // field or not. However, "tel" type is used for ZIP code for some web site
    // (e.g. HomeDepot, BestBuy), so "tel" type should be not used for "tel"
    // prediction.
    if (element.type == "email" && !isAutoCompleteOff) {
      return {
        fieldName: "email",
        section: "",
        addressType: "",
        contactType: "",
      };
    }

    const FIELDNAMES_IGNORING_AUTOCOMPLETE_OFF = [
      "cc-name",
      "cc-number",
      "cc-exp-month",
      "cc-exp-year",
      "cc-exp",
    ];
    let regexps = isAutoCompleteOff ? FIELDNAMES_IGNORING_AUTOCOMPLETE_OFF : Object.keys(this.RULES);

    if (!FormAutofillUtils.isAutofillCreditCardsAvailable) {
      // Drop the credit card rules; the filtered lists are cached on this
      // object, one per autocomplete-off state.
      if (isAutoCompleteOff) {
        if (!this._regexpListOf_CcUnavailable_AcOff) {
          this._regexpListOf_CcUnavailable_AcOff = regexps.filter(name => !FormAutofillUtils.isCreditCardField(name));
        }
        regexps = this._regexpListOf_CcUnavailable_AcOff;
      } else {
        if (!this._regexpListOf_CcUnavailable_AcOn) {
          this._regexpListOf_CcUnavailable_AcOn = regexps.filter(name => !FormAutofillUtils.isCreditCardField(name));
        }
        regexps = this._regexpListOf_CcUnavailable_AcOn;
      }
    }

    if (regexps.length == 0) {
      return null;
    }

    // Yields the element's id, its name, then its label strings. The label
    // strings are computed on first need and reused for later iterations.
    let labelStrings;
    let getElementStrings = {};
    getElementStrings[Symbol.iterator] = function* () {
      yield element.id;
      yield element.name;
      if (!labelStrings) {
        labelStrings = [];
        let labels = LabelUtils.findLabelElements(element);
        for (let label of labels) {
          labelStrings.push(...LabelUtils.extractLabelStrings(label));
        }
      }
      yield* labelStrings;
    };

    // NOTE(review): in the text this file was recovered from, everything
    // between the opening of the comment below and the phone grammar notation
    // list was lost (angle-bracketed spans were stripped). The rest of this
    // loop, the final `return null`, and the head of the PHONE_FIELD_GRAMMARS
    // comment are reconstructed -- verify against the upstream revision.
    for (let regexp of regexps) {
      for (let string of getElementStrings) {
        // The original regexp "(?<!united )state|county|region|province" for
        // "address-level1" wants to exclude any "united state" string, so the
        // following code is to remove all "united state" string before applying
        // "address-level1" regexp.
        //
        // Since "united state" string matches to the regexp of address-line2&3,
        // the two regexps should be excluded here.
        if (["address-level1", "address-line2", "address-line3"].includes(regexp)) {
          string = string.toLowerCase().split("united state").join("");
        }
        if (this.RULES[regexp].test(string)) {
          return {
            fieldName: regexp,
            section: "",
            addressType: "",
            contactType: "",
          };
        }
      }
    }

    return null;
  },

  /**
   * Phone field grammars - first matched grammar will be parsed. Grammars are
   * separated by { REGEX_SEPARATOR, FIELD_NONE, 0 }. Suffix and extension are
   * parsed separately unless they are necessary parts of the match.
   * The following notation is used to describe the patterns:
   * <cc> - country code field.
   * <ac> - area code field.
   * <phone> - phone or prefix.
   * <suffix> - suffix.
   * <ext> - extension.
   * :N means field is limited to N characters, otherwise it is unlimited.
   * (pattern <field>)? means pattern is optional and matched separately.
   *
   * This grammar list from Chromium will be enabled partially once we need to
   * support more cases of Telephone fields.
   *
   * Each enabled entry is [fieldName to match, fieldName to assign, size
   * limit (0 for none)]; [null, null, 0] terminates a grammar.
   */
  PHONE_FIELD_GRAMMARS: [
    // Country code: <cc> Area Code: <ac> Phone: <phone> (- <suffix>
    // (Ext: <ext>)?)?
    // {REGEX_COUNTRY, FIELD_COUNTRY_CODE, 0},
    // {REGEX_AREA, FIELD_AREA_CODE, 0},
    // {REGEX_PHONE, FIELD_PHONE, 0},
    // {REGEX_SEPARATOR, FIELD_NONE, 0},

    // \( <ac> \) <phone>:3 <suffix>:4 (Ext: <ext>)?
    // {REGEX_AREA_NOTEXT, FIELD_AREA_CODE, 3},
    // {REGEX_PREFIX_SEPARATOR, FIELD_PHONE, 3},
    // {REGEX_PHONE, FIELD_SUFFIX, 4},
    // {REGEX_SEPARATOR, FIELD_NONE, 0},

    // Phone: <cc> <ac>:3 - <phone>:3 - <suffix>:4 (Ext: <ext>)?
    // {REGEX_PHONE, FIELD_COUNTRY_CODE, 0},
    // {REGEX_PHONE, FIELD_AREA_CODE, 3},
    // {REGEX_PREFIX_SEPARATOR, FIELD_PHONE, 3},
    // {REGEX_SUFFIX_SEPARATOR, FIELD_SUFFIX, 4},
    // {REGEX_SEPARATOR, FIELD_NONE, 0},

    // Phone: <cc>:3 <ac>:3 <phone>:3 <suffix>:4 (Ext: <ext>)?
    ["tel", "tel-country-code", 3],
    ["tel", "tel-area-code", 3],
    ["tel", "tel-local-prefix", 3],
    ["tel", "tel-local-suffix", 4],
    [null, null, 0],

    // Area Code: <ac> Phone: <phone> (- <suffix> (Ext: <ext>)?)?
    // {REGEX_AREA, FIELD_AREA_CODE, 0},
    // {REGEX_PHONE, FIELD_PHONE, 0},
    // {REGEX_SEPARATOR, FIELD_NONE, 0},

    // Phone: <ac> <phone>:3 <suffix>:4 (Ext: <ext>)?
    // {REGEX_PHONE, FIELD_AREA_CODE, 0},
    // {REGEX_PHONE, FIELD_PHONE, 3},
    // {REGEX_PHONE, FIELD_SUFFIX, 4},
    // {REGEX_SEPARATOR, FIELD_NONE, 0},

    // Phone: <cc> \( <ac> \) <phone> (- <suffix> (Ext: <ext>)?)?
    // {REGEX_PHONE, FIELD_COUNTRY_CODE, 0},
    // {REGEX_AREA_NOTEXT, FIELD_AREA_CODE, 0},
    // {REGEX_PREFIX_SEPARATOR, FIELD_PHONE, 0},
    // {REGEX_SEPARATOR, FIELD_NONE, 0},

    // Phone: \( <ac> \) <phone> (- <suffix> (Ext: <ext>)?)?
    // {REGEX_PHONE, FIELD_COUNTRY_CODE, 0},
    // {REGEX_AREA_NOTEXT, FIELD_AREA_CODE, 0},
    // {REGEX_PREFIX_SEPARATOR, FIELD_PHONE, 0},
    // {REGEX_SEPARATOR, FIELD_NONE, 0},

    // Phone: <cc> - <ac> - <phone> - <suffix> (Ext: <ext>)?
    // {REGEX_PHONE, FIELD_COUNTRY_CODE, 0},
    // {REGEX_PREFIX_SEPARATOR, FIELD_AREA_CODE, 0},
    // {REGEX_PREFIX_SEPARATOR, FIELD_PHONE, 0},
    // {REGEX_SUFFIX_SEPARATOR, FIELD_SUFFIX, 0},
    // {REGEX_SEPARATOR, FIELD_NONE, 0},

    // Area code: <ac>:3 Prefix: <prefix>:3 Suffix: <suffix>:4 (Ext: <ext>)?
    // {REGEX_AREA, FIELD_AREA_CODE, 3},
    // {REGEX_PREFIX, FIELD_PHONE, 3},
    // {REGEX_SUFFIX, FIELD_SUFFIX, 4},
    // {REGEX_SEPARATOR, FIELD_NONE, 0},

    // Phone: <ac> Prefix: <phone> Suffix: <suffix> (Ext: <ext>)?
    // {REGEX_PHONE, FIELD_AREA_CODE, 0},
    // {REGEX_PREFIX, FIELD_PHONE, 0},
    // {REGEX_SUFFIX, FIELD_SUFFIX, 0},
    // {REGEX_SEPARATOR, FIELD_NONE, 0},

    // Phone: <ac> - <phone>:3 - <suffix>:4 (Ext: <ext>)?
    ["tel", "tel-area-code", 0],
    ["tel", "tel-local-prefix", 3],
    ["tel", "tel-local-suffix", 4],
    [null, null, 0],

    // Phone: <cc> - <ac> - <phone> (Ext: <ext>)?
    // {REGEX_PHONE, FIELD_COUNTRY_CODE, 0},
    // {REGEX_PREFIX_SEPARATOR, FIELD_AREA_CODE, 0},
    // {REGEX_SUFFIX_SEPARATOR, FIELD_PHONE, 0},
    // {REGEX_SEPARATOR, FIELD_NONE, 0},

    // Phone: <ac> - <phone> (Ext: <ext>)?
    // {REGEX_AREA, FIELD_AREA_CODE, 0},
    // {REGEX_PHONE, FIELD_PHONE, 0},
    // {REGEX_SEPARATOR, FIELD_NONE, 0},

    // Phone: <cc>:3 - <phone>:10 (Ext: <ext>)?
    // {REGEX_PHONE, FIELD_COUNTRY_CODE, 3},
    // {REGEX_PHONE, FIELD_PHONE, 10},
    // {REGEX_SEPARATOR, FIELD_NONE, 0},

    // Ext: <ext>
    // {REGEX_EXTENSION, FIELD_EXTENSION, 0},
    // {REGEX_SEPARATOR, FIELD_NONE, 0},

    // Phone: <phone> (Ext: <ext>)?
    // {REGEX_PHONE, FIELD_PHONE, 0},
    // {REGEX_SEPARATOR, FIELD_NONE, 0},
  ],
};

// Load the heuristics regexps into a sandbox object on first access to RULES.
XPCOMUtils.defineLazyGetter(this.FormAutofillHeuristics, "RULES", () => {
  let sandbox = {};
  let scriptLoader = Cc["@mozilla.org/moz/jssubscript-loader;1"]
                       .getService(Ci.mozIJSSubScriptLoader);
  const HEURISTICS_REGEXP = "chrome://formautofill/content/heuristicsRegexp.js";
  scriptLoader.loadSubScript(HEURISTICS_REGEXP, sandbox, "utf-8");
  return sandbox.HeuristicsRegExp.RULES;
});

// Read the enabling pref on first access, then keep it in sync via the
// observer below.
XPCOMUtils.defineLazyGetter(this.FormAutofillHeuristics, "_prefEnabled", () => {
  return Services.prefs.getBoolPref(PREF_HEURISTICS_ENABLED);
});

Services.prefs.addObserver(PREF_HEURISTICS_ENABLED, () => {
  this.FormAutofillHeuristics._prefEnabled = Services.prefs.getBoolPref(PREF_HEURISTICS_ENABLED);
});