// fune/devtools/shared/css/parsing-utils.js

/* -*- indent-tabs-mode: nil; js-indent-level: 2 -*- */
/* vim: set ts=2 et sw=2 tw=80: */
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

// This file holds various CSS parsing and rewriting utilities.
// Some entry points of note are:
// parseDeclarations - parse a CSS rule into declarations
// parsePseudoClassesAndAttributes - parse selector and extract
//     pseudo-classes
// parseSingleValue - parse a single CSS property value

"use strict";

const {getCSSLexer} = require("devtools/shared/css/lexer");

loader.lazyRequireGetter(this, "CSS_ANGLEUNIT", "devtools/shared/css/constants", true);

// Component types that parsePseudoClassesAndAttributes stores in the
// "type" field of each entry it returns.
const SELECTOR_ATTRIBUTE = exports.SELECTOR_ATTRIBUTE = 1;
const SELECTOR_ELEMENT = exports.SELECTOR_ELEMENT = 2;
const SELECTOR_PSEUDO_CLASS = exports.SELECTOR_PSEUDO_CLASS = 3;
// Maps each block-opening symbol to the symbol that closes it.  Used by
// parseDeclarationsInternal to track which closer it is waiting for.
const CSS_BLOCKS = { "(": ")", "[": "]", "{": "}" };

// When commenting out a declaration, we put this character into the
// comment opener so that future parses of the commented text know to
// bypass the property name validity heuristic.
const COMMENT_PARSING_HEURISTIC_BYPASS_CHAR =
  exports.COMMENT_PARSING_HEURISTIC_BYPASS_CHAR = "!";

/**
 * Lazily lex a CSS source string, producing one token per iteration
 * step.  Comment tokens are filtered out and never reach the caller.
 *
 * @param {String} string The CSS source text to lex.
 * @yield {CSSToken} Each non-comment token, in the order the lexer
 *        produces them.
 * @see CSSToken for details about the yielded tokens
 */
function* cssTokenizer(string) {
  const lexer = getCSSLexer(string);
  // The lexer signals the end of input with a falsy token.
  for (let token = lexer.nextToken(); token; token = lexer.nextToken()) {
    // No existing consumer is interested in comments, so skip them.
    if (token.tokenType === "comment") {
      continue;
    }
    yield token;
  }
}

/**
 * Lex all of |string| eagerly and return the resulting tokens as an
 * array, leaving out comment tokens.  Every returned token is given
 * an extra "loc" attribute of the form
 * {start: {line, column}, end: {line, column}}.  Lines and columns
 * are both zero based.
 *
 * Both positions are read from the lexer: "start" is the position
 * the lexer reports right after producing the token, and "end" is
 * the position it reports right after producing the following token
 * (or after reaching the end of the input).
 *
 * Please avoid adding new callers.  CSSToken offsets are generally
 * simpler and better than line/column pairs, and this function
 * processes the whole input up front instead of yielding tokens
 * lazily.  Prefer |cssTokenizer| or |getCSSLexer|.
 *
 * @param {String} string The input string.
 * @return {Array} An array of tokens (@see CSSToken) that have
 *        line and column information.
 */
function cssTokenizerWithLineColumn(string) {
  const lexer = getCSSLexer(string);
  const tokens = [];
  // The most recently kept token whose end location is still unknown.
  let pending = null;

  for (;;) {
    const token = lexer.nextToken();
    const {lineNumber: line, columnNumber: column} = lexer;

    // Wherever the lexer is now is where the previous token ended.
    if (pending) {
      pending.loc.end = {line, column};
    }

    if (!token) {
      return tokens;
    }

    if (token.tokenType === "comment") {
      // The previous token's end was recorded just above; there is
      // nothing left to patch up on the next iteration.
      pending = null;
      continue;
    }

    token.loc = {start: {line, column}};
    tokens.push(token);
    pending = token;
  }
}

/**
 * Make a piece of text safe to embed inside a CSS comment.  Every
 * comment opener and closer found in the text gets one backslash
 * inserted between its two delimiter characters (in front of any
 * backslashes already sitting there), so the lexer no longer sees a
 * comment boundary.
 *
 * @param {String} inputString
 *                 the text to escape
 * @return {String} the escaped text
 */
function escapeCSSComment(inputString) {
  return inputString
    // Openers first: a slash, optional backslashes, then a star.
    .replace(/\/(\\*)\*/g, "/\\$1*")
    // Then closers: a star, optional backslashes, then a slash.
    .replace(/\*(\\*)\//g, "*\\$1/");
}

/**
 * Reverse the transformation applied by escapeCSSComment: wherever a
 * comment opener or closer has backslashes between its two delimiter
 * characters, exactly one of those backslashes is removed.  For
 * example "/\* comment *\/" gets its backslashes stripped.
 *
 * @param {String} inputString
 *                 the escaped text
 * @return {String} the text with one level of escaping removed
 */
function unescapeCSSComment(inputString) {
  // Escaped openers: a slash, one or more backslashes, then a star.
  const openersRestored = inputString.replace(/\/\\(\\*)\*/g, "/$1*");
  // Escaped closers: a star, one or more backslashes, then a slash.
  const closersRestored = openersRestored.replace(/\*\\(\\*)\//g, "*$1/");
  return closersRestored;
}

/**
 * Parse the body of a comment as a list of declarations, on behalf
 * of @see parseDeclarations.  The comment body is un-escaped before
 * it is handed to parseDeclarationsInternal, and the offsets that
 * come back are then translated so that they point into the
 * original text that contained the comment.
 *
 * @param {Function} isCssPropertyKnown
 *        A function to check if the CSS property is known.  It is
 *        passed through to parseDeclarationsInternal.
 * @param {String} commentText The text of the comment, without the
 *                             delimiters.
 * @param {Number} startOffset The offset of the comment opener
 *                             in the original text.
 * @param {Number} endOffset The offset of the comment closer
 *                           in the original text.
 * @return {array} Array of declarations of the same form as returned
 *                 by parseDeclarations, each one also carrying a
 *                 "commentOffsets" property of [startOffset, endOffset].
 */
function parseCommentDeclarations(isCssPropertyKnown, commentText, startOffset,
                                  endOffset) {
  if (commentText === "") {
    return [];
  }

  // A leading bypass character is the marker that rewriteDeclarations
  // wrote this comment, in which case the property-name heuristic is
  // skipped.  The marker itself is not part of the text to parse.
  const commentOverride =
    commentText[0] === COMMENT_PARSING_HEURISTIC_BYPASS_CHAR;
  const escapedBody = commentOverride ? commentText.substring(1) : commentText;

  const rewrittenText = unescapeCSSComment(escapedBody);

  // Un-escaping may have removed backslashes from embedded comment
  // delimiters (/\* ... *\/ becomes /* ... */), which the lexer needs
  // but which shifts every later offset.  To undo the shift we build a
  // table with one entry per index of |rewrittenText| -- plus one more,
  // because an end offset can equal the length -- giving the matching
  // offset in the original text.
  //
  // Step one: flag each index of |rewrittenText| where a comment
  // opener or closer begins.
  const delimiterFlags = new Array(rewrittenText.length + 1).fill(0);
  const delimiterRe = /\/\\*\*|\*\\*\//g;
  for (let found = delimiterRe.exec(rewrittenText); found;
       found = delimiterRe.exec(rewrittenText)) {
    delimiterFlags[found.index] = 1;
  }

  // Step two: turn the flags into absolute offsets.  The base is
  // |startOffset| plus 2 for the "/*", plus 1 more when the bypass
  // character was stripped.  On top of that comes the index itself and
  // the running count of flags seen so far.
  const base = startOffset + 2 + (commentOverride ? 1 : 0);
  let flagsSeen = 0;
  const toOriginalOffset = delimiterFlags.map((flag, index) => {
    flagsSeen += flag;
    return base + index + flagsSeen;
  });

  // parseComments is "false" here: declarations inside
  // comments-in-comments are not supported, since the tools cannot
  // generate them and nobody is expected to write them by hand.
  const newDecls = parseDeclarationsInternal(isCssPropertyKnown,
                                             rewrittenText, false, true,
                                             commentOverride);
  for (const decl of newDecls) {
    // Translate both offset pairs in place.
    for (const range of [decl.offsets, decl.colonOffsets]) {
      range[0] = toOriginalOffset[range[0]];
      range[1] = toOriginalOffset[range[1]];
    }
    decl.commentOffsets = [startOffset, endOffset];
  }
  return newDecls;
}

/**
 * Build a blank declaration record for parseDeclarationsInternal to
 * fill in.  Every call returns a fresh object with its own "offsets"
 * array.
 *
 * @return {object} an empty declaration of the form returned by
 *                  parseDeclarations: empty strings for the text
 *                  fields, unset start/end offsets, and colonOffsets
 *                  set to false until a colon has been seen.
 */
function getEmptyDeclaration() {
  const declaration = {
    name: "",
    value: "",
    priority: "",
    terminator: "",
    offsets: [undefined, undefined],
    colonOffsets: false,
  };
  return declaration;
}

/**
 * Like trim, but only trims CSS-allowed whitespace: space, tab,
 * carriage return, newline and form feed.  Other characters that
 * String.prototype.trim would remove (for example a non-breaking
 * space or a vertical tab) are left alone.
 *
 * Whitespace in the middle of the string, including line breaks, is
 * preserved; only the leading and trailing runs are removed.
 *
 * @param {String} str The string to trim.  Non-string values are
 *        converted with String() first.
 * @return {String} The trimmed string.
 */
function cssTrim(str) {
  const CSS_WHITESPACE = " \t\r\n\f";
  const text = String(str);

  // Scan inwards from both ends.  This is linear in the input length
  // and, unlike a regular expression built around ".", it is not
  // affected by line terminators inside the string.
  let start = 0;
  let end = text.length;
  while (start < end && CSS_WHITESPACE.includes(text[start])) {
    start++;
  }
  while (end > start && CSS_WHITESPACE.includes(text[end - 1])) {
    end--;
  }
  return text.substring(start, end);
}

/**
 * A helper function that does all the parsing work for
 * parseDeclarations.  This is separate because it has some arguments
 * that don't make sense in isolation.
 *
 * The input is lexed one token at a time.  Text is accumulated in a
 * buffer which becomes the property name when the first ":" of a
 * declaration is seen, and the property value when a ";" outside of
 * any block (or the end of the input) is reached.  A small state
 * machine tracks "!important" so that it ends up in the declaration's
 * priority rather than in its value.
 *
 * The return value and arguments are like parseDeclarations, with
 * these additional arguments.
 *
 * @param {Function} isCssPropertyKnown
 *        Function to check if the CSS property is known.  It is only
 *        called here when inComment is true and commentOverride is
 *        false; it is also passed on when parsing nested comments.
 * @param {String} inputString
 *        The CSS text to parse.
 * @param {Boolean} parseComments
 *        If true, a comment found while the current declaration has
 *        neither a name nor a value is itself parsed for declarations.
 * @param {Boolean} inComment
 *        If true, assume that this call is parsing some text
 *        which came from a comment in another declaration.
 *        In this case some heuristics are used to avoid parsing
 *        text which isn't obviously a series of declarations.
 * @param {Boolean} commentOverride
 *        This only makes sense when inComment=true.
 *        When true, assume that the comment was generated by
 *        rewriteDeclarations, and skip the usual name-checking
 *        heuristic.
 * @return {Array} The parsed declarations, in the form documented on
 *         parseDeclarations.  Entries that ended up with neither a
 *         name nor a value are removed.
 * @throws {Error} If inputString is null or undefined.
 */
/* eslint-disable complexity */
function parseDeclarationsInternal(isCssPropertyKnown, inputString,
                                   parseComments, inComment, commentOverride) {
  if (inputString === null || inputString === undefined) {
    throw new Error("empty input string");
  }

  const lexer = getCSSLexer(inputString);

  // The last element of |declarations| is always the declaration that
  // is currently being built; |lastProp| refers to that element.
  let declarations = [getEmptyDeclaration()];
  let lastProp = declarations[0];

  // This tracks the various CSS blocks the current token is in currently.
  // This is a stack we push to when a block is opened, and we pop from when a block is
  // closed. Within a block, colons and semicolons don't advance the way they do outside
  // of blocks.
  let currentBlocks = [];

  // This tracks the "!important" parsing state.  The states are:
  // 0 - haven't seen anything
  // 1 - have seen "!", looking for "important" next (possibly after
  //     whitespace).
  // 2 - have seen "!important"
  let importantState = 0;
  // This is true if we saw whitespace or comments between the "!" and
  // the "important".
  let importantWS = false;
  // Text accumulated so far for the name or value being parsed.
  let current = "";
  while (true) {
    const token = lexer.nextToken();
    if (!token) {
      break;
    }

    // Update the start and end offsets of the declaration, but only
    // when we see a significant token.
    if (token.tokenType !== "whitespace" && token.tokenType !== "comment") {
      if (lastProp.offsets[0] === undefined) {
        lastProp.offsets[0] = token.startOffset;
      }
      lastProp.offsets[1] = token.endOffset;
    } else if (lastProp.name && !current && !importantState &&
               !lastProp.priority && lastProp.colonOffsets[1]) {
      // Whitespace appearing after the ":" is attributed to it.
      lastProp.colonOffsets[1] = token.endOffset;
    } else if (importantState === 1) {
      // Whitespace or a comment directly after a "!".
      importantWS = true;
    }

    if (token.tokenType === "symbol" &&
        currentBlocks[currentBlocks.length - 1] === token.text) {
      // Closing the last block that was opened.
      currentBlocks.pop();
      current += token.text;
    } else if (token.tokenType === "symbol" && CSS_BLOCKS[token.text]) {
      // Opening a new block.
      currentBlocks.push(CSS_BLOCKS[token.text]);
      current += token.text;
    } else if (token.tokenType === "function") {
      // Opening a function is like opening a new block, so push one to the stack.
      currentBlocks.push(CSS_BLOCKS["("]);
      current += token.text + "(";
    } else if (token.tokenType === "symbol" && token.text === ":") {
      // Either way, a "!important" we've seen is no longer valid now.
      importantState = 0;
      importantWS = false;
      if (!lastProp.name) {
        // Set the current declaration name if there's no name yet
        lastProp.name = cssTrim(current);
        lastProp.colonOffsets = [token.startOffset, token.endOffset];
        current = "";
        currentBlocks = [];

        // When parsing a comment body, if the left-hand-side is not a
        // valid property name, then drop it and stop parsing.
        if (inComment && !commentOverride &&
            !isCssPropertyKnown(lastProp.name)) {
          lastProp.name = null;
          break;
        }
      } else {
        // Otherwise, just append ':' to the current value (declaration value
        // with colons)
        current += ":";
      }
    } else if (token.tokenType === "symbol" && token.text === ";" &&
               !currentBlocks.length) {
      // An explicit ";" ends the declaration, so no extra terminator
      // text is needed for it.
      lastProp.terminator = "";
      // When parsing a comment, if the name hasn't been set, then we
      // have probably just seen an ordinary semicolon used in text,
      // so drop this and stop parsing.
      if (inComment && !lastProp.name) {
        current = "";
        currentBlocks = [];
        break;
      }
      if (importantState === 2) {
        lastProp.priority = "important";
      } else if (importantState === 1) {
        // A "!" that was never followed by "important" stays in the
        // value text.
        current += "!";
        if (importantWS) {
          current += " ";
        }
      }
      lastProp.value = cssTrim(current);
      // Reset all per-declaration state and start a new declaration.
      current = "";
      currentBlocks = [];
      importantState = 0;
      importantWS = false;
      declarations.push(getEmptyDeclaration());
      lastProp = declarations[declarations.length - 1];
    } else if (token.tokenType === "ident") {
      if (token.text === "important" && importantState === 1) {
        // "important" following a "!" completes the priority.
        importantState = 2;
      } else {
        if (importantState > 0) {
          // The pending "!" or "!important" turned out to be followed
          // by more value text, so put it back into the buffer.
          current += "!";
          if (importantWS) {
            current += " ";
          }
          if (importantState === 2) {
            current += "important ";
          }
          importantState = 0;
          importantWS = false;
        }
        // Re-escape the token to avoid dequoting problems.
        // See bug 1287620.
        current += CSS.escape(token.text);
      }
    } else if (token.tokenType === "symbol" && token.text === "!") {
      importantState = 1;
    } else if (token.tokenType === "whitespace") {
      // A run of whitespace is reduced to a single space, and
      // whitespace at the very start of the buffer is dropped.
      if (current !== "") {
        current = current.trimRight() + " ";
      }
    } else if (token.tokenType === "comment") {
      if (parseComments && !lastProp.name && !lastProp.value) {
        // Drop two characters at each end of the token, i.e. the
        // comment delimiters.
        // NOTE(review): this assumes the comment token ends with a
        // closer; confirm what endOffset is for an unterminated comment.
        const commentText = inputString.substring(token.startOffset + 2,
                                                token.endOffset - 2);
        const newDecls = parseCommentDeclarations(isCssPropertyKnown, commentText,
                                                token.startOffset,
                                                token.endOffset);

        // Insert the new declarations just before the final element.
        const lastDecl = declarations.pop();
        declarations = [...declarations, ...newDecls, lastDecl];
      } else {
        // A comment that is not parsed acts like a single space.
        current = current.trimRight() + " ";
      }
    } else {
      if (importantState > 0) {
        // Same as in the "ident" case above: the pending "!" or
        // "!important" is really part of the value text.
        current += "!";
        if (importantWS) {
          current += " ";
        }
        if (importantState === 2) {
          current += "important ";
        }
        importantState = 0;
        importantWS = false;
      }
      // Any other token is copied verbatim from the input.
      current += inputString.substring(token.startOffset, token.endOffset);
    }
  }

  // Handle whatever trailing properties or values might still be there
  if (current) {
    if (!lastProp.name) {
      // Ignore this case in comments.
      if (!inComment) {
        // Trailing property found, e.g. p1:v1;p2:v2;p3
        lastProp.name = cssTrim(current);
      }
    } else {
      // Trailing value found, i.e. value without an ending ;
      if (importantState === 2) {
        lastProp.priority = "important";
      } else if (importantState === 1) {
        current += "!";
      }
      lastProp.value = cssTrim(current);
      // NOTE(review): performEOFFixup presumably returns the text needed
      // to close whatever construct was left open at the end of the
      // input -- confirm against the lexer.  A ";" is always appended.
      const terminator = lexer.performEOFFixup("", true);
      lastProp.terminator = terminator + ";";
      // If the input was unterminated, attribute the remainder to
      // this property.  This avoids some bad behavior when rewriting
      // an unterminated comment.
      if (terminator) {
        lastProp.offsets[1] = inputString.length;
      }
    }
  }

  // Remove declarations that have neither a name nor a value
  declarations = declarations.filter(prop => prop.name || prop.value);

  return declarations;
}
/* eslint-enable complexity */

/**
 * Parse a string of CSS declarations into an array of declaration
 * objects.  For example,
 * parseDeclarations(isCssPropertyKnown, "width: 1px; height: 1px")
 * would return:
 * [{name:"width", value: "1px"}, {name: "height", "value": "1px"}]
 *
 * The input is expected to hold nothing but declarations, so any { or }
 * character is treated as part of the property name or value it
 * appears in.
 *
 * @param {Function} isCssPropertyKnown
 *        A function to check if the CSS property is known. This is either an
 *        internal server function or from the CssPropertiesFront.
 * @param {String} inputString
 *        An input string of CSS
 * @param {Boolean} parseComments
 *        If true, try to parse the contents of comments as well.
 *        A comment will only be parsed if it occurs outside of
 *        the body of some other declaration.  Defaults to false.
 * @return {Array} an array of objects with the following signature:
 *         [{"name": string, "value": string, "priority": string,
 *           "terminator": string,
 *           "offsets": [start, end], "colonOffsets": [start, end]},
 *          ...]
 *         "offsets" gives the start and end of the declaration text,
 *         in a form suitable for use with String.substring.
 *         "terminator" is the string to use to terminate the
 *         declaration; it is usually "", meaning that no additional
 *         termination is needed.
 *         "colonOffsets" gives the start and end of the ":" that
 *         separates the property name from the value.
 *         A declaration that was found inside a comment additionally
 *         has a "commentOffsets": [start, end] property holding the
 *         start and end offsets of the enclosing comment.
 */
function parseDeclarations(isCssPropertyKnown, inputString,
                           parseComments = false) {
  // A top-level parse never starts out inside a comment, so neither
  // comment-specific mode applies.
  const inComment = false;
  const commentOverride = false;
  return parseDeclarationsInternal(isCssPropertyKnown, inputString,
                                   parseComments, inComment, commentOverride);
}

/**
 * Same as @see parseDeclarations, except that declarations whose
 * name is empty (or otherwise falsy) are left out of the result.
 *
 * @param {Function} isCssPropertyKnown
 *        Passed through to parseDeclarations.
 * @param {String} inputString
 *        Passed through to parseDeclarations.
 * @param {Boolean} parseComments
 *        Passed through to parseDeclarations.  Defaults to false.
 * @return {Array} The declarations that have a name.
 */
function parseNamedDeclarations(isCssPropertyKnown, inputString,
                                parseComments = false) {
  const declarations =
    parseDeclarations(isCssPropertyKnown, inputString, parseComments);
  return declarations.filter(declaration => Boolean(declaration.name));
}

/**
 * Returns an array of the parsed CSS selector value and type given a string.
 *
 * The selector text is split into consecutive components of three
 * kinds: element, attribute and pseudo-class.  Each entry of the
 * returned array holds the text of one component together with its
 * kind, which is one of:
 * (1) SELECTOR_ATTRIBUTE - a bracketed attribute selector, "[...]"
 * (2) SELECTOR_ELEMENT - any other text
 * (3) SELECTOR_PSEUDO_CLASS - text introduced by one or more ":",
 *     including the whole argument list when it is a function
 *
 * Comments in the selector are dropped by the tokenizer.  A ")" that
 * does not close a function is kept as ordinary text and does not
 * affect how the rest of the selector is classified.
 *
 * @param {String} value
 *        The CSS selector text.
 * @return {Array} an array of objects with the following signature:
 *         [{ "value": string, "type": integer }, ...]
 * @throws {Error} If value is empty or otherwise falsy.
 */
/* eslint-disable complexity */
function parsePseudoClassesAndAttributes(value) {
  if (!value) {
    throw new Error("empty input string");
  }

  const result = [];
  // Text of the component currently being assembled.
  let current = "";
  // Number of functions that have been opened but not yet closed.
  let functionCount = 0;
  let hasAttribute = false;
  let hasColon = false;

  // Emit the text assembled so far (if any) as a component of the
  // given type, then start over with an empty buffer.
  const flush = type => {
    if (current) {
      result.push({ value: current, type });
    }
    current = "";
  };

  for (const token of cssTokenizer(value)) {
    const isSymbol = token.tokenType === "symbol";

    if (token.tokenType === "ident") {
      current += value.substring(token.startOffset, token.endOffset);

      // An identifier right after the colon(s), outside of any
      // function, completes a simple pseudo-class.
      if (hasColon && functionCount === 0) {
        flush(SELECTOR_PSEUDO_CLASS);
        hasColon = false;
      }
    } else if (isSymbol && token.text === ":") {
      // The first colon ends whatever came before it; further colons
      // (as in "::before" or inside a function) are just appended.
      if (!hasColon) {
        flush(SELECTOR_ELEMENT);
        hasColon = true;
      }

      current += token.text;
    } else if (token.tokenType === "function") {
      current += value.substring(token.startOffset, token.endOffset);
      functionCount++;
    } else if (isSymbol && token.text === ")") {
      current += token.text;

      // Closing the outermost function of a pseudo-class completes it.
      if (hasColon && functionCount === 1) {
        flush(SELECTOR_PSEUDO_CLASS);
        hasColon = false;
      }

      // Never let the depth go negative: a stray ")" would otherwise
      // make every later "outside of any function" check fail.
      if (functionCount > 0) {
        functionCount--;
      }
    } else if (isSymbol && token.text === "[") {
      if (!hasAttribute && functionCount === 0) {
        flush(SELECTOR_ELEMENT);
        hasAttribute = true;
      }

      current += token.text;
    } else if (isSymbol && token.text === "]") {
      current += token.text;

      if (hasAttribute && functionCount === 0) {
        flush(SELECTOR_ATTRIBUTE);
        hasAttribute = false;
      }
    } else {
      current += value.substring(token.startOffset, token.endOffset);
    }
  }

  // Whatever is left over is reported as an element component.
  flush(SELECTOR_ELEMENT);

  return result;
}
/* eslint-enable complexity */

/**
 * Split the text of a single CSS property value into its value and
 * priority parts.  The text is wrapped in a declaration with a dummy
 * property name and run through parseDeclarations; only the first
 * resulting declaration is looked at.
 *
 * @param {Function} isCssPropertyKnown
 *        A function to check if the CSS property is known. This is either an
 *        internal server function or from the CssPropertiesFront.
 * @param {String} value
 *        The value from the text editor.
 * @return {Object} an object with 'value' and 'priority' properties.
 *         Both are "" when no declaration could be parsed.
 */
function parseSingleValue(isCssPropertyKnown, value) {
  const wrapped = "a: " + value + ";";
  const [declaration] = parseDeclarations(isCssPropertyKnown, wrapped);

  if (!declaration) {
    return { value: "", priority: "" };
  }
  return {
    value: declaration.value,
    priority: declaration.priority,
  };
}

/**
 * Express an angle in degrees.  Degrees are returned unchanged,
 * gradians are multiplied by 0.9, radians by 180 / PI and turns by
 * 360.
 *
 * @param {Number} angleValue The angle value.
 * @param {CSS_ANGLEUNIT} angleUnit The unit |angleValue| is given in.
 * @return {Number} The same angle in degrees.
 * @throws {Error} If |angleUnit| is none of the units listed above.
 */
function getAngleValueInDegrees(angleValue, angleUnit) {
  if (angleUnit === CSS_ANGLEUNIT.deg) {
    return angleValue;
  }
  if (angleUnit === CSS_ANGLEUNIT.grad) {
    return angleValue * 0.9;
  }
  if (angleUnit === CSS_ANGLEUNIT.rad) {
    return angleValue * 180 / Math.PI;
  }
  if (angleUnit === CSS_ANGLEUNIT.turn) {
    return angleValue * 360;
  }
  throw new Error("No matched angle unit.");
}

// Functions exported by this module.  The SELECTOR_* constants and
// COMMENT_PARSING_HEURISTIC_BYPASS_CHAR are exported where they are
// defined.
exports.cssTokenizer = cssTokenizer;
exports.cssTokenizerWithLineColumn = cssTokenizerWithLineColumn;
exports.escapeCSSComment = escapeCSSComment;
exports.unescapeCSSComment = unescapeCSSComment;
exports.parseDeclarations = parseDeclarations;
exports.parseNamedDeclarations = parseNamedDeclarations;
// parseCommentDeclarations is exported for testing.
exports._parseCommentDeclarations = parseCommentDeclarations;
exports.parsePseudoClassesAndAttributes = parsePseudoClassesAndAttributes;
exports.parseSingleValue = parseSingleValue;
exports.getAngleValueInDegrees = getAngleValueInDegrees;