UNPKG

@acemir/cssom

Version:

CSS Object Model implementation and CSS parser

1,531 lines (1,371 loc) 112 kB
//.CommonJS var CSSOM = {}; var regexPatterns = require("./regexPatterns").regexPatterns; ///CommonJS /** * Parses a CSS string and returns a `CSSStyleSheet` object representing the parsed stylesheet. * * @param {string} token - The CSS string to parse. * @param {object} [opts] - Optional parsing options. * @param {object} [opts.globalObject] - An optional global object to prioritize over the window object. Useful on jsdom webplatform tests. * @param {Element | ProcessingInstruction} [opts.ownerNode] - The owner node of the stylesheet. * @param {CSSRule} [opts.ownerRule] - The owner rule of the stylesheet. * @param {CSSOM.CSSStyleSheet} [opts.styleSheet] - Reuse a style sheet instead of creating a new one (e.g. as `parentStyleSheet`) * @param {CSSOM.CSSRuleList} [opts.cssRules] - Prepare all rules in this list instead of mutating the style sheet continually * @param {function|boolean} [errorHandler] - Optional error handler function or `true` to use `console.error`. * @returns {CSSOM.CSSStyleSheet} The parsed `CSSStyleSheet` object. */ CSSOM.parse = function parse(token, opts, errorHandler) { errorHandler = errorHandler === true ? 
(console && console.error) : errorHandler; var i = 0; /** "before-selector" or "selector" or "atRule" or "atBlock" or "conditionBlock" or "before-name" or "name" or "before-value" or "value" */ var state = "before-selector"; var index; var buffer = ""; var valueParenthesisDepth = 0; var hasUnmatchedQuoteInSelector = false; // Track if current selector has unmatched quote var SIGNIFICANT_WHITESPACE = { "name": true, "before-name": true, "selector": true, "value": true, "value-parenthesis": true, "atRule": true, "importRule-begin": true, "importRule": true, "namespaceRule-begin": true, "namespaceRule": true, "atBlock": true, "containerBlock": true, "conditionBlock": true, "counterStyleBlock": true, "propertyBlock": true, 'documentRule-begin': true, "scopeBlock": true, "layerBlock": true, "pageBlock": true }; var styleSheet; if (opts && opts.styleSheet) { styleSheet = opts.styleSheet; } else { if (opts && opts.globalObject && opts.globalObject.CSSStyleSheet) { styleSheet = new opts.globalObject.CSSStyleSheet(); } else { styleSheet = new CSSOM.CSSStyleSheet(); } styleSheet.__constructed = false; } var topScope; if (opts && opts.cssRules) { topScope = { cssRules: opts.cssRules }; } else { topScope = styleSheet; } if (opts && opts.ownerNode) { styleSheet.__ownerNode = opts.ownerNode; var ownerNodeMedia = opts.ownerNode.media || (opts.ownerNode.getAttribute && opts.ownerNode.getAttribute("media")); if (ownerNodeMedia) { styleSheet.media.mediaText = ownerNodeMedia; } var ownerNodeTitle = opts.ownerNode.title || (opts.ownerNode.getAttribute && opts.ownerNode.getAttribute("title")); if (ownerNodeTitle) { styleSheet.__title = ownerNodeTitle; } } if (opts && opts.ownerRule) { styleSheet.__ownerRule = opts.ownerRule; } // @type CSSStyleSheet|CSSMediaRule|CSSContainerRule|CSSSupportsRule|CSSFontFaceRule|CSSKeyframesRule|CSSDocumentRule var currentScope = topScope; // @type CSSMediaRule|CSSContainerRule|CSSSupportsRule|CSSKeyframesRule|CSSDocumentRule var parentRule; var 
ancestorRules = [];
var prevScope;

// Working slots for the rule objects under construction while parsing.
var name, priority = "", styleRule, mediaRule, containerRule, counterStyleRule, propertyRule, supportsRule, importRule, fontFaceRule, keyframesRule, documentRule, hostRule, startingStyleRule, scopeRule, pageRule, layerBlockRule, layerStatementRule, nestedSelectorRule, namespaceRule;

// Namespace prefixes seen so far, kept for validation.
var definedNamespacePrefixes = {};

// Bookkeeping for rules that were already added.
var ruleIdCounter = 0;
var addedToParent = {};
var addedToTopScope = {};
var addedToCurrentScope = {};

/**
 * Returns the parse-time id of a rule, assigning the next counter value
 * (stored on `rule.__parseId`) the first time the rule is seen.
 *
 * @param {object} rule - Rule object to identify.
 * @returns {number} The id stored on the rule.
 */
function getRuleId(rule) {
  if (!rule.__parseId) {
    ruleIdCounter += 1;
    rule.__parseId = ruleIdCounter;
  }
  return rule.__parseId;
}

// Position of the last validation boundary, remembered so that each
// at-rule check does not rescan the whole token string.
var lastValidationBoundary = 0;

// Validation regexes already built, keyed by at-rule keyword.
var validationRegexCache = {};

/**
 * Returns the forward-validation regex for an at-rule keyword, building and
 * caching it on first use. `@import` uses its own source pattern; every
 * other keyword shares the generic one.
 *
 * @param {string} atRuleKey - At-rule keyword including the `@`.
 * @returns {RegExp} Regex made of the keyword followed by the source pattern.
 */
function getValidationRegex(atRuleKey) {
  var cachedRegExp = validationRegexCache[atRuleKey];
  if (cachedRegExp) {
    return cachedRegExp;
  }
  var template = forwardRuleValidationRegExp;
  if (atRuleKey === "@import") {
    template = forwardImportRuleValidationRegExp;
  }
  validationRegexCache[atRuleKey] = new RegExp(atRuleKey + template.source, template.flags);
  return validationRegexCache[atRuleKey];
}

// Regex patterns taken from the shared regexPatterns module.
var atKeyframesRegExp = regexPatterns.atKeyframesRegExp;
var beforeRulePortionRegExp = regexPatterns.beforeRulePortionRegExp;
var beforeRuleValidationRegExp = regexPatterns.beforeRuleValidationRegExp;
var forwardRuleValidationRegExp = regexPatterns.forwardRuleValidationRegExp;
var forwardImportRuleValidationRegExp = regexPatterns.forwardImportRuleValidationRegExp;

// Private copy built once here, so validateAtRule does not create it per call.
var regexBefore = new RegExp(beforeRulePortionRegExp.source, beforeRulePortionRegExp.flags);
var forwardRuleClosingBraceRegExp = regexPatterns.forwardRuleClosingBraceRegExp;
var forwardRuleSemicolonAndOpeningBraceRegExp = regexPatterns.forwardRuleSemicolonAndOpeningBraceRegExp;
var cssCustomIdentifierRegExp = regexPatterns.cssCustomIdentifierRegExp;
var startsWithCombinatorRegExp = regexPatterns.startsWithCombinatorRegExp;
var atPageRuleSelectorRegExp = regexPatterns.atPageRuleSelectorRegExp;
var startsWithHexEscapeRegExp = regexPatterns.startsWithHexEscapeRegExp;
var identStartCharRegExp = regexPatterns.identStartCharRegExp;
var identCharRegExp = regexPatterns.identCharRegExp;
var specialCharsNeedEscapeRegExp = regexPatterns.specialCharsNeedEscapeRegExp;
var combinatorOrSeparatorRegExp = regexPatterns.combinatorOrSeparatorRegExp;
var afterHexEscapeSeparatorRegExp = regexPatterns.afterHexEscapeSeparatorRegExp;
var trailingSpaceSeparatorRegExp = regexPatterns.trailingSpaceSeparatorRegExp;
var endsWithHexEscapeRegExp = regexPatterns.endsWithHexEscapeRegExp;
var attributeSelectorContentRegExp = regexPatterns.attributeSelectorContentRegExp;
var pseudoElementRegExp = regexPatterns.pseudoElementRegExp;
var invalidCombinatorLtGtRegExp = regexPatterns.invalidCombinatorLtGtRegExp;
var invalidCombinatorDoubleGtRegExp = regexPatterns.invalidCombinatorDoubleGtRegExp; var consecutiveCombinatorsRegExp = regexPatterns.consecutiveCombinatorsRegExp; var invalidSlottedRegExp = regexPatterns.invalidSlottedRegExp; var invalidPartRegExp = regexPatterns.invalidPartRegExp; var invalidCueRegExp = regexPatterns.invalidCueRegExp; var invalidCueRegionRegExp = regexPatterns.invalidCueRegionRegExp; var invalidNestingPattern = regexPatterns.invalidNestingPattern; var emptyPseudoClassRegExp = regexPatterns.emptyPseudoClassRegExp; var whitespaceNormalizationRegExp = regexPatterns.whitespaceNormalizationRegExp; var newlineRemovalRegExp = regexPatterns.newlineRemovalRegExp; var whitespaceAndDotRegExp = regexPatterns.whitespaceAndDotRegExp; var declarationOrOpenBraceRegExp = regexPatterns.declarationOrOpenBraceRegExp; var ampersandRegExp = regexPatterns.ampersandRegExp; var hexEscapeSequenceRegExp = regexPatterns.hexEscapeSequenceRegExp; var attributeCaseFlagRegExp = regexPatterns.attributeCaseFlagRegExp; var prependedAmpersandRegExp = regexPatterns.prependedAmpersandRegExp; var openBraceGlobalRegExp = regexPatterns.openBraceGlobalRegExp; var closeBraceGlobalRegExp = regexPatterns.closeBraceGlobalRegExp; var scopePreludeSplitRegExp = regexPatterns.scopePreludeSplitRegExp; var leadingWhitespaceRegExp = regexPatterns.leadingWhitespaceRegExp; var doubleQuoteRegExp = regexPatterns.doubleQuoteRegExp; var backslashRegExp = regexPatterns.backslashRegExp; /** * Searches for the first occurrence of a CSS at-rule statement terminator (`;` or `}`) * that is not inside a brace block within the given string. Mimics the behavior of a * regular expression match for such terminators, including any trailing whitespace. * @param {string} str - The string to search for at-rule statement terminators. * @returns {object | null} {0: string, index: number} or null if no match is found. 
/**
 * Finds the first at-rule statement terminator (`;` or `}`) that is not
 * preceded by an opening brace, together with the whitespace that follows
 * it. Stands in for a lookbehind regular expression that ES5 cannot express.
 *
 * @param {string} ruleSlice - The text to search.
 * @returns {Array|null} A match-like array (`[0]` is the terminator plus
 *   trailing whitespace, with `index` and `input` properties), or `null`
 *   when no terminator occurs before the first `{`.
 */
function atRulesStatemenRegExpES5Alternative(ruleSlice) {
  for (var i = 0; i < ruleSlice.length; i++) {
    var ch = ruleSlice.charAt(i);
    if (ch === '{') {
      // Every later terminator would have this brace before it, so nothing
      // further can match; stop instead of rescanning the prefix each time.
      return null;
    }
    if (ch === ';' || ch === '}') {
      var end = i + 1;
      while (end < ruleSlice.length && /\s/.test(ruleSlice.charAt(end))) {
        end++;
      }
      var matchObj = [ruleSlice.slice(i, end)];
      matchObj.index = i;
      matchObj.input = ruleSlice;
      return matchObj;
    }
  }
  return null;
}

/**
 * Finds the first balanced `{ ... }` block (nested braces included) at or
 * after `fromIndex`. Braces are counted literally; quotes and comments are
 * not interpreted.
 *
 * @param {string} str - The string to search.
 * @param {number} [fromIndex=0] - Index to start searching from.
 * @returns {object|null} `{ 0: matchedString, index: startIndex, input: str }`,
 *   or `null` when there is no opening brace or it is never closed.
 */
function matchBalancedBlock(str, fromIndex) {
  fromIndex = fromIndex || 0;
  var openIndex = str.indexOf('{', fromIndex);
  if (openIndex === -1) {
    return null;
  }
  var depth = 0;
  for (var i = openIndex; i < str.length; i++) {
    var ch = str.charAt(i);
    if (ch === '{') {
      depth++;
    } else if (ch === '}') {
      depth--;
      if (depth === 0) {
        return {
          0: str.slice(openIndex, i + 1),
          index: openIndex,
          input: str
        };
      }
    }
  }
  return null;
}
/**
 * Computes the index to continue from after skipping the first balanced
 * brace block found in `str`.
 *
 * @param {number} i - Current parser index.
 * @param {string} str - Text in which to look for the balanced block.
 * @param {number} fromIndex - Where in `str` the block search begins.
 * @returns {number} `i` advanced past the block, or by `str.length` when no
 *   balanced block exists. If the outer `token` has a `}` at the advanced
 *   position, the result is one less.
 */
function ignoreBalancedBlock(i, str, fromIndex) {
  var block = matchBalancedBlock(str, fromIndex);
  if (!block) {
    return i + str.length;
  }
  var next = i + block.index + block[0].length;
  return token.charAt(next) === '}' ? next - 1 : next;
}

/**
 * Splits an `@scope` prelude into its start and end selectors.
 *
 * @param {string} preludeContent - Prelude text without the `@scope` keyword.
 * @returns {object} `startSelector` and `endSelector` (trimmed, `''` when
 *   absent) plus the `hasStart`, `hasEnd` and `hasOnlyEnd` flags. `hasStart`
 *   and `hasEnd` are truthy/falsy values rather than strict booleans.
 */
function parseScopePrelude(preludeContent) {
  var segments = preludeContent.split(scopePreludeSplitRegExp);

  // The split swallowed the parentheses around the separator; put them back.
  if (segments.length === 2) {
    segments[0] += ')';
    segments[1] = '(' + segments[1];
  }

  var first = segments[0];
  var second = segments[1];

  var hasStart = first && first.charAt(0) === '(' && first.charAt(first.length - 1) === ')';
  var hasEnd = second && second.charAt(0) === '(' && second.charAt(second.length - 1) === ')';

  // The `@scope to (<end>)` form: no start selector, only an end selector.
  var hasOnlyEnd = !hasStart && !hasEnd && first.indexOf('to (') === 0 && first.charAt(first.length - 1) === ')';

  var endSelector = '';
  if (hasOnlyEnd) {
    endSelector = first.slice(4, -1).trim();
  } else if (hasEnd) {
    endSelector = second.slice(1, -1).trim();
  }

  return {
    startSelector: hasStart ? first.slice(1, -1).trim() : '',
    endSelector: endSelector,
    hasStart: hasStart,
    hasEnd: hasEnd,
    hasOnlyEnd: hasOnlyEnd
  };
}
/**
 * Tells whether a selector contains a pseudo-element, as decided by the
 * shared `pseudoElementRegExp` pattern.
 *
 * @param {string} selector - The CSS selector to check
 * @returns {boolean} True if the pattern matches the selector
 */
function hasPseudoElement(selector) {
  return pseudoElementRegExp.test(selector);
}

/**
 * Checks that parentheses, brackets and quotes in a selector are balanced.
 * Escape sequences are skipped, and everything inside a quoted string is
 * ignored until the matching quote.
 *
 * @param {string} selector - The CSS selector to validate
 * @param {boolean} trackAttributes - Treat `[` ... `]` as an attribute
 *   selector whose content (other than quotes) is not inspected
 * @param {boolean} useStack - Strict mode: additionally require that no
 *   attribute selector is left open at the end
 * @returns {boolean} True if everything is balanced
 */
function validateBalancedSyntax(selector, trackAttributes, useStack) {
  var openParens = 0;
  var openBrackets = 0;
  var activeQuote = '';
  var insideAttribute = false;

  for (var pos = 0; pos < selector.length; pos++) {
    var ch = selector.charAt(pos);

    // Jump over a complete escape sequence (hex or single character).
    if (ch === '\\') {
      var escapeLength = getEscapeSequenceLength(selector, pos);
      if (escapeLength > 0) {
        pos += escapeLength - 1; // the loop adds the final step
        continue;
      }
    }

    // Inside a string only the matching quote matters.
    if (activeQuote) {
      if (ch === activeQuote) {
        activeQuote = '';
      }
      continue;
    }
    if (ch === "'" || ch === '"') {
      activeQuote = ch;
      continue;
    }

    // Inside a tracked attribute selector only the closing bracket matters.
    if (trackAttributes && insideAttribute) {
      if (ch === ']') {
        insideAttribute = false;
      }
      continue;
    }

    switch (ch) {
      case '[':
        if (trackAttributes) {
          insideAttribute = true;
        } else {
          openBrackets++;
        }
        break;
      case ']':
        openBrackets--;
        if (openBrackets < 0) {
          return false;
        }
        break;
      case '(':
        openParens++;
        break;
      case ')':
        openParens--;
        if (openParens < 0) {
          return false;
        }
        break;
    }
  }

  var balanced = openParens === 0 && openBrackets === 0 && !activeQuote;
  return useStack ? balanced && !insideAttribute : balanced;
}

/**
 * Reports mismatched parentheses, brackets or quotes in a selector.
 *
 * @param {string} selector - The CSS selector to check
 * @returns {boolean} True if there are syntax errors
 */
function hasBasicSyntaxError(selector) {
  return !validateBalancedSyntax(selector, false, false);
}
/**
 * Detects combinator sequences that are not valid CSS: `<>`, `>>`, and
 * other runs of consecutive combinators. The three shared patterns are
 * tried in that order.
 *
 * @param {string} selector - The CSS selector to check
 * @returns {boolean} True if the selector contains invalid combinators
 */
function hasInvalidCombinators(selector) {
  return invalidCombinatorLtGtRegExp.test(selector) ||
    invalidCombinatorDoubleGtRegExp.test(selector) ||
    consecutiveCombinatorsRegExp.test(selector);
}

/**
 * Detects known pseudo-element functions written without their `:`/`::`
 * prefix, e.g. `slotted(div)`, `part(name)` or `cue(selector)`, which are
 * only valid as `::slotted()`, `::part()` and `::cue()`.
 *
 * @param {string} selector - The CSS selector to check
 * @returns {boolean} True if the selector contains invalid pseudo-like syntax
 */
function hasInvalidPseudoSyntax(selector) {
  return invalidSlottedRegExp.test(selector) ||
    invalidPartRegExp.test(selector) ||
    invalidCueRegExp.test(selector) ||
    invalidCueRegionRegExp.test(selector);
}
/**
 * Checks for invalid nesting selector (&) usage.
 * The & selector cannot be directly followed by a type selector without a delimiter.
 * Valid: &.class, &#id, &[attr], &:hover, &::before, & div, &>div
 * Invalid: &div, &span
 *
 * The decision is delegated entirely to the shared `invalidNestingPattern`.
 *
 * @param {string} selector - The CSS selector to check
 * @returns {boolean} True if the selector contains invalid & usage
 */
function hasInvalidNestingSelector(selector) {
  return invalidNestingPattern.test(selector);
};

/**
 * Checks if an at-rule can be nested based on parent chain validation.
 * Used for at-rules like `@counter-style`, `@property` and `@font-face` that can only be nested
 * inside `CSSScopeRule` or `CSSConditionRule` without a `CSSStyleRule` in the parent chain.
 *
 * Reads the parser's `currentScope`, `topScope` and `ancestorRules`; takes no arguments.
 *
 * @returns {boolean} `true` if nesting is allowed, `false` otherwise
 */
function canAtRuleBeNested() {
  if (currentScope === topScope) {
    return true; // Top-level is always allowed
  }

  var hasStyleRuleInChain = false;
  var hasValidParent = false;

  // Check currentScope (style rules are detected by constructor name, the others by instanceof)
  if (currentScope.constructor.name === 'CSSStyleRule') {
    hasStyleRuleInChain = true;
  } else if (currentScope instanceof CSSOM.CSSScopeRule || currentScope instanceof CSSOM.CSSConditionRule) {
    hasValidParent = true;
  }

  // Check ancestorRules; the scan stops at the first CSSStyleRule found
  if (!hasStyleRuleInChain) {
    for (var j = 0; j < ancestorRules.length; j++) {
      if (ancestorRules[j].constructor.name === 'CSSStyleRule') {
        hasStyleRuleInChain = true;
        break;
      }
      if (ancestorRules[j] instanceof CSSOM.CSSScopeRule || ancestorRules[j] instanceof CSSOM.CSSConditionRule) {
        hasValidParent = true;
      }
    }
  }

  // Allow nesting if we have a valid parent and no style rule in the chain
  return hasValidParent && !hasStyleRuleInChain;
}

/**
 * Validates the at-rule starting at the parser's current index `i` and either runs
 * `validCallback` or skips the invalid rule.
 *
 * Side effects on the enclosing parser state: may advance `i`, may reset `state` to
 * "before-selector", and updates `lastValidationBoundary` when the rule is accepted.
 *
 * @param {string} atRuleKey - At-rule keyword including the `@` (e.g. "@scope", "@page", "@layer").
 * @param {function} validCallback - Invoked (with this function's `this`) when the rule is valid.
 * @param {boolean} [cannotBeNested] - When truthy, the rule is rejected unless the current scope
 *   is the top scope.
 */
function validateAtRule(atRuleKey, validCallback, cannotBeNested) {
  var isValid = false;
  // Use cached regex instead of creating new one each time
  var ruleRegExp = getValidationRegex(atRuleKey);
  // Only slice what we need for validation (max 100 chars)
  // since we only check match at position 0
  // NOTE(review): the @scope/@page prelude checks and the skipping of invalid blocks below
  // also work on this 100-character slice only - confirm that is intended for longer rules.
  var lookAheadLength = Math.min(100, token.length - i);
  var ruleSlice = token.slice(i, i + lookAheadLength);
  // Not all rules can be nested: if the rule cannot be nested and we are NOT in the root scope,
  // skip the check, which leaves the rule invalid
  var shouldPerformCheck = cannotBeNested && currentScope !== topScope ? false : true;
  // First, check if there is no invalid characters just after the at-rule
  if (shouldPerformCheck && ruleSlice.search(ruleRegExp) === 0) {
    // Only scan from the last known validation boundary
    var searchStart = Math.max(0, lastValidationBoundary);
    var beforeSlice = token.slice(searchStart, i);

    // Use pre-compiled regex instead of creating new one each time
    var matches = beforeSlice.match(regexBefore);
    var lastI = matches ? searchStart + beforeSlice.lastIndexOf(matches[matches.length - 1]) : searchStart;
    var toCheckSlice = token.slice(lastI, i);
    // Check if we don't have any invalid in the portion before the `at-rule` and the closest allowed character
    var checkedSlice = toCheckSlice.search(beforeRuleValidationRegExp);
    if (checkedSlice === 0) {
      isValid = true;
      // Update the validation boundary cache to this position
      lastValidationBoundary = lastI;
    }
  }

  // Additional validation for @scope rule
  if (isValid && atRuleKey === "@scope") {
    var openBraceIndex = ruleSlice.indexOf('{');
    if (openBraceIndex !== -1) {
      // Extract the rule prelude (everything between the at-rule and {)
      var rulePrelude = ruleSlice.slice(0, openBraceIndex).trim();

      // Skip past at-rule keyword and whitespace
      var preludeContent = rulePrelude.slice("@scope".length).trim();

      if (preludeContent.length > 0) {
        // Parse the scope prelude
        var parsedScopePrelude = parseScopePrelude(preludeContent);
        var startSelector = parsedScopePrelude.startSelector;
        var endSelector = parsedScopePrelude.endSelector;
        var hasStart = parsedScopePrelude.hasStart;
        var hasEnd = parsedScopePrelude.hasEnd;
        var hasOnlyEnd = parsedScopePrelude.hasOnlyEnd;

        // Validation rules for @scope:
        // 1. Empty selectors in parentheses are invalid: @scope () {} or @scope (.a) to () {}
        if ((hasStart && startSelector === '') || (hasEnd && endSelector === '') || (hasOnlyEnd && endSelector === '')) {
          isValid = false;
        }
        // 2. Pseudo-elements are invalid in scope selectors
        else if ((startSelector && hasPseudoElement(startSelector)) || (endSelector && hasPseudoElement(endSelector))) {
          isValid = false;
        }
        // 3. Basic syntax errors (mismatched parens, brackets, quotes)
        else if ((startSelector && hasBasicSyntaxError(startSelector)) || (endSelector && hasBasicSyntaxError(endSelector))) {
          isValid = false;
        }
        // 4. Invalid combinator patterns
        else if ((startSelector && hasInvalidCombinators(startSelector)) || (endSelector && hasInvalidCombinators(endSelector))) {
          isValid = false;
        }
        // 5. Invalid pseudo-like syntax (function without : or :: prefix)
        else if ((startSelector && hasInvalidPseudoSyntax(startSelector)) || (endSelector && hasInvalidPseudoSyntax(endSelector))) {
          isValid = false;
        }
        // 6. Invalid structure (no proper parentheses found when prelude is not empty)
        else if (!hasStart && !hasOnlyEnd) {
          isValid = false;
        }
      }
      // Empty prelude (@scope {}) is valid
    }
  }

  // Additional validation for @page rule
  if (isValid && atRuleKey === "@page") {
    var openBraceIndex = ruleSlice.indexOf('{');
    if (openBraceIndex !== -1) {
      // Extract the rule prelude (everything between the at-rule and {)
      var rulePrelude = ruleSlice.slice(0, openBraceIndex).trim();

      // Skip past at-rule keyword and whitespace
      var preludeContent = rulePrelude.slice("@page".length).trim();

      if (preludeContent.length > 0) {
        var trimmedValue = preludeContent.trim();

        // Empty selector is valid for @page
        if (trimmedValue !== '') {
          // Parse @page selectorText for page name and pseudo-pages
          // Valid formats:
          // - (empty - no name, no pseudo-page)
          // - :left, :right, :first, :blank (pseudo-page only)
          // - named (named page only)
          // - named:first (named page with single pseudo-page)
          // - named:first:left (named page with multiple pseudo-pages)
          var match = trimmedValue.match(atPageRuleSelectorRegExp);
          if (match) {
            var pageName = match[1] || '';
            var pseudoPages = match[2] || '';

            // Validate page name if present
            if (pageName) {
              if (!cssCustomIdentifierRegExp.test(pageName)) {
                isValid = false;
              }
            }

            // Validate pseudo-pages if present (compared case-insensitively)
            if (pseudoPages) {
              var pseudos = pseudoPages.split(':').filter(function (p) { return p; });
              var validPseudos = ['left', 'right', 'first', 'blank'];
              var allValid = true;
              for (var j = 0; j < pseudos.length; j++) {
                if (validPseudos.indexOf(pseudos[j].toLowerCase()) === -1) {
                  allValid = false;
                  break;
                }
              }

              if (!allValid) {
                isValid = false;
              }
            }
          } else {
            isValid = false;
          }
        }
      }
    }
  }

  if (!isValid) {
    // If it's invalid the browser will simply ignore the entire invalid block.
    // A lookbehind regex (atRulesStatemenRegExp) would find the end of the invalid statement,
    // but it is not ES5 compliant, so the hand-written alternative is used instead.
    var ruleStatementMatch = atRulesStatemenRegExpES5Alternative(ruleSlice);

    // If it's a statement inside a nested rule, ignore only the statement
    if (ruleStatementMatch && currentScope !== topScope) {
      // indexOf yields 0 for a `;` terminator and -1 for a `}` terminator,
      // so for `}` the index ends up one position before the brace
      var ignoreEnd = ruleStatementMatch[0].indexOf(";");
      i += ruleStatementMatch.index + ignoreEnd;
      return;
    }

    // Check if there's a semicolon before the invalid at-rule and the first opening brace
    if (atRuleKey === "@layer") {
      var ruleSemicolonAndOpeningBraceMatch = ruleSlice.match(forwardRuleSemicolonAndOpeningBraceRegExp);
      if (ruleSemicolonAndOpeningBraceMatch && ruleSemicolonAndOpeningBraceMatch[1] === ";") {
        // Ignore the rule block until the semicolon
        i += ruleSemicolonAndOpeningBraceMatch.index + ruleSemicolonAndOpeningBraceMatch[0].length;
        state = "before-selector";
        return;
      }
    }

    // Ignore the entire rule block (if it's a statement it should ignore the statement plus the next block)
    i = ignoreBalancedBlock(i, ruleSlice);
    state = "before-selector";
  } else {
    validCallback.call(this);
  }
}

// Helper functions for looseSelectorValidator
// Declared at this level rather than inside the validator

/**
 * Check if character is a valid identifier start:
 * an ASCII letter, `_`, or a character in the range U+00A0-U+FFFF.
 * @param {string} c - Character to check
 * @returns {boolean}
 */
function isIdentStart(c) {
  return /[a-zA-Z_\u00A0-\uFFFF]/.test(c);
}

/**
 * Check if character is a valid identifier character:
 * an ASCII letter or digit, `_`, `-`, or a character in the range U+00A0-U+FFFF.
 * @param {string} c - Character to check
 * @returns {boolean}
 */
function isIdentChar(c) {
  return /[a-zA-Z0-9_\u00A0-\uFFFF\-]/.test(c);
}
/**
 * Validates CSS selector syntax without regex backtracking, by walking the
 * selector character by character.
 *
 * Supports:
 * - Escaped characters (e.g., .class\!, #id\@name)
 * - Namespace selectors (ns|element, *|element, |element)
 * - Class, ID, type, universal, attribute and pseudo selectors
 * - Combinators (>, +, ~, whitespace)
 * - Nesting selector (&)
 *
 * A prefix that is not followed by what it introduces (`.`, `#`, `:`/`::`
 * without an identifier, a namespace `|` without a type, or an unclosed
 * `[`) makes the whole selector invalid.
 *
 * @param {string} selector - The selector to validate
 * @returns {boolean} - True if valid selector syntax
 */
function looseSelectorValidator(selector) {
  if (!selector || selector.length === 0) {
    return false;
  }

  var i = 0;
  var len = selector.length;
  var hasMatchedComponent = false;

  // Skip one escape sequence (backslash + hex escape or any char) at `i`.
  function skipEscape() {
    if (i < len && selector[i] === '\\') {
      var escapeLen = getEscapeSequenceLength(selector, i);
      if (escapeLen > 0) {
        i += escapeLen;
        return true;
      }
    }
    return false;
  }

  // Consume identifier characters and escapes; true if anything was consumed.
  function parseIdentifier() {
    var start = i;
    while (i < len) {
      if (skipEscape()) {
        continue;
      } else if (isIdentChar(selector[i])) {
        i++;
      } else {
        break;
      }
    }
    return i > start;
  }

  // Consume an optional namespace prefix: `*|`, `identifier|` or `|`.
  function parseNamespace() {
    var start = i;
    if (i < len && selector[i] === '*') {
      i++;
    } else if (i < len && (isIdentStart(selector[i]) || selector[i] === '\\')) {
      parseIdentifier();
    }
    if (i < len && selector[i] === '|') {
      i++;
      return true;
    }
    // No pipe: what was read is not a namespace, so rewind.
    i = start;
    return false;
  }

  // Consume pseudo-class/element arguments with balanced parentheses.
  function parsePseudoArgs() {
    if (i >= len || selector[i] !== '(') {
      return false;
    }
    i++; // opening paren
    var depth = 1;
    var inString = false;
    var stringChar = '';
    while (i < len && depth > 0) {
      var c = selector[i];
      if (c === '\\' && i + 1 < len) {
        i += 2; // escaped character
      } else if (!inString && (c === '"' || c === '\'')) {
        inString = true;
        stringChar = c;
        i++;
      } else if (inString && c === stringChar) {
        inString = false;
        i++;
      } else if (!inString && c === '(') {
        depth++;
        i++;
      } else if (!inString && c === ')') {
        depth--;
        i++;
      } else {
        i++;
      }
    }
    return depth === 0;
  }

  while (i < len) {
    var matched = false;
    var start = i;

    // Whitespace (descendant combinator)
    while (i < len && /\s/.test(selector[i])) {
      i++;
    }
    if (i > start) {
      hasMatchedComponent = true;
      continue;
    }

    // Combinators: >, +, ~ (with trailing whitespace)
    if (i < len && /[>+~]/.test(selector[i])) {
      i++;
      hasMatchedComponent = true;
      while (i < len && /\s/.test(selector[i])) {
        i++;
      }
      continue;
    }

    if (i < len && selector[i] === '&') {
      // Nesting selector
      i++;
      hasMatchedComponent = true;
      matched = true;
    } else if (i < len && selector[i] === '.') {
      // Class selector: .identifier
      i++;
      if (parseIdentifier()) {
        hasMatchedComponent = true;
        matched = true;
      }
    } else if (i < len && selector[i] === '#') {
      // ID selector: #identifier
      i++;
      if (parseIdentifier()) {
        hasMatchedComponent = true;
        matched = true;
      }
    } else if (i < len && selector[i] === ':') {
      // Pseudo-class/element: :identifier or ::identifier
      i++;
      if (i < len && selector[i] === ':') {
        i++;
      }
      if (parseIdentifier()) {
        parsePseudoArgs(); // arguments are optional
        hasMatchedComponent = true;
        matched = true;
      }
    } else if (i < len && selector[i] === '[') {
      // Attribute selector: [...]
      i++;
      var depth = 1;
      while (i < len && depth > 0) {
        if (selector[i] === '\\') {
          i += 2;
        } else if (selector[i] === '\'') {
          i++;
          while (i < len && selector[i] !== '\'') {
            if (selector[i] === '\\') i += 2;
            else i++;
          }
          if (i < len) i++; // closing quote
        } else if (selector[i] === '"') {
          i++;
          while (i < len && selector[i] !== '"') {
            if (selector[i] === '\\') i += 2;
            else i++;
          }
          if (i < len) i++; // closing quote
        } else if (selector[i] === '[') {
          depth++;
          i++;
        } else if (selector[i] === ']') {
          depth--;
          i++;
        } else {
          i++;
        }
      }
      if (depth === 0) {
        hasMatchedComponent = true;
        matched = true;
      }
    } else if (i < len && (isIdentStart(selector[i]) || selector[i] === '\\' || selector[i] === '*' || selector[i] === '|')) {
      // Type selector with optional namespace: [namespace|]identifier
      parseNamespace();
      if (i < len && selector[i] === '*') {
        i++; // universal selector
        hasMatchedComponent = true;
        matched = true;
      } else if (i < len && (isIdentStart(selector[i]) || selector[i] === '\\')) {
        if (parseIdentifier()) {
          hasMatchedComponent = true;
          matched = true;
        }
      }
    }

    // An unmatched component is invalid even when a prefix character was
    // consumed; testing only `i === start` let `div.` or `a:` through.
    if (!matched) {
      return false;
    }
  }

  return hasMatchedComponent;
}
/**
 * Validates a basic CSS selector, allowing deeply nested balanced
 * parentheses in pseudo-classes. Replaces the previous basicSelectorRegExp.
 *
 * Accepted components: type, universal and namespace selectors, ID and class
 * selectors, attribute selectors, pseudo-classes and pseudo-elements
 * (including nested arguments such as `:has(.sel:nth-child(3n))`), the
 * parent selector (`&`), combinators (`>`, `+`, `~`) and whitespace.
 * Unicode and escape sequences are allowed in identifiers.
 *
 * Checks run in this order: length guard, balanced syntax, combinators,
 * pseudo-like syntax, nesting selector, quoted arguments in pseudos that
 * must not take strings, and finally the iterative component validator.
 *
 * @param {string} selector
 * @returns {boolean} True if the selector passes every check
 */
function basicSelectorValidator(selector) {
  // Reject extremely long selectors up front; real selectors are far shorter.
  if (selector.length > 10000) {
    return false;
  }

  // Balanced syntax with attribute tracking and strict parenthesis matching
  if (!validateBalancedSyntax(selector, true, true)) {
    return false;
  }

  if (hasInvalidCombinators(selector)) {
    return false;
  }

  if (hasInvalidPseudoSyntax(selector)) {
    return false;
  }

  if (hasInvalidNestingSelector(selector)) {
    return false;
  }

  // Pseudo-classes like :lang(), :dir() and :nth-*() must not receive quoted
  // strings. Scanned iteratively to avoid regex backtracking.
  var noQuotesPseudos = ['lang', 'dir', 'nth-child', 'nth-last-child', 'nth-of-type', 'nth-last-of-type'];
  var len = selector.length;
  var idx = 0;

  while (idx < len) {
    if (selector[idx] !== ':') {
      idx++;
      continue;
    }

    idx++;
    // Second colon of a pseudo-element
    if (idx < len && selector[idx] === ':') {
      idx++;
    }

    var nameStart = idx;
    while (idx < len && /[a-zA-Z0-9\-]/.test(selector[idx])) {
      idx++;
    }

    // No name, or no arguments: carry on from the current character WITHOUT
    // skipping it, so a pseudo that follows immediately is still inspected.
    if (idx === nameStart || idx >= len || selector[idx] !== '(') {
      continue;
    }

    var pseudoName = selector.substring(nameStart, idx).toLowerCase();

    idx++; // opening paren
    var contentStart = idx;
    var depth = 1;

    // Find the matching closing paren (nesting aware)
    while (idx < len && depth > 0) {
      if (selector[idx] === '\\') {
        idx += 2; // escaped character
      } else if (selector[idx] === '(') {
        depth++;
        idx++;
      } else if (selector[idx] === ')') {
        depth--;
        idx++;
      } else {
        idx++;
      }
    }

    if (depth === 0) {
      var pseudoContent = selector.substring(contentStart, idx - 1);
      if (noQuotesPseudos.indexOf(pseudoName) !== -1 && /['"]/.test(pseudoContent)) {
        return false;
      }
    }
  }

  // Component-level validation, also iterative
  return looseSelectorValidator(selector);
}

/**
 * Regular expression to match CSS pseudo-classes with arguments.
 *
 * Matches patterns like `:pseudo-class(argument)`, capturing the pseudo-class name and its argument.
 *
 * Capture groups:
 * 1. The pseudo-class name (letters and hyphens).
 * 2. The argument inside the parentheses (can contain nested parentheses, quoted strings, and other characters.).
 *
 * Global flag (`g`) is used to find all matches in the input string.
 *
 * Example matches:
 * - :nth-child(2n+1)
 * - :has(.sel:nth-child(3n))
 * - :not(".foo, .bar")
 *
 * REPLACED WITH FUNCTION to avoid exponential backtracking.
 */
/**
 * Extract pseudo-classes with arguments from a selector using iterative parsing.
 * Replaces the previous globalPseudoClassRegExp to avoid exponential backtracking.
 *
 * Handles:
 * - Regular content without parentheses or quotes
 * - Single-quoted strings
 * - Double-quoted strings
 * - Nested parentheses (arbitrary depth)
 *
 * Only pseudos followed by a balanced `(...)` argument list are reported.
 * Pseudos nested inside another pseudo's arguments are not reported
 * separately; they are part of the outer match's `pseudoArgs`.
 *
 * The scan always resumes at the first unconsumed character, so a functional
 * pseudo that directly follows an argument-less one (`:hover:not(.a)`,
 * `::before:is(x)`) is found as well.
 *
 * @param {string} selector - The CSS selector to parse
 * @returns {Array} Array of matches, each with: [fullMatch, pseudoName, pseudoArgs, startIndex]
 */
function extractPseudoClasses(selector) {
	var matches = [];
	var nameCharRegExp = /[a-zA-Z\-]/;
	var length = selector.length;
	var i = 0;

	while (i < length) {
		// Look for pseudo-class start (single or double colon)
		if (selector[i] !== ':') {
			i++;
			continue;
		}
		var pseudoStart = i;
		i++;

		// Skip second colon for pseudo-elements (::)
		if (i < length && selector[i] === ':') {
			i++;
		}

		// Extract pseudo name
		var nameStart = i;
		while (i < length && nameCharRegExp.test(selector[i])) {
			i++;
		}

		// No name, or no argument list: continue from the current character
		// without skipping it (it may start the next pseudo).
		if (i === nameStart || i >= length || selector[i] !== '(') {
			continue;
		}

		var pseudoName = selector.substring(nameStart, i);
		i++;
		var argsStart = i;
		var depth = 1;
		var inSingleQuote = false;
		var inDoubleQuote = false;

		// Find matching closing paren (handle nesting and strings)
		while (i < length && depth > 0) {
			var ch = selector[i];
			if (ch === '\\') {
				i += 2; // Skip escaped character
				continue;
			}
			if (ch === "'" && !inDoubleQuote) {
				inSingleQuote = !inSingleQuote;
			} else if (ch === '"' && !inSingleQuote) {
				inDoubleQuote = !inDoubleQuote;
			} else if (!inSingleQuote && !inDoubleQuote) {
				if (ch === '(') {
					depth++;
				} else if (ch === ')') {
					depth--;
				}
			}
			i++;
		}

		// Unterminated argument lists are ignored
		if (depth === 0) {
			// Same layout as a regex match: [fullMatch, pseudoName, pseudoArgs, startIndex]
			matches.push([
				selector.substring(pseudoStart, i),
				pseudoName,
				selector.substring(argsStart, i - 1),
				pseudoStart
			]);
		}
	}

	return matches;
}
/**
 * Splits a selector list on its top-level commas.
 *
 * Commas that sit inside parentheses (e.g. `:not(.foo, .bar)`), inside single-
 * or double-quoted strings, or that are escaped with a backslash do not split
 * the list. Each part is trimmed and empty parts are dropped. The last part
 * keeps its trailing whitespace when `endsWithHexEscapeRegExp` matches the
 * untrimmed text; only its leading whitespace is then removed.
 *
 * @param {string} selector - The CSS selector string to parse.
 * @returns {string[]} The selector parts, in source order.
 */
function parseAndSplitNestedSelectors(selector) {
	var pieces = [];
	var current = "";
	var parenLevel = 0;
	var openQuote = ""; // the quote character of the string we are inside, "" when outside
	var total = selector.length;
	var pos = 0;

	while (pos < total) {
		var ch = selector.charAt(pos);
		pos++;

		// Backslash followed by a character: copy both verbatim
		if (ch === '\\' && pos < total) {
			current += ch + selector.charAt(pos);
			pos++;
			continue;
		}

		// Inside a string only the matching quote is significant
		if (openQuote) {
			if (ch === openQuote) {
				openQuote = "";
			}
			current += ch;
			continue;
		}

		if (ch === "'" || ch === '"') {
			openQuote = ch;
			current += ch;
			continue;
		}

		// Top-level comma: close the current part
		if (ch === ',' && parenLevel === 0) {
			var piece = current.trim();
			if (piece) {
				pieces.push(piece);
			}
			current = "";
			continue;
		}

		if (ch === '(') {
			parenLevel++;
		} else if (ch === ')') {
			parenLevel--;
		}
		current += ch;
	}

	// Whatever is left is the final part
	var lastPiece = current.trim();
	if (lastPiece) {
		if (endsWithHexEscapeRegExp.test(current)) {
			// Keep the trailing space that terminates a hex escape
			pieces.push(current.replace(leadingWhitespaceRegExp, ''));
		} else {
			pieces.push(lastPiece);
		}
	}

	return pieces;
}
// Cache of already validated selectors: selector text -> boolean result
// (previously an ES6 Map, now an ES5-compliant object).
// Keys are arbitrary selector text, so the cache must only be queried through
// Object.prototype.hasOwnProperty.call: a selector such as `hasOwnProperty`
// becomes an own key and would shadow the inherited method.
var validatedSelectorsCache = {};

// Only pseudo-classes that accept selector lists should recurse
var selectorListPseudoClasses = {
	'not': true,
	'is': true,
	'has': true,
	'where': true
};

/**
 * Validates a CSS selector string, including nested selectors within
 * pseudo-classes that accept selector lists (:not, :is, :has, :where).
 *
 * Each nested selector is validated recursively with this same function.
 * A selector list in which any entry contains the nesting selector (as
 * detected by `ampersandRegExp`) is not validated entry by entry. The
 * selector itself is finally checked with `basicSelectorValidator`.
 * Results are memoised in `validatedSelectorsCache`.
 *
 * @param {string} selector - The CSS selector string to validate.
 * @returns {boolean} Returns `true` if the selector is valid, otherwise `false`.
 */
function validateSelector(selector) {
	var hasOwn = Object.prototype.hasOwnProperty;

	if (hasOwn.call(validatedSelectorsCache, selector)) {
		return validatedSelectorsCache[selector];
	}

	// Use function-based parsing to extract pseudo-classes (avoids backtracking)
	var pseudoClassMatches = extractPseudoClasses(selector);

	for (var j = 0; j < pseudoClassMatches.length; j++) {
		if (!hasOwn.call(selectorListPseudoClasses, pseudoClassMatches[j][1])) {
			continue;
		}

		var nestedSelectors = parseAndSplitNestedSelectors(pseudoClassMatches[j][2]);

		// If ANY selector in the list contains & (nesting selector), skip validation
		// for the entire list since & will be replaced at runtime
		var hasAmpersand = false;
		for (var k = 0; k < nestedSelectors.length; k++) {
			if (ampersandRegExp.test(nestedSelectors[k])) {
				hasAmpersand = true;
				break;
			}
		}
		if (hasAmpersand) {
			continue;
		}

		// Otherwise validate each nested selector; the recursive call consults
		// and fills the cache itself.
		for (var i = 0; i < nestedSelectors.length; i++) {
			if (!validateSelector(nestedSelectors[i])) {
				validatedSelectorsCache[selector] = false;
				return false;
			}
		}
	}

	var basicSelectorValidation = basicSelectorValidator(selector);
	validatedSelectorsCache[selector] = basicSelectorValidation;
	return basicSelectorValidation;
}
/**
 * Validates namespace selectors by checking if the namespace prefix is defined.
 *
 * Looks for the first `|` that is neither escaped nor inside an attribute
 * selector (`[...]`, including quoted attribute values). Everything before
 * that pipe is taken as the namespace prefix. A selector without such a pipe,
 * the universal namespace (`*|`) and the empty namespace (`|`) are always
 * valid. Any other prefix must be an own key of `definedNamespacePrefixes`.
 *
 * The own-key test goes through `Object.prototype.hasOwnProperty.call`, so it
 * keeps working if the table has an own key named `hasOwnProperty` (for
 * example a namespace prefix with that name) or no prototype.
 *
 * @param {string} selector - The CSS selector to validate
 * @returns {boolean} Returns true if the namespace is valid, false otherwise
 */
function validateNamespaceSelector(selector) {
	var pipeIndex = -1;
	var inAttr = false;
	var openQuote = ''; // quote character of the attribute string we are inside, '' when outside

	for (var i = 0; i < selector.length; i++) {
		var ch = selector[i];

		// Handle escape sequences - skip hex escapes or simple escapes
		if (ch === '\\') {
			var escapeLen = getEscapeSequenceLength(selector, i);
			if (escapeLen > 0) {
				i += escapeLen - 1; // -1 because loop will increment
				continue;
			}
		}

		if (openQuote) {
			if (ch === openQuote) {
				openQuote = '';
			}
		} else if (inAttr) {
			// Pipes inside attribute selectors are attribute operators, not namespace separators
			if (ch === ']') {
				inAttr = false;
			} else if (ch === "'" || ch === '"') {
				openQuote = ch;
			}
		} else if (ch === '[') {
			inAttr = true;
		} else if (ch === '|') {
			// This is a namespace separator, not an attribute operator
			pipeIndex = i;
			break;
		}
	}

	if (pipeIndex === -1) {
		return true; // No namespace, always valid
	}

	var namespacePrefix = selector.substring(0, pipeIndex);

	// Universal namespace (*|) and default namespace (|) are always valid
	if (namespacePrefix === '*' || namespacePrefix === '') {
		return true;
	}

	// Check if the custom namespace prefix is defined
	return Object.prototype.hasOwnProperty.call(definedNamespacePrefixes, namespacePrefix);
}
namespace (|) are always valid if (namespacePrefix === '*' || namespacePrefix === '') { return true; } // Check if the custom namespace prefix is defined return definedNamespacePrefixes.hasOwnProperty(namespacePrefix); } /** * Normalizes escape sequences in a selector to match browser behavior. * Decodes escape sequences and re-encodes them in canonical form. * * @param {string} selector - The selector to normalize * @returns {string} Normalized selector */ function normalizeSelectorEscapes(selector) { var result = ''; var i = 0; var nextChar = ''; // Track context for identifier boundaries var inIdentifier = false; var inAttribute = false; var attributeDepth = 0; var needsEscapeForIdent = false; var lastWasHexEscape = false; while (i < selector.length) { var char = selector[i]; // Track attribute selector context if (char === '[' && !inAttribute) { inAttribute = true; attributeDepth = 1; result += char; i++; needsEscapeForIdent = false; inIdentifier = false; lastWasHexEscape = false; continue; } if (inAttribute) { if (char === '[') attributeDepth++; if (char === ']') { attributeDepth--; if (attributeDepth === 0) inAttribute = false; } // Don't normalize escapes inside attribute selectors if (char === '\\' && i + 1 < selector.length) { var escapeLen = getEscapeSequenceLength(selector, i); result += selector.substr(i, escapeLen); i += escapeLen; } else { result += char; i++; } lastWasHexEscape = false; continue; } // Handle escape sequences if (char === '\\') { var escapeLen = getEscapeSequenceLength(selector, i); if (escapeLen > 0) { var escapeSeq = selector.substr(i, escapeLen); var decoded = decodeEscapeSequence(escapeSeq); var wasHexEscape = startsWithHexEscapeRegExp.test(escapeSeq); var hadTerminatingSpace = wasHexEscape && escapeSeq[escapeLen - 1] === ' '; nextChar = selector[i + escapeLen] || ''; // Check if this character needs escaping var needsEscape = false; var useHexEscape = false; if (needsEscapeForIdent) { // At start of identifier (after . 
# or -) // Digits must be escaped, letters/underscore/_/- don't need escaping if (isDigit(decoded)) { needsEscape = true; useHexEscape = true; } else if (decoded === '-') { // Dash at identifier start: keep escaped if it's the only character, // otherwise it can be decoded var remainingSelector = selector.substring(i + escapeLen); var hasMoreIdentChars = remainingSelector && identCharRegExp.test(remainingSelector[0]); needsEscape = !hasMoreIdentChars; } else if (!identStartCharRegExp.test(decoded)) { needsEscape = true; } } else { if (specialCharsNeedEscapeRegExp.test(decoded)) { needsEscape = true; } } if (needsEscape) { if (useHexEscape) { // Use normalized hex escape var codePoint = decoded.charCodeAt(0); var hex = codePoint.toString(16); result += '\\' + hex; // Add space if next char could continue the hex sequence, // or if at end of selector (to disambiguate the escape) if (isHexDigit(nextChar) || !nextChar || afterHexEscapeSeparatorRegExp.test(nextChar)) { result += ' '; lastWasHexEscape = false; } else { lastWasHexEscape = true; } } else { // Use simple character escape result += '\\' + decoded; lastWasHexEscape = false; } } else { // No escape needed, use the character directly // But if previous was hex escape (without terminating space) and this is alphanumeric, add space if (lastWasHexEscape && !hadTerminatingSpace && isAlphanumeric(decoded)) { result += ' '; } result += decoded; // Preserve terminating space at end of selector (when followed by non-ident char) if (hadTerminatingSpace && (!nextChar || afterHexEscapeSeparatorRegExp.test(nextChar))) { result += ' '; } lastWasHexEscape = false; } i += escapeLen; // After processing escape, check if we're still needing ident validation // Only stay in needsEscapeForIdent state if decoded was '-' needsEscapeForIdent = needsEscapeForIdent && decoded === '-'; inIdentifier = true; continue; } } // Handle regular characters if (char === '.' 
|| char === '#') { result += char; needsEscapeForIdent = true; inIdentifier = false; lastWasHexEscape = false;