shady-css-parser
A fast, small and flexible CSS parser.
/**
* @license
* Copyright (c) 2016 The Polymer Project Authors. All rights reserved.
* This code may only be used under the BSD style license found at
* http://polymer.github.io/LICENSE.txt The complete set of authors may be found
* at http://polymer.github.io/AUTHORS.txt The complete set of contributors may
* be found at http://polymer.github.io/CONTRIBUTORS.txt Code distributed by
* Google as part of the polymer project is also subject to an additional IP
* rights grant found at http://polymer.github.io/PATENTS.txt
*/
import {AtRule, Comment, Declaration, Discarded, Rule, Rulelist, Ruleset, Stylesheet} from './common';
import {NodeFactory} from './node-factory';
import {Token} from './token';
import {Tokenizer} from './tokenizer';
/**
* Class that implements a shady CSS parser.
*/
class Parser {
nodeFactory: NodeFactory;
/**
* Create a Parser instance. When creating a Parser instance, a specialized
* NodeFactory can be supplied to implement streaming analysis and
* manipulation of the CSS AST.
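* @example
* // A minimal sketch of streaming analysis; `LoggingNodeFactory` is a
* // hypothetical subclass. Override a factory method to observe nodes
* // as they are produced, then delegate to the base implementation:
* class LoggingNodeFactory extends NodeFactory {
*   declaration(name, value, nameRange, range) {
*     console.log('declaration:', name);
*     return super.declaration(name, value, nameRange, range);
*   }
* }
* const parser = new Parser(new LoggingNodeFactory());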
*/
constructor(nodeFactory = new NodeFactory()) {
this.nodeFactory = nodeFactory;
}
/**
* Parse CSS and generate an AST.
* @param cssText The CSS to parse.
* @return A CSS AST containing nodes that correspond to those
* generated by the Parser's NodeFactory.
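* @example
* // A minimal sketch:
* const ast = new Parser().parse('.title { color: red; }');
* // `ast` is a Stylesheet node; with the default NodeFactory its
* // `rules` array contains a single Ruleset node.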
*/
parse(cssText: string): Stylesheet {
return this.parseStylesheet(new Tokenizer(cssText));
}
/**
* Consumes tokens from a Tokenizer to parse a Stylesheet node.
* @param tokenizer A Tokenizer instance.
*/
parseStylesheet(tokenizer: Tokenizer): Stylesheet {
return this.nodeFactory.stylesheet(
this.parseRules(tokenizer), {start: 0, end: tokenizer.cssText.length});
}
/**
* Consumes tokens from a Tokenizer to parse a sequence of rules.
* @param tokenizer A Tokenizer instance.
* @return A list of nodes corresponding to rules. For a parser
* configured with a basic NodeFactory, any of Comment, AtRule, Ruleset,
* Declaration and Discarded nodes may be present in the list.
*/
parseRules(tokenizer: Tokenizer): Rule[] {
const rules = [];
while (tokenizer.currentToken) {
const rule = this.parseRule(tokenizer);
if (rule) {
rules.push(rule);
}
}
return rules;
}
/**
* Consumes tokens from a Tokenizer to parse a single rule.
* @param tokenizer A Tokenizer instance.
* @return If the current token in the Tokenizer is whitespace,
* returns null. Otherwise, returns the next parseable node.
*/
parseRule(tokenizer: Tokenizer): Rule|null {
// Trim leading whitespace:
const token = tokenizer.currentToken;
if (token === null) {
return null;
}
if (token.is(Token.type.whitespace)) {
tokenizer.advance();
return null;
} else if (token.is(Token.type.comment)) {
return this.parseComment(tokenizer);
} else if (token.is(Token.type.word)) {
return this.parseDeclarationOrRuleset(tokenizer);
} else if (token.is(Token.type.propertyBoundary)) {
return this.parseUnknown(tokenizer);
} else if (token.is(Token.type.at)) {
return this.parseAtRule(tokenizer);
} else {
return this.parseUnknown(tokenizer);
}
}
/**
* Consumes tokens from a Tokenizer to parse a Comment node.
* @param tokenizer A Tokenizer instance.
*/
parseComment(tokenizer: Tokenizer): Comment|null {
const token = tokenizer.advance();
if (token === null) {
return null;
}
return this.nodeFactory.comment(
tokenizer.slice(token), {start: token.start, end: token.end});
}
/**
* Consumes tokens from a Tokenizer through the next boundary token to
* produce a Discarded node. This supports graceful recovery from many
* malformed CSS conditions.
* @param tokenizer A Tokenizer instance.
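* @example
* // Sketch: a stray top-level semicolon is recovered as a Discarded
* // node instead of failing the whole parse.
* const ast = new Parser().parse('.a { color: red; } ;');
* // => ast.rules is [Ruleset, Discarded]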
*/
parseUnknown(tokenizer: Tokenizer): Discarded|null {
const start = tokenizer.advance();
let end;
if (start === null) {
return null;
}
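// Consume any contiguous run of boundary tokens that follows the
// first discarded token: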
while (tokenizer.currentToken &&
tokenizer.currentToken.is(Token.type.boundary)) {
end = tokenizer.advance();
}
return this.nodeFactory.discarded(
tokenizer.slice(start, end), tokenizer.getRange(start, end));
}
/**
* Consumes tokens from a Tokenizer to parse an AtRule node.
* @param tokenizer A Tokenizer instance.
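* @example
* // Sketch:
* const [rule] = new Parser()
*     .parse('@media (min-width: 600px) { .title { color: red; } }')
*     .rules;
* // => an AtRule with name 'media', parameters '(min-width: 600px)'
* //    and a Rulelist containing one Ruleset.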
*/
parseAtRule(tokenizer: Tokenizer): AtRule|null {
let name = undefined;
let nameRange = undefined;
let rulelist: Rulelist|undefined = undefined;
let parametersStart = undefined;
let parametersEnd = undefined;
if (!tokenizer.currentToken) {
return null;
}
const start = tokenizer.currentToken.start;
while (tokenizer.currentToken) {
if (tokenizer.currentToken.is(Token.type.whitespace)) {
tokenizer.advance();
} else if (!name && tokenizer.currentToken.is(Token.type.at)) {
// Discard the @:
tokenizer.advance();
const start = tokenizer.currentToken;
let end;
while (tokenizer.currentToken &&
tokenizer.currentToken.is(Token.type.word)) {
end = tokenizer.advance();
}
nameRange = tokenizer.getRange(start!, end);
name = tokenizer.cssText.slice(nameRange.start, nameRange.end);
} else if (tokenizer.currentToken.is(Token.type.openBrace)) {
rulelist = this.parseRulelist(tokenizer);
break;
} else if (tokenizer.currentToken.is(Token.type.propertyBoundary)) {
tokenizer.advance();
break;
} else {
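// Anything else accumulates as the at-rule's parameters: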
if (parametersStart == null) {
parametersStart = tokenizer.advance();
} else {
parametersEnd = tokenizer.advance();
}
}
}
if (name === undefined || nameRange === undefined) {
return null;
}
let parametersRange = undefined;
let parameters = '';
if (parametersStart) {
parametersRange = tokenizer.trimRange(
tokenizer.getRange(parametersStart, parametersEnd));
parameters =
tokenizer.cssText.slice(parametersRange.start, parametersRange.end);
}
const end = tokenizer.currentToken ? tokenizer.currentToken.previous!.end :
tokenizer.cssText.length;
return this.nodeFactory.atRule(
name, parameters, rulelist, nameRange, parametersRange, {start, end});
}
/**
* Consumes tokens from a Tokenizer to produce a Rulelist node.
* @param tokenizer A Tokenizer instance.
*/
parseRulelist(tokenizer: Tokenizer): Rulelist {
const rules = [];
const start = tokenizer.currentToken!.start;
let endToken;
// Take the opening { boundary:
tokenizer.advance();
while (tokenizer.currentToken) {
if (tokenizer.currentToken.is(Token.type.closeBrace)) {
endToken = tokenizer.currentToken;
tokenizer.advance();
break;
} else {
const rule = this.parseRule(tokenizer);
if (rule) {
rules.push(rule);
}
}
}
// If we don't have an end token it's because we reached the end of input.
const end = endToken ? endToken.end : tokenizer.cssText.length;
return this.nodeFactory.rulelist(rules, {start, end});
}
/**
* Consumes tokens from a Tokenizer instance to produce a Declaration node or
* a Ruleset node, as appropriate.
* @param tokenizer A Tokenizer instance.
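* @example
* // Sketch of the three shapes this method distinguishes:
* //   'color: red;'      => Declaration whose value is an Expression
* //   '--mix: { ... };'  => Declaration whose value is a Rulelist
* //   '.title { ... }'   => Ruleset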
*/
parseDeclarationOrRuleset(tokenizer: Tokenizer): Declaration|Ruleset|null {
let ruleStart = null;
let ruleEnd = null;
let colon = null;
// Note: this code is not obviously correct. For example, there appears
// to be a potential null dereference if the declaration starts with an
// open brace or property boundary, though that case may be impossible.
while (tokenizer.currentToken) {
if (tokenizer.currentToken.is(Token.type.whitespace)) {
tokenizer.advance();
} else if (tokenizer.currentToken.is(Token.type.openParenthesis)) {
// skip until close paren
while (tokenizer.currentToken &&
!tokenizer.currentToken.is(Token.type.closeParenthesis)) {
tokenizer.advance();
}
} else if (
tokenizer.currentToken.is(Token.type.openBrace) ||
tokenizer.currentToken.is(Token.type.propertyBoundary)) {
break;
} else {
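// Track the most recent colon; for a declaration it separates the
// name from the value expression: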
if (tokenizer.currentToken.is(Token.type.colon)) {
colon = tokenizer.currentToken;
}
if (ruleStart === null) {
ruleStart = tokenizer.advance();
ruleEnd = ruleStart;
} else {
ruleEnd = tokenizer.advance();
}
}
}
if (tokenizer.currentToken === null) {
// terminated early
return null;
}
// A ruleset never contains or ends with a semi-colon.
if (tokenizer.currentToken.is(Token.type.propertyBoundary)) {
const nameRange =
tokenizer.getRange(ruleStart!, colon ? colon.previous : ruleEnd);
const declarationName =
tokenizer.cssText.slice(nameRange.start, nameRange.end);
let expression = undefined;
if (colon && colon.next) {
const rawExpressionRange = tokenizer.getRange(colon.next, ruleEnd);
const expressionRange = tokenizer.trimRange(rawExpressionRange);
const expressionValue =
tokenizer.cssText.slice(expressionRange.start, expressionRange.end);
expression =
this.nodeFactory.expression(expressionValue, expressionRange);
}
if (tokenizer.currentToken.is(Token.type.semicolon)) {
tokenizer.advance();
}
const range = tokenizer.trimRange(tokenizer.getRange(
ruleStart!,
tokenizer.currentToken && tokenizer.currentToken.previous ||
ruleEnd));
return this.nodeFactory.declaration(
declarationName, expression, nameRange, range);
// This is the case for a mixin-like structure:
} else if (colon && colon === ruleEnd) {
const rulelist = this.parseRulelist(tokenizer);
if (tokenizer.currentToken.is(Token.type.semicolon)) {
tokenizer.advance();
}
const nameRange = tokenizer.getRange(ruleStart!, ruleEnd.previous);
const declarationName =
tokenizer.cssText.slice(nameRange.start, nameRange.end);
const range = tokenizer.trimRange(tokenizer.getRange(
ruleStart!,
tokenizer.currentToken && tokenizer.currentToken.previous ||
ruleEnd));
return this.nodeFactory.declaration(
declarationName, rulelist, nameRange, range);
// Otherwise, this is a ruleset:
} else {
const selectorRange = tokenizer.getRange(ruleStart!, ruleEnd);
const selector =
tokenizer.cssText.slice(selectorRange.start, selectorRange.end);
const rulelist = this.parseRulelist(tokenizer);
const start = ruleStart!.start;
let end;
if (tokenizer.currentToken) {
end = tokenizer.currentToken.previous ?
tokenizer.currentToken.previous.end :
ruleStart!.end;
} else {
// No current token? We must have reached the end of input, so extend
// the node to the end of the text:
end = tokenizer.cssText.length;
}
return this.nodeFactory.ruleset(
selector, rulelist, selectorRange, {start, end});
}
}
}
export {Parser};
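// Usage (a minimal sketch): parse a stylesheet with the default
// NodeFactory and walk the top-level rules of the resulting AST. The
// `type` property on each node is assumed from the node definitions
// in './common'.
//
// import {Parser} from './parser';
//
// const ast = new Parser().parse(`
//   @media (min-width: 600px) {
//     .title { color: red; }
//   }
// `);
// for (const rule of ast.rules) {
//   console.log(rule.type);
// }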