UNPKG

html5parser

Version:
307 lines (286 loc) 7.57 kB
/*!
 *
 * Copyright 2017 - acrazing
 *
 * @author acrazing joking.young@gmail.com
 * @since 2017-08-19 00:54:46
 * @version 1.0.0
 * @desc parse.ts
 */

import { noNestedTags, selfCloseTags } from './config';
import { IToken, tokenize, TokenKind } from './tokenize';
import { IAttribute, IAttributeValue, INode, ITag, IText, SyntaxKind } from './types';
import { getLineRanges, getPosition } from './utils';
import { walk } from './walk';

/** One entry of the linked list of currently open tags (innermost first). */
interface IContext {
  parent: IContext | undefined;
  tag: ITag;
}

export interface ParseOptions {
  // create tag's attributes map
  // if true, will set ITag.attributeMap property
  // as a `Record<string, IAttribute>`
  setAttributeMap: boolean;
}

// Parser state. It lives at module level and is (re)initialised by `init()`
// at the start and at the end of every `parse()` call.
let index: number; // position of the current token in `tokens`
let count: number; // cached `tokens.length`
let tokens: IToken[];
let tagChain: IContext | undefined; // innermost open tag, or undefined at top level
let nodes: INode[]; // top-level result nodes
let token: IToken; // token currently being processed
let node: IText | undefined; // text node currently being extended, if any
let buffer: string; // the raw input text
let lines: number[] | undefined; // lazily computed, only used for error positions
let parseOptions: ParseOptions | undefined;

/**
 * Resets the module-level parser state.
 *
 * @param input   Source text to tokenize. When omitted, the token list and
 *                buffer are cleared instead (used to drop references after a parse).
 * @param options Options stored for the current parse run.
 */
function init(input?: string, options?: ParseOptions) {
  if (input === void 0) {
    count = 0;
    tokens.length = 0;
    buffer = '';
  } else {
    tokens = tokenize(input);
    count = tokens.length;
    buffer = input;
  }
  index = 0;
  tagChain = void 0;
  nodes = [];
  token = void 0 as any;
  node = void 0;
  lines = void 0;
  parseOptions = options;
}

/**
 * Attaches a node to the tree: to the top-level list when no tag is open,
 * otherwise to the body of the innermost open tag.
 *
 * If the new node is a tag with the same name as the innermost open tag and
 * that name is listed in `noNestedTags`, the open tag is popped first so the
 * new tag becomes its sibling instead of its child.
 */
function pushNode(_node: ITag | IText) {
  if (!tagChain) {
    nodes.push(_node);
  } else if (
    _node.type === SyntaxKind.Tag &&
    _node.name === tagChain.tag.name &&
    noNestedTags[_node.name]
  ) {
    tagChain = tagChain.parent;
    pushNode(_node);
  } else if (tagChain.tag.body) {
    // The parent's range grows to cover every child appended to it.
    tagChain.tag.end = _node.end;
    tagChain.tag.body.push(_node);
  }
}

/** Makes `tag` the innermost open tag and ends the current text run. */
function pushTagChain(tag: ITag) {
  tagChain = { parent: tagChain, tag: tag };
  node = void 0;
}

/** Creates a text node; every field defaults to the current token's value. */
function createLiteral(start = token.start, end = token.end, value = token.value): IText {
  return {
    start,
    end,
    value,
    type: SyntaxKind.Text,
  };
}

/**
 * Creates a tag node from the current (open tag) token. `open`, `end`,
 * `body` and `close` are provisional and completed by `parseOpenTag` /
 * `parseCloseTag`.
 */
function createTag(): ITag {
  return {
    start: token.start - 1, // include <
    end: token.end,
    type: SyntaxKind.Tag,
    open: createLiteral(token.start - 1), // not finished
    name: token.value,
    rawName: buffer.substring(token.start, token.end),
    attributes: [],
    attributeMap: void 0,
    body: null,
    close: null,
  };
}

/** Creates a value-less attribute whose name is the current token. */
function createAttribute(): IAttribute {
  return {
    start: token.start,
    end: token.end,
    name: createLiteral(),
    value: void 0,
  };
}

/**
 * Creates an attribute value from the current token. For quoted tokens the
 * first and last characters are stripped from `value` and the quote character
 * is recorded in `quote`; unquoted tokens are taken verbatim.
 */
function createAttributeValue(): IAttributeValue {
  const unquoted = token.type === TokenKind.AttrValueNq;
  return {
    start: token.start,
    end: token.end,
    // `slice(1, -1)` is the non-deprecated equivalent of
    // `substr(1, length - 2)` and also yields '' for 0/1/2-character values.
    value: unquoted ? token.value : token.value.slice(1, -1),
    quote: unquoted ? void 0 : token.type === TokenKind.AttrValueSq ? "'" : '"',
  };
}

/**
 * Appends the current token's text to `_node` and extends its end offset.
 * Defaults to the text node currently being built.
 */
function appendLiteral(_node: IText | IAttributeValue = node as IText) {
  _node.value += token.value;
  _node.end = token.end;
}

/**
 * Throws an error describing the current token and its [line, column]
 * position, plus the name of the innermost open tag when there is one.
 *
 * @throws Error always.
 */
function unexpected() {
  if (lines === void 0) {
    lines = getLineRanges(buffer);
  }
  const [line, column] = getPosition(lines, token.start);
  throw new Error(
    `Unexpected token "${token.value}(${token.type})" at [${line},${column}]` +
      (tagChain ? ` when parsing tag: ${JSON.stringify(tagChain.tag.name)}.` : ''),
  );
}

/**
 * Populates `tag.attributeMap` with the tag's attributes keyed by attribute
 * name; a later attribute with the same name overrides an earlier one.
 */
function buildAttributeMap(tag: ITag) {
  const map: NonNullable<ITag['attributeMap']> = {};
  for (const attr of tag.attributes) {
    // A plain `map[name] = attr` would hit the `Object.prototype.__proto__`
    // setter for an attribute literally named `__proto__` (losing the entry
    // and replacing the map's prototype). defineProperty always creates an
    // own, enumerable data property and keeps the map a plain object.
    Object.defineProperty(map, attr.name.value, {
      value: attr,
      writable: true,
      enumerable: true,
      configurable: true,
    });
  }
  tag.attributeMap = map;
}

/** States of the attribute scanner used while reading an open tag. */
const enum OpenTagState {
  BeforeAttr,
  InName,
  AfterName,
  AfterEqual,
  InValue,
}

/**
 * Parses an open tag starting at the current token: creates the tag node,
 * attaches it to the tree, then consumes tokens up to and including the
 * tag-end token, collecting attributes on the way.
 *
 * Tags named '', '!' or '!--' are handled separately: everything up to the
 * tag-end token is collected as a single text child instead of attributes.
 */
function parseOpenTag() {
  let state = OpenTagState.BeforeAttr;
  let attr: IAttribute = void 0 as any;

  const tag = createTag();
  pushNode(tag);

  if (tag.name === '' || tag.name === '!' || tag.name === '!--') {
    tag.open.value = '<' + tag.open.value;
    // Nothing follows the open token: stop here instead of reading past the
    // end of `tokens` (the caller guarantees index < count on entry, so the
    // next token exists only when index + 1 < count).
    if (index + 1 >= count) {
      return;
    }
    token = tokens[++index];
    if (token.type !== TokenKind.OpenTagEnd) {
      node = createLiteral();
      tag.body = [node];
      while (++index < count) {
        token = tokens[index];
        if (token.type === TokenKind.OpenTagEnd) {
          node = void 0;
          break;
        }
        appendLiteral();
      }
    }
    tag.close = createLiteral(token.start, token.end + 1, `${token.value}>`);
    tag.end = tag.close.end;
    return;
  }

  while (++index < count) {
    token = tokens[index];
    if (token.type === TokenKind.OpenTagEnd) {
      tag.end = tag.open.end = token.end + 1;
      tag.open.value = buffer.substring(tag.open.start, tag.open.end);
      // An empty tag-end value on a tag not listed in `selfCloseTags` opens a
      // body; anything else is treated as having no body.
      if (token.value === '' && !selfCloseTags[tag.name]) {
        tag.body = [];
        pushTagChain(tag);
      } else {
        tag.body = void 0;
      }
      break;
    }

    switch (state) {
      case OpenTagState.BeforeAttr:
        if (token.type !== TokenKind.Whitespace) {
          attr = createAttribute();
          state = OpenTagState.InName;
          tag.attributes.push(attr);
        }
        break;

      case OpenTagState.InName:
        if (token.type === TokenKind.Whitespace) {
          state = OpenTagState.AfterName;
        } else if (token.type === TokenKind.AttrValueEq) {
          state = OpenTagState.AfterEqual;
        } else {
          appendLiteral(attr.name);
          // Keep the attribute range in step with its (extended) name.
          attr.end = attr.name.end;
        }
        break;

      case OpenTagState.AfterName:
        if (token.type !== TokenKind.Whitespace) {
          if (token.type === TokenKind.AttrValueEq) {
            state = OpenTagState.AfterEqual;
          } else {
            attr = createAttribute();
            state = OpenTagState.InName;
            tag.attributes.push(attr);
          }
        }
        break;

      case OpenTagState.AfterEqual:
        if (token.type !== TokenKind.Whitespace) {
          attr.value = createAttributeValue();
          // Always extend the attribute over its value. Previously this was
          // skipped for unquoted values, so `<a href=foo>` (value directly
          // followed by the tag end) left `attr.end` at the end of the name.
          attr.end = attr.value.end;
          state =
            token.type === TokenKind.AttrValueNq
              ? OpenTagState.InValue
              : OpenTagState.BeforeAttr;
        }
        break;

      default:
        // OpenTagState.InValue: inside an unquoted value.
        if (token.type === TokenKind.Whitespace) {
          attr.end = attr.value!.end;
          state = OpenTagState.BeforeAttr;
        } else {
          appendLiteral(attr.value);
          attr.end = attr.value!.end;
        }
        break;
    }
  }
}

/**
 * Handles a close tag token: finds the nearest open tag whose name equals the
 * trimmed token value, records its `close` literal and end offset, and pops
 * it together with every tag opened inside it. A close tag that matches no
 * open tag is ignored.
 */
function parseCloseTag() {
  const closingName = token.value.trim();
  let _context = tagChain;
  while (_context && closingName !== _context.tag.name) {
    _context = _context.parent;
  }
  if (!_context) {
    return;
  }
  // The token range is widened by two characters before and one after —
  // presumably the surrounding `</` and `>`.
  _context.tag.close = createLiteral(
    token.start - 2,
    token.end + 1,
    buffer.substring(token.start - 2, token.end + 1),
  );
  _context.tag.end = _context.tag.close.end;
  tagChain = _context.parent;
}

/**
 * Parses `input` into a list of top-level nodes.
 *
 * @param input   Source text to parse.
 * @param options Optional settings; `setAttributeMap` defaults to `false`.
 *                When it is true, `attributeMap` is built for every tag.
 * @returns The top-level nodes of the parsed tree.
 * @throws Error when a token of an unexpected kind is found at the top level.
 */
export function parse(input: string, options?: ParseOptions): INode[] {
  init(input, {
    setAttributeMap: false,
    ...options,
  } as ParseOptions);
  try {
    while (index < count) {
      token = tokens[index];
      switch (token.type) {
        case TokenKind.Literal:
          if (!node) {
            node = createLiteral();
            pushNode(node);
          } else {
            appendLiteral(node);
          }
          break;
        case TokenKind.OpenTag:
          node = void 0;
          parseOpenTag();
          break;
        case TokenKind.CloseTag:
          node = void 0;
          parseCloseTag();
          break;
        default:
          unexpected();
          break;
      }
      index++;
    }
    const _nodes = nodes;
    if (parseOptions?.setAttributeMap) {
      walk(_nodes, {
        enter(node: IText | ITag): void {
          if (node.type === SyntaxKind.Tag) {
            buildAttributeMap(node);
          }
        },
      });
    }
    return _nodes;
  } finally {
    // Release the token list and buffer even when parsing throws, so the
    // module-level state never keeps a failed input alive.
    init();
  }
}