UNPKG

@dolphinweex/himalaya

Version:

HTML to JSON parser

github.com/andrejewski/himalaya

andrejewski/himalaya

317 lines (287 loc) • 7.92 kB

JavaScript

import { startsWith, endsWith, stringIncludes, arrayIncludes } from './compat'

/**
 * Advance `position` by `len` characters of `str`, updating line and column.
 *
 * Every '\n' consumed increments `position.line` and resets
 * `position.column` to 0; any other character increments `position.column`.
 * The position object is mutated in place.
 *
 * @param {{index: number, line: number, column: number}} position
 * @param {string} str - The full input being lexed.
 * @param {number} len - Number of characters to consume from `position.index`.
 */
export function feedPosition (position, str, len) {
  const start = position.index
  const end = position.index = start + len
  for (let i = start; i < end; i++) {
    const char = str.charAt(i)
    if (char === '\n') {
      position.line++
      position.column = 0
    } else {
      position.column++
    }
  }
}

/**
 * Advance `position` up to the absolute index `end` in `str`.
 *
 * @param {{index: number, line: number, column: number}} position
 * @param {string} str - The full input being lexed.
 * @param {number} end - Absolute index to move to.
 */
export function jumpPosition (position, str, end) {
  const len = end - position.index
  return feedPosition(position, str, len)
}

/**
 * Create a position pointing at the start of the input.
 * Index, column and line all start at zero.
 *
 * @returns {{index: number, column: number, line: number}}
 */
export function makeInitialPosition () {
  return {
    index: 0,
    column: 0,
    line: 0
  }
}

/**
 * Return a shallow copy of `position` so it can be stored on a token
 * without being affected by later mutation.
 *
 * @param {{index: number, line: number, column: number}} position
 * @returns {{index: number, line: number, column: number}}
 */
export function copyPosition (position) {
  return {
    index: position.index,
    line: position.line,
    column: position.column
  }
}

/**
 * Tokenize `str` and return the resulting token list.
 *
 * @param {string} str - Markup to tokenize.
 * @param {{childlessTags: Array<string>}} options - `childlessTags` is read by `lex`.
 * @returns {Array<Object>} Tokens in source order.
 */
export default function lexer (str, options) {
  const state = {
    str,
    options,
    position: makeInitialPosition(),
    tokens: []
  }
  lex(state)
  return state.tokens
}

/**
 * Run the main lexing loop over `state.str`, pushing tokens onto
 * `state.tokens` until the whole input has been consumed.
 *
 * @param {Object} state - Lexer state (`str`, `options`, `position`, `tokens`).
 */
export function lex (state) {
  const {str, options: {childlessTags}} = state
  const len = str.length
  while (state.position.index < len) {
    const start = state.position.index
    lexText(state)
    // lexText did not advance, so the cursor sits on a '<' that opens a
    // comment or a tag.
    if (state.position.index === start) {
      const isComment = startsWith(str, '!--', start + 1)
      if (isComment) {
        lexComment(state)
      } else {
        const tagName = lexTag(state)
        const safeTag = tagName.toLowerCase()
        // Only the first, i.e. outermost, <template> has its content
        // skipped; nested templates are lexed normally.
        // NOTE(review): position.line is zero-based and is read after the
        // tag has been consumed, so this matches a tag ending on the second
        // line of the input - confirm that this is the intended check.
        const isOutermostTemplate =
          safeTag === 'template' && state.position.line === 1
        if (isOutermostTemplate && arrayIncludes(childlessTags, safeTag)) {
          lexSkipTag(tagName, state)
        }
      }
    }
  }
}

const alphanumeric = /[A-Za-z0-9]/

/**
 * Find the index of the next '<' at or after `index` that begins a closing
 * tag ('/'), a '!' construct, or a tag name (an alphanumeric character).
 * Any other '<' is treated as plain text and skipped.
 *
 * @param {string} str
 * @param {number} index - Index to start searching from.
 * @returns {number} Index of the '<', or -1 when there is none.
 */
export function findTextEnd (str, index) {
  while (true) {
    const textEnd = str.indexOf('<', index)
    if (textEnd === -1) {
      return textEnd
    }
    const char = str.charAt(textEnd + 1)
    if (char === '/' || char === '!' || alphanumeric.test(char)) {
      return textEnd
    }
    index = textEnd + 1
  }
}

/**
 * Lex a run of text starting at the current position and push a `text`
 * token. Does nothing when a tag or comment starts right at the cursor.
 *
 * @param {Object} state - Lexer state.
 */
export function lexText (state) {
  const type = 'text'
  const {str, position} = state
  let textEnd = findTextEnd(str, position.index)
  if (textEnd === position.index) return
  if (textEnd === -1) {
    // No further markup: the text runs to the end of the input.
    textEnd = str.length
  }

  const start = copyPosition(position)
  const content = str.slice(position.index, textEnd)
  jumpPosition(position, str, textEnd)
  const end = copyPosition(position)
  state.tokens.push({type, content, position: {start, end}})
}

/**
 * Lex a comment starting at the current position and push a `comment`
 * token. An unterminated comment extends to the end of the input.
 *
 * @param {Object} state - Lexer state.
 */
export function lexComment (state) {
  const {str, position} = state
  const start = copyPosition(position)
  // "<!--".length
  feedPosition(position, str, 4)
  let contentEnd = str.indexOf('-->', position.index)
  // "-->".length
  let commentEnd = contentEnd + 3
  if (contentEnd === -1) {
    contentEnd = commentEnd = str.length
  }

  const content = str.slice(position.index, contentEnd)
  jumpPosition(position, str, commentEnd)
  state.tokens.push({
    type: 'comment',
    content,
    position: {
      start,
      end: copyPosition(position)
    }
  })
}

/**
 * Lex one tag at the current position. Pushes a `tag-start` token, the
 * `tag` name token, any `attribute` tokens, then a `tag-end` token.
 *
 * @param {Object} state - Lexer state.
 * @returns {string} The tag name exactly as written in the source.
 */
export function lexTag (state) {
  const {str, position} = state

  // Opening delimiter: "</" consumes two characters, "<" consumes one.
  const secondChar = str.charAt(position.index + 1)
  const isClosingTag = secondChar === '/'
  const start = copyPosition(position)
  feedPosition(position, str, isClosingTag ? 2 : 1)
  state.tokens.push({type: 'tag-start', close: isClosingTag, position: {start}})

  const tagName = lexTagName(state)
  lexTagAttributes(state)

  // Closing delimiter: "/>" consumes two characters, ">" consumes one.
  const firstChar = str.charAt(position.index)
  const isSelfClosing = firstChar === '/'
  feedPosition(position, str, isSelfClosing ? 2 : 1)
  const end = copyPosition(position)
  state.tokens.push({type: 'tag-end', close: isSelfClosing, position: {end}})

  return tagName
}

// See https://developer.mozilla.org/en-US/docs/Web/JavaScript/Guide/Regular_Expressions#special-white-space
const whitespace = /\s/

/**
 * Tell whether `char` matches the `\s` whitespace class.
 *
 * @param {string} char
 * @returns {boolean}
 */
export function isWhitespaceChar (char) {
  return whitespace.test(char)
}

/**
 * Lex the tag name at the current position and push a `tag` token.
 * Leading whitespace, '/' and '>' characters are skipped first; the name
 * then runs until the next whitespace, '/' or '>'.
 *
 * @param {Object} state - Lexer state.
 * @returns {string} The tag name.
 */
export function lexTagName (state) {
  const {str, position} = state
  const len = str.length
  let start = position.index
  while (start < len) {
    const char = str.charAt(start)
    const isTagChar = !(isWhitespaceChar(char) || char === '/' || char === '>')
    if (isTagChar) break
    start++
  }

  // The character at `start` is already known to belong to the name.
  let end = start + 1
  while (end < len) {
    const char = str.charAt(end)
    const isTagChar = !(isWhitespaceChar(char) || char === '/' || char === '>')
    if (!isTagChar) break
    end++
  }

  jumpPosition(position, str, end)
  const tagName = str.slice(start, end)
  state.tokens.push({
    type: 'tag',
    content: tagName
  })
  return tagName
}

/**
 * Lex the attributes of the current tag and push one `attribute` token per
 * attribute. The cursor is left on the '/' or '>' that ends the tag, or at
 * the end of the input if the tag is unterminated.
 *
 * @param {Object} state - Lexer state.
 */
export function lexTagAttributes (state) {
  const {str, position, tokens} = state
  let cursor = position.index
  // null, single-, or double-quote
  let quote = null
  // index of word start
  let wordBegin = cursor
  // "key", "key=value", "key='value'", etc
  const words = []
  const len = str.length

  // First pass: split the tag body into whitespace-separated words,
  // treating quoted sections as opaque.
  while (cursor < len) {
    const char = str.charAt(cursor)
    if (quote) {
      const isQuoteEnd = char === quote
      if (isQuoteEnd) {
        quote = null
      }
      cursor++
      continue
    }

    const isTagEnd = char === '/' || char === '>'
    if (isTagEnd) {
      if (cursor !== wordBegin) {
        words.push(str.slice(wordBegin, cursor))
      }
      break
    }

    const isWordEnd = isWhitespaceChar(char)
    if (isWordEnd) {
      if (cursor !== wordBegin) {
        words.push(str.slice(wordBegin, cursor))
      }
      wordBegin = cursor + 1
      cursor++
      continue
    }

    const isQuoteStart = char === '\'' || char === '"'
    if (isQuoteStart) {
      quote = char
      cursor++
      continue
    }

    cursor++
  }
  jumpPosition(position, str, cursor)

  // Second pass: re-join words that were split around a spaced-out '='
  // (`key =value`, `key = value`, `key= value`) into one attribute.
  const wLen = words.length
  const type = 'attribute'
  for (let i = 0; i < wLen; i++) {
    const word = words[i]
    const isNotPair = word.indexOf('=') === -1
    if (isNotPair) {
      const secondWord = words[i + 1]
      if (secondWord && startsWith(secondWord, '=')) {
        if (secondWord.length > 1) {
          // `key =value`
          const newWord = word + secondWord
          tokens.push({type, content: newWord})
          i += 1
          continue
        }
        const thirdWord = words[i + 2]
        i += 1
        if (thirdWord) {
          // `key = value`
          const newWord = word + '=' + thirdWord
          tokens.push({type, content: newWord})
          i += 1
          continue
        }
      }
    }
    if (endsWith(word, '=')) {
      const secondWord = words[i + 1]
      if (secondWord && !stringIncludes(secondWord, '=')) {
        // `key= value`
        const newWord = word + secondWord
        tokens.push({type, content: newWord})
        i += 1
        continue
      }

      // A trailing '=' with no usable value: emit the key alone.
      const newWord = word.slice(0, -1)
      tokens.push({type, content: newWord})
      continue
    }

    tokens.push({type, content: word})
  }
}

const push = [].push

/**
 * Skip over the content of the element `tagName` whose opening tag has just
 * been lexed. Everything up to the matching closing tag is emitted as a
 * single `text` token, followed by the closing tag's own tokens. When no
 * closing tag candidate remains, the rest is handed to `lexText`.
 *
 * @param {string} tagName - Name of the tag whose content is skipped.
 * @param {Object} state - Lexer state.
 */
export function lexSkipTag (tagName, state) {
  const {str, position, tokens} = state
  const safeTagName = tagName.toLowerCase()
  const len = str.length
  let index = position.index
  while (index < len) {
    const nextTag = str.indexOf('</', index)
    if (nextTag === -1) {
      lexText(state)
      break
    }

    // Lex the candidate closing tag into a scratch state so nothing is
    // committed unless its name matches.
    const tagStartPosition = copyPosition(position)
    jumpPosition(tagStartPosition, str, nextTag)
    const tagState = {str, position: tagStartPosition, tokens: []}
    const name = lexTag(tagState)
    if (safeTagName !== name.toLowerCase()) {
      index = tagState.position.index
      continue
    }

    if (nextTag !== position.index) {
      const textStart = copyPosition(position)
      jumpPosition(position, str, nextTag)
      tokens.push({
        type: 'text',
        content: str.slice(textStart.index, nextTag),
        position: {
          start: textStart,
          end: copyPosition(position)
        }
      })
    }

    push.apply(tokens, tagState.tokens)
    jumpPosition(position, str, tagState.position.index)
    break
  }
}