UNPKG

htmlstr-parser

Version:

Simple HTML to JSON parser use Regexp and String.indexOf

48 lines (40 loc) 1.32 kB
import { STARTTAG_REX, ENDTAG_REX }from './regexp' import { isEmptyMaker } from './makers' import { TagStart, TagEmpty, TagEnd, Text} from './types' export function tokenize(html) { let string = html let tokens = [] const maxTime = Date.now() + 1000 while (string) { if (string.indexOf("<!--") === 0) { const lastIndex = string.indexOf("-->") + 3 string = string.substring(lastIndex) continue } if (string.indexOf("</") === 0) { const match = string.match(ENDTAG_REX) if (!match) continue string = string.substring(match[0].length) const name = match[1] if (isEmptyMaker(name)) continue tokens.push(new TagEnd(name)) continue } if (string.indexOf("<") === 0) { const match = string.match(STARTTAG_REX) if (!match) continue string = string.substring(match[0].length) const name = match[1] const attrs = match[2] const token = isEmptyMaker(name) ? new TagEmpty(name, attrs) : new TagStart(name, attrs) tokens.push(token) continue } const index = string.indexOf('<') const text = index < 0 ? string : string.substring(0, index) string = index < 0 ? "" : string.substring(index) tokens.push(new Text(text)) if (Date.now() >= maxTime) break } return tokens }