UNPKG

@mpxjs/core

Version:

mpx runtime core

372 lines (309 loc) 6.53 kB
import through from './through'

// Tokenizer states. Most of them double as the `type` of emitted tokens.
const PSEUDOSTART = 'pseudo-start'
const ATTR_START = 'attr-start'
const ANY_CHILD = 'any-child'
const ATTR_COMP = 'attr-comp'
const ATTR_END = 'attr-end'
const PSEUDOPSEUDO = '::'
const PSEUDOCLASS = ':'
const READY = '(ready)' // idle state: no token is currently being gathered
const OPERATION = 'op'
const CLASS = 'class'
const COMMA = 'comma'
const ATTR = 'attr'
const SUBJECT = '!'
const TAG = 'tag'
const STAR = '*'
const ID = 'id'

/**
 * Create a character-level state machine that splits a CSS selector string
 * into tokens.
 *
 * The returned value is whatever `through(ondata, onend)` produces; tokens are
 * emitted by calling `stream.queue(...)` on it. Simple tokens have the shape
 * `{ type, data }` where `data` is a string. Attribute tokens with a comparison
 * have `data: { lhs, rhs, cmp }`, and pseudo tokens with arguments have
 * `data: 'name(args)'`.
 *
 * NOTE(review): any text still being gathered is flushed at the end of every
 * `ondata` call, so each chunk written is presumably expected to be a complete
 * selector — confirm against callers.
 *
 * @returns {*} the stream created by `through`
 */
export default function tokenize () {
  let escaped = false
  let gathered = []
  let state = READY
  let data = []
  let idx = 0
  let stream
  let length
  let quote
  let depth
  let lhs
  let rhs
  let cmp
  let c

  return (stream = through(ondata, onend))

  /**
   * Feed one chunk of selector text through the state machine.
   * @param {string} chunk
   */
  function ondata (chunk) {
    data = data.concat(chunk.split(''))
    length = data.length

    while (idx < length && (c = data[idx++])) {
      switch (state) {
        case READY:
          state_ready()
          break
        case ANY_CHILD:
          state_any_child()
          break
        case OPERATION:
          state_op()
          break
        case ATTR_START:
          state_attr_start()
          break
        case ATTR_COMP:
          state_attr_compare()
          break
        case ATTR_END:
          state_attr_end()
          break
        case PSEUDOCLASS:
        case PSEUDOPSEUDO:
          state_pseudo()
          break
        case PSEUDOSTART:
          state_pseudostart()
          break
        case ID:
        case TAG:
        case CLASS:
          state_gather()
          break
      }
    }

    // Drop the consumed characters and rewind the cursor with them; leaving
    // `idx` at its old value would skip the start of the next chunk.
    data = data.slice(idx)
    idx = 0

    // Flush whatever is still being gathered at the end of the chunk.
    if (gathered.length) {
      stream.queue(token())
    }
  }

  /**
   * End-of-input handler. Intentionally a no-op: pending text is already
   * flushed at the end of `ondata`.
   */
  function onend (chunk) {}

  /** Decide which state the current character starts. */
  function state_ready () {
    switch (true) {
      case c === '#':
        state = ID
        break
      case c === '.':
        state = CLASS
        break
      case c === ':':
        state = PSEUDOCLASS
        break
      case c === '[':
        state = ATTR_START
        break
      case c === '!':
        subject()
        break
      case c === '*':
        star()
        break
      case c === ',':
        comma()
        break
      case /[>+~]/.test(c):
        state = OPERATION
        // Re-read the combinator in state_op so it ends up in the token data;
        // without this, `a>b` (no surrounding whitespace) yields an empty op.
        idx--
        break
      case /\s/.test(c):
        state = ANY_CHILD
        break
      case /[\w\d\-_]/.test(c):
        state = TAG
        // Re-read this character as the first character of the tag name.
        idx--
        break
    }
  }

  /** Emit a subject (`!`) token. */
  function subject () {
    state = SUBJECT
    gathered = ['!']
    stream.queue(token())
    state = READY
  }

  /** Emit a universal (`*`) token. */
  function star () {
    state = STAR
    gathered = ['*']
    stream.queue(token())
    state = READY
  }

  /** Emit a comma token. */
  function comma () {
    state = COMMA
    gathered = [',']
    stream.queue(token())
    state = READY
  }

  /** Gather combinator characters (`>`, `+`, `~`) and skip whitespace after them. */
  function state_op () {
    if (/[>+~]/.test(c)) {
      gathered.push(c)
      return
    }

    // chomp down the following whitespace.
    if (/\s/.test(c)) {
      return
    }

    // Any other character ends the combinator: emit it, then step the cursor
    // back so the next token starts matching from this character.
    stream.queue(token())
    state = READY
    idx--
  }

  /** Whitespace seen: either padding before a combinator or a descendant combinator. */
  function state_any_child () {
    if (/\s/.test(c)) {
      return
    }

    if (/[>+~]/.test(c)) {
      // The whitespace was padding around an explicit combinator.
      idx--
      state = OPERATION
      return
    }

    // Emit the any-child token and reset the state.
    stream.queue(token())
    state = READY
    idx--
  }

  /** Gather a pseudo name; switch to argument parsing when `(` follows. */
  function state_pseudo () {
    rhs = state
    state_gather(true)

    // Still gathering name characters.
    if (state !== READY) {
      return
    }

    if (c === '(') {
      lhs = gathered.join('')
      state = PSEUDOSTART
      gathered.length = 0
      depth = 1
      // state_gather stepped back onto the '('; skip over it.
      ++idx
      return
    }

    state = PSEUDOCLASS
    stream.queue(token())
    state = READY
  }

  /** Gather the (optionally quoted) argument of a pseudo up to the matching `)`. */
  function state_pseudostart () {
    if (gathered.length === 0 && !quote) {
      quote = /['"]/.test(c) ? c : null

      if (quote) {
        return
      }
    }

    if (quote) {
      if (!escaped && c === quote) {
        quote = null
        return
      }

      if (c === '\\') {
        if (escaped) {
          gathered.push(c)
        }
        // Toggle (as state_attr_end does) so an escaped backslash does not
        // leave the following character marked as escaped.
        escaped = !escaped
        return
      }

      escaped = false
      gathered.push(c)
      return
    }

    gathered.push(c)

    if (c === '(') {
      ++depth
    } else if (c === ')') {
      --depth
    }

    if (!depth) {
      // Drop the closing ')' that was just pushed.
      gathered.pop()
      stream.queue({
        type: rhs,
        data: lhs + '(' + gathered.join('') + ')'
      })

      state = READY
      lhs = rhs = cmp = null
      gathered.length = 0
    }
  }

  /** Gather an attribute name; emit a bare attr token on `]`. */
  function state_attr_start () {
    // state_gather may reset `state` while collecting characters, which decides
    // whether the logic below runs.
    state_gather(true)

    if (state !== READY) {
      return
    }

    if (c === ']') {
      state = ATTR
      stream.queue(token())
      state = READY
      return
    }

    lhs = gathered.join('')
    gathered.length = 0
    state = ATTR_COMP
  }

  // Attribute selectors: https://www.w3school.com.cn/css/css_attribute_selectors.asp
  /** Gather the attribute comparison operator (`=`, `~=`, `|=`, `$=`, `^=`, `*=`). */
  function state_attr_compare () {
    if (/[=~|$^*]/.test(c)) {
      gathered.push(c)
    }

    // The operator is complete after two characters or as soon as '=' is seen.
    if (gathered.length === 2 || c === '=') {
      cmp = gathered.join('')
      gathered.length = 0
      state = ATTR_END
      quote = null
    }
  }

  /** Gather the (optionally quoted) attribute value and emit the attr token. */
  function state_attr_end () {
    if (!gathered.length && !quote) {
      quote = /['"]/.test(c) ? c : null

      if (quote) {
        return
      }
    }

    if (quote) {
      if (!escaped && c === quote) {
        quote = null
        return
      }

      if (c === '\\') {
        if (escaped) {
          gathered.push(c)
        }
        escaped = !escaped
        return
      }

      escaped = false
      gathered.push(c)
      return
    }

    state_gather(true)

    if (state !== READY) {
      return
    }

    stream.queue({
      type: ATTR,
      data: {
        lhs: lhs,
        rhs: gathered.join(''),
        cmp: cmp
      }
    })

    state = READY
    lhs = rhs = cmp = null
    gathered.length = 0
  }

  /**
   * Gather one word character. An unescaped non-word character (e.g. a space)
   * ends the word: the state is reset to READY and the cursor steps back.
   * @param {boolean} [quietly] when truthy, do not emit a token at the word end
   */
  function state_gather (quietly) {
    if (/[^\d\w\-_]/.test(c) && !escaped) {
      if (c === '\\') {
        escaped = true
      } else {
        if (!quietly) {
          stream.queue(token())
        }
        state = READY
        idx--
      }
      return
    }

    escaped = false
    gathered.push(c)
  }

  /** Build a token from the gathered characters and clear the buffer. */
  function token () {
    const text = gathered.join('')
    gathered.length = 0

    return {
      type: state,
      data: text
    }
  }
}