/*
 * xml2 — simple XML reader and parser
 * Version: (not given)
 * Source listing: 273 lines (269 loc), 6.53 kB, JavaScript
 */
/**
 * Numeric identifiers for the lexer's states.
 *
 * The lexer starts in `DATA` and uses the current value to index its
 * transition table (`stateMachine[state]`). The object is frozen so a state
 * id cannot be reassigned or added by accident at runtime.
 */
const STATES = Object.freeze({
  DATA: 0,
  CDATA: 1,
  TAG_OPEN: 2,
  TAG_NAME: 3,
  TAG_END: 4,
  ATTRIBUTE_NAME_START: 5,
  ATTRIBUTE_NAME: 6,
  ATTRIBUTE_NAME_END: 7,
  ATTRIBUTE_VALUE_BEGIN: 8,
  ATTRIBUTE_VALUE: 9,
  COMMENT: 10,
});
/**
 * Action names used as keys of each state's handler table.
 *
 * Every input character is classified into one of these; characters with no
 * dedicated classification use `char`. Frozen so the names stay constant.
 */
const ACTIONS = Object.freeze({
  lt: 'action-lt',
  gt: 'action-gt',
  space: 'action-space',
  equal: 'action-equal',
  quote: 'action-quote',
  slash: 'action-slash',
  char: 'action-char',
  bang: 'action-bang',
});
/**
 * Event type names passed as the first argument of the `emit` callback.
 * Frozen so consumers can rely on the names never changing at runtime.
 */
const TYPES = Object.freeze({
  text: 'text',
  comment: 'comment',
  openTag: 'open-tag',
  closeTag: 'close-tag',
  attributeName: 'attribute-name',
  attributeValue: 'attribute-value',
});
/**
 * Maps a single input character to the action it triggers.
 *
 * Characters that are not listed here are handled by the lexer loop as
 * `ACTIONS.char`. Both quote styles share one action; the handler receives
 * the actual character, so it can still tell them apart.
 * Frozen because it is a fixed lookup table.
 */
const charToAction = Object.freeze({
  ' ': ACTIONS.space,
  '\t': ACTIONS.space,
  '\n': ACTIONS.space,
  '\r': ACTIONS.space,
  '<': ACTIONS.lt,
  '>': ACTIONS.gt,
  '"': ACTIONS.quote,
  "'": ACTIONS.quote,
  '=': ACTIONS.equal,
  '/': ACTIONS.slash,
  '!': ACTIONS.bang,
});
/**
 * Handler that deliberately does nothing; used for characters a state
 * should consume and ignore.
 */
function noop() {}
module.exports = emit => {
var data = '';
var tagName = '';
var attrName = '';
var attrValue = '';
var isClosing = false;
var openingQuote = '';
var comment = '';
var state = STATES.DATA;
const stateMachine = {
[]: {
[]: () => {
if (data.trim()) {
emit(TYPES.text, data);
}
tagName = '';
isClosing = false;
state = STATES.TAG_OPEN;
},
[]: (char) => {
data += char;
},
},
[]: {
[]: (char) => {
data += char;
if (data.substr(-3) === ']]>') {
emit(TYPES.text, data.slice(0, -3));
data = '';
state = STATES.DATA;
}
},
},
[]: {
[]: noop,
[]: () => {
comment = '';
state = STATES.COMMENT;
},
[]: (char) => {
tagName = char;
state = STATES.TAG_NAME;
},
[]: () => {
tagName = '';
isClosing = true;
},
},
[]: {
[]: char => {
comment += char;
},
[]: () => {
if(/^--(.+)--$/s.test(comment)){
emit(TYPES.comment, RegExp.$1);
state = STATES.DATA;
}
}
},
[]: {
[]: () => {
if (isClosing) {
state = STATES.TAG_END;
} else {
state = STATES.ATTRIBUTE_NAME_START;
emit(TYPES.openTag, tagName);
}
},
[]: () => {
if (isClosing) {
emit(TYPES.closeTag, tagName);
} else {
emit(TYPES.openTag, tagName);
}
data = '';
state = STATES.DATA;
},
[]: () => {
state = STATES.TAG_END;
emit(TYPES.openTag, tagName);
},
[]: (char) => {
tagName += char;
if (tagName === '![CDATA[') {
state = STATES.CDATA;
data = '';
tagName = '';
}
},
},
[]: {
[]: () => {
emit(TYPES.closeTag, tagName);
data = '';
state = STATES.DATA;
},
[]: noop,
},
[]: {
[]: (char) => {
attrName = char;
state = STATES.ATTRIBUTE_NAME;
},
[]: () => {
data = '';
state = STATES.DATA;
},
[]: noop,
[]: () => {
isClosing = true;
state = STATES.TAG_END;
},
},
[]: {
[]: () => {
emit(TYPES.attributeName, attrName);
state = STATES.ATTRIBUTE_NAME_END;
},
[]: () => {
emit(TYPES.attributeName, attrName);
state = STATES.ATTRIBUTE_VALUE_BEGIN;
},
[]: () => {
attrValue = '';
emit(TYPES.attributeName, attrName);
emit(TYPES.attributeValue, attrValue);
data = '';
state = STATES.DATA;
},
[]: () => {
isClosing = true;
attrValue = '';
emit(TYPES.attributeName, attrName);
emit(TYPES.attributeValue, attrValue);
state = STATES.TAG_END;
},
[]: (char) => {
attrName += char;
},
},
[]: {
[]: noop,
[]: () => {
state = STATES.ATTRIBUTE_VALUE_BEGIN;
},
[]: () => {
attrValue = '';
data = '';
state = STATES.DATA;
},
[]: (char) => {
attrValue = '';
attrName = char;
state = STATES.ATTRIBUTE_NAME;
},
},
[]: {
[]: noop,
[]: (char) => {
openingQuote = char;
attrValue = '';
state = STATES.ATTRIBUTE_VALUE;
},
[]: () => {
attrValue = '';
emit(TYPES.attributeValue, attrValue);
data = '';
state = STATES.DATA;
},
[]: (char) => {
openingQuote = '';
attrValue = char;
state = STATES.ATTRIBUTE_VALUE;
},
},
[]: {
[]: (char) => {
if (openingQuote) {
attrValue += char;
} else {
emit(TYPES.attributeValue, attrValue);
state = STATES.ATTRIBUTE_NAME_START;
}
},
[]: (char) => {
if (openingQuote === char) {
emit(TYPES.attributeValue, attrValue);
state = STATES.ATTRIBUTE_NAME_START;
} else {
attrValue += char;
}
},
[]: (char) => {
if (openingQuote) {
attrValue += char;
} else {
emit(TYPES.attributeValue, attrValue);
data = '';
state = STATES.DATA;
}
},
[]: (char) => {
if (openingQuote) {
attrValue += char;
} else {
emit(TYPES.attributeValue, attrValue);
isClosing = true;
state = STATES.TAG_END;
}
},
[]: (char) => {
attrValue += char;
},
},
};
return str => {
for (let i = 0; i < str.length; i++) {
var char = str[i];
var actions = stateMachine[state];
var action = charToAction[char] || ACTIONS.char;
(actions[action] || actions[ACTIONS.char])(char);
}
};
};