// fast-xml-parser
// Version:
// Validate XML or Parse XML to JS/JSON very fast without C/C++ based libraries
// 256 lines (232 loc) • 8.04 kB
// JavaScript
;
// Project-local helpers used throughout this module.
const util = require('./util');
const buildOptions = require('./util').buildOptions;
const xmlNode = require('./xmlNode');
// Kinds of match the tag pattern can produce; see checkForTagType.
const TagType = {OPENING: 1, CLOSING: 2, SELF: 3, CDATA: 4};
// Sub-pattern for the attribute section of a tag: zero or more `name`, `name='v'` or `name="v"` items.
const attrstr_regex = '((\\s*[\\w\\-._:]+(=((\'([^\']*)\')|("([^"]*)")))?)*)\\s*';
// Source text of the tag pattern. One match covers a CDATA section, an opening/self-closing
// tag or a closing tag, plus the text that follows it up to the next '<'.
// Capture groups read by the code in this file:
//   [3]  CDATA content              [4]  the ']]>' terminator (present only for CDATA)
//   [5]  tag name including prefix  [7]  tag name without the namespace prefix
//   [8]  raw attribute string       [16] '/' of a self-closing tag
//   [18] '/' of a closing tag       [22] text following the tag
// getTraversalObj rewrites every `[\w` in this string to splice in options.localeRange.
let regx = '<((!\\[CDATA\\[([\\s\\S]*?)(]]>))|(([\\w:\\-._]*:)?([\\w:\\-._]+))'+attrstr_regex+'(\\/)?>|((\\/)(([\\w:\\-._]*:)?([\\w:\\-._]+))\\s*>))([^<]*)';
// Earlier drafts of the pattern, kept for reference:
//'<((!\\[CDATA\\[([\\s\\S]*?)(]]>))|(([\\w:\\-._]*:)?([\\w:\\-._]+))([^>]*)>|((\\/)(([\\w:\\-._]*:)?([\\w:\\-._]+))\\s*>))([^<]*)';
//const tagsRegx = new RegExp("<(\\/?[\\w:\\-\._]+)([^>]*)>(\\s*"+cdataRegx+")*([^<]+)?","g");
//const tagsRegx = new RegExp("<(\\/?)((\\w*:)?([\\w:\\-\._]+))([^>]*)>([^<]*)("+cdataRegx+"([^<]*))*([^<]+)?","g");
// Polyfill Number.parseInt / Number.parseFloat on engines that lack them.
// The standard global functions are used as the source instead of `window.*`:
// `window` is only defined in browsers, so referencing it would throw a
// ReferenceError in any other runtime that actually needs the polyfill.
if (!Number.parseInt && typeof parseInt === 'function') {
  Number.parseInt = parseInt;
}
if (!Number.parseFloat && typeof parseFloat === 'function') {
  Number.parseFloat = parseFloat;
}
/**
 * Default parser options. Callers override individual keys; the names accepted
 * are those listed in `props`.
 */
const defaultOptions = {
  // Prepended to every attribute name when building the attribute map.
  attributeNamePrefix: '@_',
  // When set to a string, all attributes of a tag are grouped under that key.
  attrNodeName: false,
  // Not read in this file; presumably the key used for text content by a later stage.
  textNodeName: '#text',
  // When true, attribute strings are not parsed at all.
  ignoreAttributes: true,
  // When true, namespace prefixes are dropped from tag and attribute names.
  ignoreNameSpace: false,
  allowBooleanAttributes: false, //a tag can have attributes without any value
  //ignoreRootElement : false,
  // Convert tag text to numbers/booleans where it looks like one.
  parseNodeValue: true,
  // Convert attribute values to numbers/booleans where they look like one.
  parseAttributeValue: false,
  // Not read in this file; presumably consumed by a later conversion stage.
  arrayMode: false,
  trimValues: true, //Trim string values of tag and attributes
  // When set to a string, CDATA sections become child nodes with this tag name.
  cdataTagName: false,
  // Placeholder appended to a node's value where a CDATA child was extracted.
  cdataPositionChar: '\\c',
  // Extra character range spliced into the name character classes of the tag pattern.
  localeRange: '',
  // Hook applied to tag text before value parsing.
  tagValueProcessor: function(a) {
    return a;
  },
  // Hook applied to attribute values before value parsing.
  attrValueProcessor: function(a) {
    return a;
  },
  // When true, a numeric conversion is kept only if it round-trips to the same
  // string. Listed in `props`, so it gets an explicit default here as well.
  parseTrueNumberOnly: false,
  // Tag names whose inner markup is kept as raw text instead of being parsed.
  stopNodes: []
  //decodeStrict: false,
};
exports.defaultOptions = defaultOptions;
// Names of the options this parser recognises. Passed to buildOptions together
// with defaultOptions when getTraversalObj resolves the caller's options.
const props = [
'attributeNamePrefix',
'attrNodeName',
'textNodeName',
'ignoreAttributes',
'ignoreNameSpace',
'allowBooleanAttributes',
'parseNodeValue',
'parseAttributeValue',
'arrayMode',
'trimValues',
'cdataTagName',
'cdataPositionChar',
'localeRange',
'tagValueProcessor',
'attrValueProcessor',
'parseTrueNumberOnly',
'stopNodes'
];
exports.props = props;
/**
 * Scans an XML string tag by tag and builds a tree of xmlNode objects.
 *
 * Comments are removed first. Every match of the tag pattern is then classified
 * (opening, closing, self-closing, CDATA) and used to grow or walk back up the
 * tree. Text that follows a tag is attached to the node that is current at that
 * point.
 *
 * @param {string} xmlData - XML source text.
 * @param {object} [options] - Parser options, resolved through buildOptions
 *   against defaultOptions and props.
 * @returns {object} The synthetic root node (tag name '!xml').
 */
const getTraversalObj = function(xmlData, options) {
  options = buildOptions(options, defaultOptions, props);
  //xmlData = xmlData.replace(/\r?\n/g, " ");//make it single line
  xmlData = xmlData.replace(/<!--[\s\S]*?-->/g, ''); //Remove comments

  const xmlObj = new xmlNode('!xml');
  let currentNode = xmlObj;

  // Build the pattern for THIS call only. Writing the result back to the shared
  // `regx` string would bake the first non-empty localeRange in for good: once
  // every `[\w` has become `[<range>\w`, later calls find nothing left to
  // replace and silently keep the old range. A function replacer is used so a
  // `$` inside localeRange is not treated as a replacement pattern.
  const tagPattern = regx.replace(/\[\\w/g, function() {
    return '[' + options.localeRange + '\\w';
  });
  const tagsRegx = new RegExp(tagPattern, 'g');

  let tag = tagsRegx.exec(xmlData);
  while (tag) {
    const tagType = checkForTagType(tag);
    const trailingText = tag[22]; // text between this tag and the next '<'

    if (tagType === TagType.CLOSING) {
      //add parsed data to parent node
      if (currentNode.parent && trailingText) {
        currentNode.parent.val = util.getValue(currentNode.parent.val) + '' + processTagValue(trailingText, options);
      }
      if (options.stopNodes.length && options.stopNodes.includes(currentNode.tagname)) {
        // Stop node: discard parsed children and keep the raw markup found
        // between the end of the opening tag and the start of this closing tag.
        currentNode.child = [];
        if (currentNode.attrsMap == undefined) {
          currentNode.attrsMap = {};
        }
        currentNode.val = xmlData.substr(currentNode.startIndex + 1, tag.index - currentNode.startIndex - 1);
      }
      currentNode = currentNode.parent;
    } else if (tagType === TagType.CDATA) {
      if (options.cdataTagName) {
        //add cdata node
        const childNode = new xmlNode(options.cdataTagName, currentNode, tag[3]);
        childNode.attrsMap = buildAttributesMap(tag[8], options);
        currentNode.addChild(childNode);
        //for backtracking
        currentNode.val = util.getValue(currentNode.val) + options.cdataPositionChar;
        //add rest value to parent node
        if (trailingText) {
          currentNode.val += processTagValue(trailingText, options);
        }
      } else {
        // No dedicated CDATA node: merge the CDATA content into the text value.
        currentNode.val = (currentNode.val || '') + (tag[3] || '') + processTagValue(trailingText, options);
      }
    } else if (tagType === TagType.SELF) {
      if (currentNode && trailingText) {
        currentNode.val = util.getValue(currentNode.val) + '' + processTagValue(trailingText, options);
      }
      const childNode = new xmlNode(options.ignoreNameSpace ? tag[7] : tag[5], currentNode, '');
      childNode.attrsMap = buildAttributesMap(tag[8], options);
      currentNode.addChild(childNode);
    } else {
      //TagType.OPENING
      const childNode = new xmlNode(
        options.ignoreNameSpace ? tag[7] : tag[5],
        currentNode,
        processTagValue(trailingText, options)
      );
      if (options.stopNodes.length && options.stopNodes.includes(childNode.tagname)) {
        // Index of the '>' that ends the opening tag; used when the stop node closes.
        childNode.startIndex = tag.index + tag[1].length;
      }
      childNode.attrsMap = buildAttributesMap(tag[8], options);
      currentNode.addChild(childNode);
      currentNode = childNode;
    }

    tag = tagsRegx.exec(xmlData);
  }
  return xmlObj;
};
/**
 * Prepares the text that follows a tag: optional trim, then the user's
 * tagValueProcessor hook, then value parsing.
 *
 * @param {string} val - Raw text; falsy input is returned untouched.
 * @param {object} options - Resolved parser options.
 * @returns {*} The processed value, or `val` itself when it is falsy.
 */
function processTagValue(val, options) {
  if (!val) {
    return val;
  }
  const text = options.trimValues ? val.trim() : val;
  const processed = options.tagValueProcessor(text);
  return parseValue(processed, options.parseNodeValue, options.parseTrueNumberOnly);
}
/**
 * Classifies one match of the tag pattern.
 *
 * Checks run in priority order: CDATA terminator (group 4), closing slash
 * (group 18), self-closing slash (group 16); anything else is an opening tag.
 *
 * @param {Array} match - Result of executing the tag pattern.
 * @returns {number} One of the TagType values.
 */
function checkForTagType(match) {
  if (match[4] === ']]>') {
    return TagType.CDATA;
  }
  if (match[18] === '/') {
    return TagType.CLOSING;
  }
  if (match[16] === '/') {
    return TagType.SELF;
  }
  return TagType.OPENING;
}
/**
 * Strips a namespace prefix from a name when options.ignoreNameSpace is set.
 *
 * - Names whose first ':'-separated part is 'xmlns' resolve to '' (callers skip them).
 * - Names with exactly one ':' lose the prefix; a leading '/' is preserved.
 * - Everything else is returned unchanged.
 *
 * @param {string} tagname - Tag or attribute name.
 * @param {object} options - Resolved parser options.
 * @returns {string} The resolved name.
 */
function resolveNameSpace(tagname, options) {
  if (!options.ignoreNameSpace) {
    return tagname;
  }
  const parts = tagname.split(':');
  if (parts[0] === 'xmlns') {
    return '';
  }
  if (parts.length !== 2) {
    return tagname;
  }
  const leadingSlash = tagname.charAt(0) === '/' ? '/' : '';
  return leadingSlash + parts[1];
}
/**
 * Converts a string to a boolean or number when it looks like one.
 *
 * @param {*} val - Value to convert.
 * @param {boolean} shouldParse - When falsy (or when `val` is not a string) no
 *   conversion happens; `val` is returned if util.isExist accepts it, otherwise ''.
 * @param {boolean} [parseTrueNumberOnly] - Keep a numeric conversion only if
 *   converting it back to a string reproduces `val` exactly (so '007' or '1.50'
 *   stay strings).
 * @returns {*} `true`/`false` for the exact strings 'true'/'false', a number for
 *   numeric strings, otherwise the original value.
 */
function parseValue(val, shouldParse, parseTrueNumberOnly) {
  if (!shouldParse || typeof val !== 'string') {
    return util.isExist(val) ? val : '';
  }

  // The coercing global isNaN is intentional: it answers "does this string
  // convert to a number?".
  if (val.trim() === '' || isNaN(val)) {
    if (val === 'true') {
      return true;
    }
    if (val === 'false') {
      return false;
    }
    return val;
  }

  let parsed;
  if (val.indexOf('0x') !== -1) {
    //support hexa decimal
    parsed = Number.parseInt(val, 16);
  } else if (val.indexOf('.') !== -1) {
    parsed = Number.parseFloat(val);
  } else {
    // isNaN() above already accepted the whole string, so convert the whole
    // string. parseInt(val, 10) stops at the first non-digit and corrupted
    // such values: '1e3' became 1, '0b11' became 0, 'Infinity' became NaN.
    parsed = Number(val);
  }

  if (parseTrueNumberOnly && String(parsed) !== val) {
    return val;
  }
  return parsed;
}
//TODO: change regex to capture NS
//const attrsRegx = new RegExp("([\\w\\-\\.\\:]+)\\s*=\\s*(['\"])((.|\n)*?)\\2","gm");
// Groups: [1] attribute name, [4] attribute value (undefined for a bare name).
const attrsRegx = new RegExp('([^\\s=]+)\\s*(=\\s*([\'"])(.*?)\\3)?', 'g');

/**
 * Turns the raw attribute section of a tag into a map of attribute values.
 *
 * @param {string} attrStr - Attribute text captured from the tag.
 * @param {object} options - Resolved parser options.
 * @returns {object|undefined} Map keyed by attributeNamePrefix + name (wrapped
 *   under options.attrNodeName when that is set); undefined when attributes are
 *   ignored, `attrStr` is not a string, or nothing was collected.
 */
function buildAttributesMap(attrStr, options) {
  if (options.ignoreAttributes || typeof attrStr !== 'string') {
    return;
  }

  // Line breaks inside the attribute section are treated as plain spaces.
  const flattened = attrStr.replace(/\r?\n/g, ' ');
  const matches = util.getAllMatches(flattened, attrsRegx);
  const total = matches.length; //don't make it inline
  const collected = {};

  for (let idx = 0; idx < total; idx++) {
    const match = matches[idx];
    const attrName = resolveNameSpace(match[1], options);
    if (!attrName.length) {
      continue; // e.g. xmlns declarations resolved away by ignoreNameSpace
    }

    if (match[4] !== undefined) {
      if (options.trimValues) {
        match[4] = match[4].trim();
      }
      match[4] = options.attrValueProcessor(match[4]);
      collected[options.attributeNamePrefix + attrName] = parseValue(
        match[4],
        options.parseAttributeValue,
        options.parseTrueNumberOnly
      );
    } else if (options.allowBooleanAttributes) {
      // Attribute written without a value.
      collected[options.attributeNamePrefix + attrName] = true;
    }
  }

  if (Object.keys(collected).length === 0) {
    return;
  }
  if (!options.attrNodeName) {
    return collected;
  }
  const wrapper = {};
  wrapper[options.attrNodeName] = collected;
  return wrapper;
}
exports.getTraversalObj = getTraversalObj;