fast-xml-parser
Version:
Validate XML or Parse XML to JS/JSON very fast without C/C++ based libraries
177 lines (157 loc) • 5.85 kB
JavaScript
var getAllMatches = require("./util").getAllMatches;
var xmlNode = function(tagname,parent,val){
this.tagname = tagname;
this.parent = parent;
this.child = [];
this.val = val;
this.addChild = function (child){
this.child.push(child);
};
};
//var tagsRegx = new RegExp("<(\\/?[a-zA-Z0-9_:]+)([^>\\/]*)(\\/?)>([^<]+)?","g");
//var tagsRegx = new RegExp("<(\\/?[\\w:-]+)([^>]*)>([^<]+)?","g");
var cdataRegx = "<!\\[CDATA\\[([^\\]\\]]*)\\]\\]>"
var tagsRegx = new RegExp("<(\\/?[\\w:\\-\._]+)([^>]*)>(<!\\[CDATA\\[([^\\]\\]]*)\\]\\]>)*([^<]+)?","g");
var defaultOptions = {
attrPrefix : "@_",
textNodeName : "#text",
ignoreNonTextNodeAttr : true,
ignoreTextNodeAttr : true,
ignoreNameSpace : false,
ignoreRootElement : false,
textNodeConversion : true
};
var buildOptions = function (options){
if(!options) options = {};
var props = ["attrPrefix","ignoreNonTextNodeAttr","ignoreTextNodeAttr","ignoreNameSpace","ignoreRootElement","textNodeName","textNodeConversion"];
for (var i = 0; i < props.length; i++) {
if(options[props[i]] === undefined){
options[props[i]] = defaultOptions[props[i]];
}
}
return options;
};
var getTraversalObj =function (xmlData,options){
options = buildOptions(options);
//xmlData = xmlData.replace(/>(\s+)/g, ">");//Remove spaces and make it single line.
var tags = getAllMatches(xmlData,tagsRegx);
var xmlObj = new xmlNode('!xml');
var currentNode = xmlObj;
for (var i = 0; i < tags.length ; i++) {
var tag = resolveNameSpace(tags[i][1],options.ignoreNameSpace),
nexttag = i+1 < tags.length ? resolveNameSpace(tags[i+1][1],options.ignoreNameSpace) : undefined,
attrsStr = tags[i][2], attrs,
val = tags[i][4] === undefined ? tags[i][5] : simplifyCDATA(tags[i][0]);
if(tag.indexOf("/") === 0){//ending tag
currentNode = currentNode.parent;
continue;
}
var selfClosingTag = attrsStr.charAt(attrsStr.length-1) === '/';
var childNode = new xmlNode(tag,currentNode);
if(selfClosingTag){
attrs = buildAttributesArr(attrsStr,options.ignoreTextNodeAttr,options.attrPrefix,options.ignoreNameSpace);
childNode.val = attrs || "";
currentNode.addChild(childNode);
}else if( ("/" + tag) === nexttag){ //Text node
attrs = buildAttributesArr(attrsStr,options.ignoreTextNodeAttr,options.attrPrefix,options.ignoreNameSpace);
val = parseValue(val,options.textNodeConversion);
if(attrs){
attrs[options.textNodeName] = val;
childNode.val = attrs;
}else{
childNode.val = val || "";
}
currentNode.addChild(childNode);
i++;
}else{//starting tag
attrs = buildAttributesArr(attrsStr,options.ignoreNonTextNodeAttr,options.attrPrefix,options.ignoreNameSpace);
if(attrs){
for (var prop in attrs) {
attrs.hasOwnProperty(prop) && childNode.addChild(new xmlNode(prop,childNode,attrs[prop]));
}
}
currentNode.addChild(childNode);
currentNode = childNode;
}
}
return xmlObj;
};
var xml2json = function (xmlData,options){
return convertToJson(getTraversalObj(xmlData,options));
};
var cdRegx = new RegExp("<!\\[CDATA\\[([^\\]\\]]*)\\]\\]>","g");
function simplifyCDATA(cdata){
var result = getAllMatches(cdata,cdRegx);
var val = "";
for (var i = 0; i < result.length ; i++) {
val+=result[i][1];
}
return val;
}
function resolveNameSpace(tagname,ignore){
if(ignore){
var tags = tagname.split(":");
var prefix = tagname.charAt(0) === "/" ? "/" : "";
if(tags.length === 2) {
tagname = prefix + tags[1];
}
}
return tagname;
}
function parseValue(val,conversion){
if(val){
if(!conversion || isNaN(val)){
val = "" + val ;
}else{
if(val.indexOf(".") !== -1){
val = Number.parseFloat(val);
}else{
val = Number.parseInt(val,10);
}
}
}else{
val = "";
}
return val;
}
//var attrsRegx = new RegExp("(\\S+)=\\s*[\"']?((?:.(?![\"']?\\s+(?:\\S+)=|[>\"']))+.)[\"']?","g");
//var attrsRegx = new RegExp("(\\S+)=\\s*(['\"])((?:.(?!\\2))*.)","g");
var attrsRegx = new RegExp("(\\S+)\\s*=\\s*(['\"])(.*?)\\2","g");
function buildAttributesArr(attrStr,ignore,prefix,ignoreNS){
attrStr = attrStr || attrStr.trim();
if(!ignore && attrStr.length > 3){
var matches = getAllMatches(attrStr,attrsRegx);
var attrs = {};
for (var i = 0; i < matches.length; i++) {
var attrName = prefix + resolveNameSpace( matches[i][1],ignoreNS);
attrs[attrName] = matches[i][3];
}
return attrs;
}
}
var convertToJson = function (node){
var jObj = {};
if(node.val || node.val === "") {
return node.val;
}else{
for (var index = 0; index < node.child.length; index++) {
var prop = node.child[index].tagname;
var obj = convertToJson(node.child[index]);
if(jObj[prop] !== undefined){
if(!Array.isArray(jObj[prop])){
var swap = jObj[prop];
jObj[prop] = [];
jObj[prop].push(swap);
}
jObj[prop].push(obj);
}else{
jObj[prop] = obj;
}
}
}
return jObj;
};
exports.parse = xml2json;
exports.getTraversalObj = getTraversalObj;
exports.convertToJson = convertToJson;
exports.validate = require("./validator").validate;