fast-xml-parser
Version:
Validate XML or Parse XML to JS/JSON very fast without C/C++ based libraries
298 lines (260 loc) • 10.9 kB
JavaScript
(function(f){if(typeof exports==="object"&&typeof module!=="undefined"){module.exports=f()}else if(typeof define==="function"&&define.amd){define([],f)}else{var g;if(typeof window!=="undefined"){g=window}else if(typeof global!=="undefined"){g=global}else if(typeof self!=="undefined"){g=self}else{g=this}g.parser = f()}})(function(){var define,module,exports;return (function e(t,n,r){function s(o,u){if(!n[o]){if(!t[o]){var a=typeof require=="function"&&require;if(!u&&a)return a(o,!0);if(i)return i(o,!0);var f=new Error("Cannot find module '"+o+"'");throw f.code="MODULE_NOT_FOUND",f}var l=n[o]={exports:{}};t[o][0].call(l.exports,function(e){var n=t[o][1][e];return s(n?n:e)},l,l.exports,e,t,n,r)}return n[o].exports}var i=typeof require=="function"&&require;for(var o=0;o<r.length;o++)s(r[o]);return s})({1:[function(require,module,exports){
var getAllMatches = require("./util").getAllMatches;
/**
 * Node of the intermediate tree built while scanning the XML string.
 *
 * @param {string} tagname name of the element (or attribute) this node stands for
 * @param {Object} [parent] node this one hangs under; left undefined for the root
 * @param {*} [val] value carried by the node; left undefined for container nodes
 */
var xmlNode = function(tagname,parent,val){
    // Identity and position in the tree.
    this.tagname = tagname;
    this.parent = parent;

    // Nested nodes, in the order they were added.
    this.child = [];

    // Payload of the node (text, attribute map, ...).
    this.val = val;

    // Appends `node` to the end of this node's child list.
    this.addChild = function (node){
        this.child.push(node);
    };
};
// Earlier versions of the tag pattern, kept for reference:
//var tagsRegx = new RegExp("<(\\/?[a-zA-Z0-9_:]+)([^>\\/]*)(\\/?)>([^<]+)?","g");
//var tagsRegx = new RegExp("<(\\/?[\\w:-]+)([^>]*)>([^<]+)?","g");
// Source text of a pattern for one CDATA section; not referenced by the code visible in this module.
var cdataRegx = "<!\\[CDATA\\[([^\\]\\]]*)\\]\\]>"
// Matches one tag plus whatever follows it up to the next "<".
// Capture groups:
//   1 - tag name, with a leading "/" when it is a closing tag
//   2 - everything between the name and ">" (the raw attribute string; ends with "/" for self-closing tags)
//   3 - the last CDATA section directly following the tag (the group is repeated with "*")
//   4 - the content of that CDATA section
//   5 - plain text following the tag/CDATA, up to the next "<"
// NOTE(review): the class [^\]\]] excludes every "]", so a CDATA section whose content
// contains a "]" is not captured by groups 3/4.
var tagsRegx = new RegExp("<(\\/?[\\w:\\-\._]+)([^>]*)>(<!\\[CDATA\\[([^\\]\\]]*)\\]\\]>)*([^<]+)?","g");
// Values used for every option the caller does not supply.
var defaultOptions = {
    attrPrefix : "@_",
    textNodeName : "#text",
    ignoreNonTextNodeAttr : true,
    ignoreTextNodeAttr : true,
    ignoreNameSpace : false,
    ignoreRootElement : false,
    textNodeConversion : true
};

/**
 * Merges caller-supplied options with `defaultOptions`.
 *
 * The caller's object is never modified: a fresh object is returned, so the
 * same options object can be reused or shared (or frozen) safely.
 *
 * @param {Object} [options] user options; any option left `undefined` falls back to its default
 * @returns {Object} new object holding every own property of `options` plus the defaults
 */
var buildOptions = function (options){
    var resolved = {};
    var key;
    if(options){
        // Carry over everything the caller passed, including keys without a default.
        for (key in options) {
            if(Object.prototype.hasOwnProperty.call(options,key)){
                resolved[key] = options[key];
            }
        }
    }
    for (key in defaultOptions) {
        if(Object.prototype.hasOwnProperty.call(defaultOptions,key)){
            // Read through `options` directly so inherited values are still honoured.
            var supplied = options ? options[key] : undefined;
            resolved[key] = supplied === undefined ? defaultOptions[key] : supplied;
        }
    }
    return resolved;
};
/**
 * Scans `xmlData` with `tagsRegx` and builds a tree of `xmlNode` objects.
 *
 * @param {string} xmlData XML text to scan
 * @param {Object} [options] parser options; missing ones are filled in by `buildOptions`
 * @returns {Object} root node (tagname '!xml') whose children mirror the tags found
 */
var getTraversalObj =function (xmlData,options){
options = buildOptions(options);
//xmlData = xmlData.replace(/>(\s+)/g, ">");//Remove spaces and make it single line.
var tags = getAllMatches(xmlData,tagsRegx);
var xmlObj = new xmlNode('!xml');
var currentNode = xmlObj;
for (var i = 0; i < tags.length ; i++) {
// tag/nexttag: names of this match and the following one (namespace prefix dropped when requested).
// attrsStr: raw attribute text (group 2).
// val: trailing plain text (group 5) unless a CDATA section was captured (group 4), in which
// case the CDATA contents are extracted from the whole match.
var tag = resolveNameSpace(tags[i][1],options.ignoreNameSpace),
nexttag = i+1 < tags.length ? resolveNameSpace(tags[i+1][1],options.ignoreNameSpace) : undefined,
attrsStr = tags[i][2], attrs,
val = tags[i][4] === undefined ? tags[i][5] : simplifyCDATA(tags[i][0]);
if(tag.indexOf("/") === 0){//ending tag
// Closing tag: step back up to the parent node.
// NOTE(review): a closing tag seen while at the root sets currentNode to undefined.
currentNode = currentNode.parent;
continue;
}
// A trailing "/" in the attribute text marks a self-closing tag.
var selfClosingTag = attrsStr.charAt(attrsStr.length-1) === '/';
var childNode = new xmlNode(tag,currentNode);
if(selfClosingTag){
// Self-closing tag: its value is the attribute map, or "" when there is none.
attrs = buildAttributesArr(attrsStr,options.ignoreTextNodeAttr,options.attrPrefix,options.ignoreNameSpace);
childNode.val = attrs || "";
currentNode.addChild(childNode);
}else if( ("/" + tag) === nexttag){ //Text node
// Opening tag directly followed by its own closing tag: a leaf holding text.
attrs = buildAttributesArr(attrsStr,options.ignoreTextNodeAttr,options.attrPrefix,options.ignoreNameSpace);
val = parseValue(val,options.textNodeConversion);
if(attrs){
// With attributes present, the text is stored next to them under options.textNodeName.
attrs[options.textNodeName] = val;
childNode.val = attrs;
}else{
// NOTE(review): `val || ""` also replaces a parsed numeric 0 with "".
childNode.val = val || "";
}
currentNode.addChild(childNode);
// Skip the closing tag that was already consumed via the lookahead.
i++;
}else{//starting tag
// Opening tag with nested content: each attribute becomes a child node, then descend.
attrs = buildAttributesArr(attrsStr,options.ignoreNonTextNodeAttr,options.attrPrefix,options.ignoreNameSpace);
if(attrs){
for (var prop in attrs) {
attrs.hasOwnProperty(prop) && childNode.addChild(new xmlNode(prop,childNode,attrs[prop]));
}
}
currentNode.addChild(childNode);
currentNode = childNode;
}
}
return xmlObj;
};
/**
 * Parses XML text into a plain JS object.
 *
 * @param {string} xmlData XML text
 * @param {Object} [options] parser options, handed to `getTraversalObj` unchanged
 * @returns {*} whatever `convertToJson` produces for the traversal tree
 */
var xml2json = function (xmlData,options){
    var traversalTree = getTraversalObj(xmlData,options);
    return convertToJson(traversalTree);
};
// Matches one CDATA section; group 1 is its content.
var cdRegx = new RegExp("<!\\[CDATA\\[([^\\]\\]]*)\\]\\]>","g");

/**
 * Concatenates the contents of every CDATA section found in `cdata`.
 *
 * @param {string} cdata text that may contain CDATA sections
 * @returns {string} the section contents joined in order, "" when none is found
 */
function simplifyCDATA(cdata){
    var sections = getAllMatches(cdata,cdRegx);
    return sections.reduce(function (joined, section){
        return joined + section[1];
    }, "");
}
/**
 * Optionally strips a namespace prefix from a tag or attribute name.
 *
 * Only names with exactly one ":" are rewritten; a leading "/" (closing tag)
 * is preserved in front of the local name.
 *
 * @param {string} tagname name as captured from the XML, possibly starting with "/"
 * @param {boolean} ignore when falsy the name is returned untouched
 * @returns {string} the (possibly shortened) name
 */
function resolveNameSpace(tagname,ignore){
    if(!ignore){
        return tagname;
    }
    var parts = tagname.split(":");
    if(parts.length !== 2) {
        return tagname;
    }
    var closingSlash = tagname.charAt(0) === "/" ? "/" : "";
    return closingSlash + parts[1];
}
/**
 * Normalises the text content of a node, optionally converting numeric text
 * to a number.
 *
 * The numeric test and the conversion use the same coercion (`Number`), so
 * every text accepted as numeric converts to the value that test saw
 * (e.g. "1e3" -> 1000) instead of being truncated by a separate integer
 * parser. Whitespace-only text and non-finite results are kept as text.
 *
 * @param {*} val raw text (falsy values yield "")
 * @param {boolean} conversion when truthy, numeric text is returned as a number
 * @returns {string|number} the text, or its numeric value
 */
function parseValue(val,conversion){
    if(!val){
        return "";
    }
    var text = "" + val;
    // Number("   ") is 0, so blank text must be excluded explicitly.
    if(!conversion || text.trim() === "" || isNaN(text)){
        return text;
    }
    var numeric = Number(text);
    // Keep things like "Infinity" as text rather than emitting a non-finite number.
    return isFinite(numeric) ? numeric : text;
}
// Earlier versions of the attribute pattern, kept for reference:
//var attrsRegx = new RegExp("(\\S+)=\\s*[\"']?((?:.(?![\"']?\\s+(?:\\S+)=|[>\"']))+.)[\"']?","g");
//var attrsRegx = new RegExp("(\\S+)=\\s*(['\"])((?:.(?!\\2))*.)","g");
// Matches name="value" or name='value'; group 1 is the name, group 3 the value.
var attrsRegx = new RegExp("(\\S+)\\s*=\\s*(['\"])(.*?)\\2","g");

/**
 * Turns the raw attribute text of a tag into a map of attribute values.
 *
 * @param {string} [attrStr] raw text found between the tag name and ">"
 * @param {boolean} ignore when truthy attributes are skipped entirely
 * @param {string} prefix string prepended to every attribute name
 * @param {boolean} ignoreNS when truthy namespace prefixes are stripped from names
 * @returns {Object|undefined} map of prefixed name -> value, or undefined when
 *          attributes are ignored or the trimmed text is too short to hold one
 */
function buildAttributesArr(attrStr,ignore,prefix,ignoreNS){
    // Trim before measuring: padding alone (e.g. `<a    >`) must not count as attribute text.
    attrStr = attrStr ? attrStr.trim() : "";
    // The shortest possible attribute (`a=""`) is 4 characters long.
    if(ignore || attrStr.length <= 3){
        return undefined;
    }
    var matches = getAllMatches(attrStr,attrsRegx);
    var attrs = {};
    for (var i = 0; i < matches.length; i++) {
        var attrName = prefix + resolveNameSpace( matches[i][1],ignoreNS);
        attrs[attrName] = matches[i][3];
    }
    return attrs;
}
/**
 * Converts a traversal node (and its subtree) into plain JS data.
 *
 * A node that carries a value is returned as that value. Otherwise an object
 * is built from the children, keyed by tag name; children sharing a name are
 * collected into an array in document order.
 *
 * @param {Object} node node exposing `val` and a `child` array of nodes with `tagname`
 * @returns {*} the node's value, or an object describing its children
 */
var convertToJson = function (node){
    // Only undefined/null mean "no value": 0, false and "" are legitimate values.
    if(node.val !== undefined && node.val !== null) {
        return node.val;
    }
    var jObj = {};
    for (var index = 0; index < node.child.length; index++) {
        var prop = node.child[index].tagname;
        var obj = convertToJson(node.child[index]);
        // Own-property check: tag names such as "constructor" or "toString" must not
        // be mistaken for already-seen keys because of members inherited from Object.prototype.
        if(Object.prototype.hasOwnProperty.call(jObj,prop)){
            if(!Array.isArray(jObj[prop])){
                jObj[prop] = [jObj[prop]];
            }
            jObj[prop].push(obj);
        }else{
            jObj[prop] = obj;
        }
    }
    return jObj;
};
// Public API of the parser module.
// parse: XML string -> JS object (getTraversalObj followed by convertToJson).
exports.parse = xml2json;
// The two stages of parse, exposed individually.
exports.getTraversalObj = getTraversalObj;
exports.convertToJson = convertToJson;
// validate is re-exported from the validator module.
exports.validate = require("./validator").validate;
},{"./util":2,"./validator":3}],2:[function(require,module,exports){
/**
 * Collects every match of `regex` in `string`.
 *
 * The scan always starts at the beginning of `string` (any stale `lastIndex`
 * is discarded) and is guaranteed to terminate: a regex without the global or
 * sticky flag yields at most one match, and zero-length matches advance the
 * scan position by one.
 *
 * @param {string} string text to scan
 * @param {RegExp} regex pattern to apply; its `lastIndex` is modified while scanning
 * @returns {Array<Array>} one plain array per match: [fullMatch, group1, group2, ...]
 */
var getAllMatches = function(string, regex) {
    var matches = [];
    // Without one of these flags exec() never advances and would repeat the first match forever.
    var advances = regex.global || regex.sticky;
    regex.lastIndex = 0;
    var match = regex.exec(string);
    while (match) {
        // Copy into a plain array, dropping the index/input extras of the exec result.
        matches.push(Array.prototype.slice.call(match));
        if (!advances) {
            break;
        }
        // An empty match leaves lastIndex where it was; step forward to avoid looping in place.
        if (regex.lastIndex === match.index) {
            regex.lastIndex++;
        }
        match = regex.exec(string);
    }
    return matches;
};
/**
 * Tells whether `regex` matches anywhere in `string`.
 *
 * The answer does not depend on earlier uses of the same regex object:
 * `lastIndex` is reset before the test (so global/sticky patterns start from
 * the beginning) and again afterwards (so no state leaks to the next caller).
 *
 * @param {string} string text to test
 * @param {RegExp} regex pattern to look for
 * @returns {boolean} true when the pattern matches
 */
var doesMatch = function(string,regex){
    regex.lastIndex = 0;
    var matched = regex.exec(string) !== null;
    regex.lastIndex = 0;
    return matched;
};
/**
 * Negation of `doesMatch`.
 *
 * @param {string} string text to test
 * @param {RegExp} regex pattern to look for
 * @returns {boolean} true when `doesMatch` reports no match
 */
var doesNotMatch = function(string,regex){
    if(doesMatch(string,regex)){
        return false;
    }
    return true;
};
// Public API of the util module (each statement explicitly terminated).
exports.doesMatch = doesMatch;
exports.doesNotMatch = doesNotMatch;
exports.getAllMatches = getAllMatches;
},{}],3:[function(require,module,exports){
var util = require("./util");
var tagsPattern = new RegExp("<\\/?([\\w:\\-_\.]+)\\s*\/?>","g");
exports.validate = function(xmlData){
xmlData = xmlData.replace(/\n/g,"");//make it single line
xmlData = xmlData.replace(/(<!\[CDATA\[.*?\]\]>)/g,"");//Remove all CDATA
xmlData = xmlData.replace(/(<!--.*?(?:-->))/g,"");//Remove all comments
if(validateAttributes(xmlData) !== true) return false;
xmlData = xmlData.replace(/(\s+(?:[\w:\-]+)\s*=\s*(['\"]).*?\2)/g,"");//Remove all attributes
xmlData = xmlData.replace(/(^\s*<\?xml\s*\?>)/g,"");//Remove XML starting tag
if(xmlData.indexOf("<![CDATA[") > 0 || xmlData.indexOf("<!--") > 0 ) return false;
var tags = util.getAllMatches(xmlData,tagsPattern);
if(tags.length === 0) return false; //non xml string
var result = checkForMatchingTag(tags,0);
if(result !== true) return false; else return true;
}
// Names beginning with "xml" in any letter case.
var startsWithXML = new RegExp("^[Xx][Mm][Ll]");
// Names beginning with a letter or "_", followed by word characters, ".", "-", "_" or ":".
var startsWith = new RegExp("^([a-zA-Z]|_)[\\w\.\\-_:]*");

/**
 * Checks the beginning of a tag name.
 *
 * @param {string} tagname name to check (only its start is inspected; the pattern is not end-anchored)
 * @returns {boolean} false when the name starts with "xml" (any case) or does not
 *          start with a letter or "_"; true otherwise
 */
function validateTagName(tagname){
    var hasXmlPrefix = util.doesMatch(tagname,startsWithXML);
    if(hasXmlPrefix){
        return false;
    }
    return !util.doesNotMatch(tagname,startsWith);
}
// Captures (group 1) the text between a tag name and the closing ">" or "/>".
var attrStringPattern = new RegExp("<[\\w:\\-_\.]+(.*?)\/?>","g");
// Matches one name="value" / name='value' pair; group 1 is the attribute name.
var attrPattern = new RegExp("\\s+([\\w:\-]+)\\s*=\\s*(['\"])(.*?)\\2","g");

/**
 * Checks the attribute text of every tag in `xmlData`.
 *
 * @param {string} xmlData XML text (CDATA and comments are expected to be removed already)
 * @returns {boolean} false when a tag carries attribute text too short to be an
 *          attribute, or the same attribute name twice; true otherwise
 */
function validateAttributes(xmlData){
    var attrStrings = util.getAllMatches(xmlData,attrStringPattern);
    // Loop counters are declared locally so nothing leaks into the global scope.
    for (var i = 0; i < attrStrings.length; i++){
        var attrText = attrStrings[i][1].trim();
        if(attrText.length === 0){
            continue;
        }
        // The shortest possible attribute (`a=""`) is 4 characters long.
        if(attrText.length < 4){ //invalid attributes
            return false;
        }
        var attrsList = util.getAllMatches(attrStrings[i][1],attrPattern);
        // Prototype-less map: names such as "push" or "constructor" must not look already seen.
        var seenNames = Object.create(null);
        for (var j = 0; j < attrsList.length; j++){
            var attrName = attrsList[j][1];
            if(seenNames[attrName]){//duplicate attributes
                return false;
            }
            seenNames[attrName] = true;
        }
    }
    return true;
}
/**
 * Recursively checks that the tags from index `i` onwards are properly paired.
 *
 * @param {Array<Array>} tags matches of the tag pattern: [0] is the full tag text, [1] the tag name
 * @param {number} i index of the tag to start from
 * @returns {boolean|number} true when every tag from `i` to the end is matched;
 *          the index of a closing tag when one is reached before its opener was
 *          seen at this level (callers use it to pair their own opening tag);
 *          -1 when the tags are invalid or do not pair up
 */
function checkForMatchingTag(tags,i){
    if(tags.length === i) {
        return true;
    }
    var current = tags[i][0];
    if(current.indexOf("</") === 0) {//closing tag
        return i;
    }
    if(current.indexOf("/>") === current.length-2){//Self closing tag
        if(validateTagName(current.substring(1)) === false) return -1;
        return checkForMatchingTag(tags,i+1);
    }
    if(tags.length <= i+1){
        // An opening tag with nothing after it can never be closed.
        return -1;
    }
    // Find the closing tag that should pair with tags[i]: either the very next tag,
    // or whatever closing tag remains once the nested tags have been consumed.
    var closingIndex = tags[i+1][0].indexOf("</") === 0 ? i+1 : checkForMatchingTag(tags,i+1);
    // The recursion returns `true` when it ran out of tags without meeting a closing
    // tag; that means tags[i] is unclosed and must not be used as an index.
    if(typeof closingIndex !== "number" || closingIndex === -1){
        return -1;
    }
    if(tags[closingIndex][0].indexOf("</") !== 0){
        return -1;
    }
    if(validateTagName(tags[i][1]) === false) return -1;
    if(tags[i][1] !== tags[closingIndex][1]) {
        return -1;//not matching
    }
    return checkForMatchingTag(tags,closingIndex+1);
}
},{"./util":2}]},{},[1])(1)
});