fast-xml-parser
Version:
Validate XML or Parse XML to JS/JSON very fast without C/C++ based libraries
302 lines (264 loc) • 10.6 kB
JavaScript
"use strict";
const util = require("./util");
//Values used for any option the caller leaves out
const defaultOptions = {
  allowBooleanAttributes: false //A tag can have attributes without any value
};

/**
 * Build the effective options for a validation run.
 *
 * The caller's object is copied rather than modified, so an options object
 * that is shared or reused by the caller is never changed by validation.
 * Every key of defaultOptions whose value is undefined in the copy is filled
 * in from defaultOptions; all other properties are carried over untouched.
 *
 * @param {object} [options] options supplied by the caller; may be omitted or null
 * @returns {object} a new object holding the caller's values plus the defaults
 */
const buildOptions = function(options) {
  const built = Object.assign({}, options);
  const props = Object.keys(defaultOptions);
  for (let i = 0; i < props.length; i++) {
    if (built[props[i]] === undefined) {
      built[props[i]] = defaultOptions[props[i]];
    }
  }
  return built;
};
//const tagsPattern = new RegExp("<\\/?([\\w:\\-_\.]+)\\s*\/?>","g");
exports.validate = function(xmlData, options) {
options = buildOptions(options);
//xmlData = xmlData.replace(/(\r\n|\n|\r)/gm,"");//make it single line
//xmlData = xmlData.replace(/(^\s*<\?xml.*?\?>)/g,"");//Remove XML starting tag
//xmlData = xmlData.replace(/(<!DOCTYPE[\s\w\"\.\/\-\:]+(\[.*\])*\s*>)/g,"");//Remove DOCTYPE
const tags = [];
let tagFound = false;
for (let i = 0; i < xmlData.length; i++) {
if (xmlData[i] === "<") {//starting of tag
//read until you reach to '>' avoiding any '>' in attribute value
i++;
if (xmlData[i] === "?") {
i = readPI(xmlData, ++i);
if (i.err) {
return i;
}
} else if (xmlData[i] === "!") {
i = readCommentAndCDATA(xmlData, i);
continue;
} else {
let closingTag = false;
if (xmlData[i] === "/") {//closing tag
closingTag = true;
i++;
}
//read tagname
let tagName = "";
for (; i < xmlData.length &&
xmlData[i] !== ">" &&
xmlData[i] !== " " &&
xmlData[i] !== "\t"; i++) {
tagName += xmlData[i];
}
tagName = tagName.trim();
//console.log(tagName);
if (tagName[tagName.length - 1] === "/") {//self closing tag without attributes
tagName = tagName.substring(0, tagName.length - 1);
continue;
}
if (!validateTagName(tagName)) {
return {err: {code: "InvalidTag", msg: "Tag " + tagName + " is an invalid name."}};
}
const result = readAttributeStr(xmlData, i);
if (result === false) {
return {err: {code: "InvalidAttr", msg: "Attributes for " + tagName + " have open quote"}};
}
let attrStr = result.value;
i = result.index;
if (attrStr[attrStr.length - 1] === "/") {//self closing tag
attrStr = attrStr.substring(0, attrStr.length - 1);
const isValid = validateAttributeString(attrStr, options);
if (isValid === true) {
tagFound = true;
continue;
} else {
return isValid;
}
} else if (closingTag) {
if (attrStr.trim().length > 0) {
return {err: {code: "InvalidTag", msg: "closing tag " + tagName + " can't have attributes or invalid starting."}};
} else {
const otg = tags.pop();
if (tagName !== otg) {
return {err: {code: "InvalidTag", msg: "closing tag " + otg + " is expected inplace of " + tagName + "."}};
}
}
} else {
const isValid = validateAttributeString(attrStr, options);
if (isValid !== true) {
return isValid;
}
tags.push(tagName);
tagFound = true;
}
//skip tag text value
//It may include comments and CDATA value
for (i++; i < xmlData.length; i++) {
if (xmlData[i] === "<") {
if (xmlData[i + 1] === "!") {//comment or CADATA
i++;
i = readCommentAndCDATA(xmlData, i);
continue;
} else {
break;
}
}
}//end of reading tag text value
if (xmlData[i] === "<") {
i--;
}
}
} else {
if (xmlData[i] === " " || xmlData[i] === "\t" || xmlData[i] === "\n" || xmlData[i] === "\r") {
continue;
}
return {err: {code: "InvalidChar", msg: "char " + xmlData[i] + " is not expected ."}};
}
}
if (!tagFound) {
return {err: {code: "InvalidXml", msg: "Start tag expected."}};
} else if (tags.length > 0) {
return {err: {code: "InvalidXml", msg: "Invalid " + JSON.stringify(tags, null, 4).replace(/\r?\n/g, "") + " found."}};
}
return true;
};
/**
 * Skip over a processing instruction.
 *
 * @param {string} xmlData text being validated
 * @param {number} i index of the first character after "<?"
 * @returns {number|{err: {code: string, msg: string}}} index of the ">" that
 *   ends the instruction, or xmlData.length when no "?>" is found; an error
 *   object when the name read so far is "xml" and it ends beyond index 5
 */
function readPI(xmlData, i) {
  const nameStart = i;
  let pos = i;
  while (pos < xmlData.length) {
    const ch = xmlData[pos];
    if (ch === "?" || ch === " ") {
      //text between the start of the instruction and the current delimiter
      const target = xmlData.substring(nameStart, pos);
      if (pos > 5 && target === "xml") {
        return {err: {code: "InvalidXml", msg: "XML declaration allowed only at the start of the document."}};
      }
      if (ch === "?" && xmlData[pos + 1] === ">") {
        return pos + 1;
      }
    }
    pos++;
  }
  return pos;
}
/**
 * Skip over a comment, a DOCTYPE declaration or a CDATA section.
 *
 * @param {string} xmlData text being validated
 * @param {number} i index of the "!" that follows "<"
 * @returns {number} index of the ">" that ends the construct, xmlData.length
 *   when the construct is never terminated, or i unchanged when none of the
 *   three constructs is recognised at this position
 */
function readCommentAndCDATA(xmlData, i) {
  const total = xmlData.length;
  const afterBang = i + 1;

  if (total > i + 5 && xmlData.startsWith("--", afterBang)) {//comment
    const end = xmlData.indexOf("-->", i + 3);
    return end === -1 ? total : end + 2;
  }

  if (total > i + 8 && xmlData.startsWith("DOCTYPE", afterBang)) {
    //the declaration may contain nested <...> parts, so count brackets until
    //the one that was opened by "<!DOCTYPE" itself is closed
    let depth = 1;
    let pos = i + 8;
    while (pos < total) {
      const ch = xmlData[pos];
      if (ch === "<") {
        depth++;
      } else if (ch === ">") {
        depth--;
        if (depth === 0) {
          break;
        }
      }
      pos++;
    }
    return pos;
  }

  if (total > i + 9 && xmlData.startsWith("[CDATA[", afterBang)) {
    const end = xmlData.indexOf("]]>", i + 8);
    return end === -1 ? total : end + 2;
  }

  return i;
}
const doubleQuote = "\"";
const singleQuote = "'";

/**
 * Keep reading xmlData until a '>' is found outside an attribute value.
 *
 * Every character read is kept, including a quote of the other kind that
 * appears inside a quoted value (for example the ' in a="amit's"), so the
 * returned text is exactly the slice of xmlData that was scanned.
 *
 * @param {string} xmlData text being validated
 * @param {number} i index at which reading starts
 * @returns {false|{value: string, index: number}} false when the input ends
 *   while a quoted value is still open; otherwise the text read and the index
 *   where reading stopped (the index of '>', or xmlData.length when no '>' is met)
 */
function readAttributeStr(xmlData, i) {
  let attrStr = "";
  let startChar = ""; //quote that opened the value being read, "" while outside a value
  for (; i < xmlData.length; i++) {
    const ch = xmlData[i];
    if (ch === doubleQuote || ch === singleQuote) {
      if (startChar === "") {
        startChar = ch;
      } else if (startChar === ch) {
        startChar = "";
      }
      //otherwise it is the other kind of quote inside a value: ordinary text
    } else if (ch === ">" && startChar === "") {
      break;
    }
    attrStr += ch;
  }
  if (startChar !== "") {
    return false;
  }
  return {value: attrStr, index: i};
}
/**
 * Select all the attributes whether valid or invalid.
 * Capture groups: 1 whitespace before the name, 2 the name, 3 the "=" with any
 * whitespace before it, 4 the quoted value with any whitespace before it,
 * 5 the quote character, 6 the text between the quotes.
 */
const validAttrStrRegxp = new RegExp("(\\s*)([^\\s=]+)(\\s*=)?(\\s*(['\"])(([\\s\\S])*?)\\5)?", "g");

//attr, ="sd", a="amit's", a="sd"b="saf", ab cd=""
/**
 * Validate the attribute part of a tag.
 *
 * Each attribute found by validAttrStrRegxp must be preceded by whitespace,
 * must have an "=" unless options.allowBooleanAttributes is set, must have a
 * name accepted by validateAttrName and must not repeat an earlier name.
 * An attribute written with "=" but without a value is not rejected here.
 *
 * @param {string} attrStr text between the tag name and the end of the tag
 * @param {object} options validation options; allowBooleanAttributes is read
 * @returns {true|{err: {code: string, msg: string}}} true when every attribute
 *   passes, otherwise an object describing the first failing attribute
 */
function validateAttributeString(attrStr, options) {
  const matches = util.getAllMatches(attrStr, validAttrStrRegxp);
  //A Set is used so that names which are also built-in property names
  //(for example "length") are not mistaken for repeated attributes
  const attrNames = new Set();

  for (let i = 0; i < matches.length; i++) {
    if (matches[i][1].length === 0) {//nospace before attribute name: a="sd"b="saf"
      return {err: {code: "InvalidAttr", msg: "attribute " + matches[i][2] + " has no space in starting."}};
    } else if (matches[i][3] === undefined && !options.allowBooleanAttributes) {//independent attribute: ab
      return {err: {code: "InvalidAttr", msg: "boolean attribute " + matches[i][2] + " is not allowed."}};
    }

    const attrName = matches[i][2];
    if (!validateAttrName(attrName)) {
      return {err: {code: "InvalidAttr", msg: "attribute " + attrName + " is an invalid name."}};
    }
    if (attrNames.has(attrName)) {//check for duplicate attribute.
      return {err: {code: "InvalidAttr", msg: "attribute " + attrName + " is repeated."}};
    }
    attrNames.add(attrName);
  }
  return true;
}
//Whole-name pattern: a letter or "_" first, then word characters, "-", "." or ":"
const validAttrRegxp = /^[_a-zA-Z][\w\-.:]*$/;

/**
 * Tell whether attrName is acceptable as an attribute name.
 * @param {string} attrName name to check
 * @returns {*} whatever util.doesMatch reports for the name against
 *   validAttrRegxp (presumably a boolean; util is not visible from here)
 */
function validateAttrName(attrName) {
  const isAccepted = util.doesMatch(attrName, validAttrRegxp);
  return isAccepted;
}
//const startsWithXML = new RegExp("^[Xx][Mm][Ll]");
//Pattern for the beginning of a tag name: a letter or "_" first, then word
//characters, ".", "-", "_" or ":". It is not anchored at the end.
const startsWith = /^([a-zA-Z]|_)[\w.\-_:]*/;

/**
 * Tell whether tagname is acceptable as a tag name.
 * @param {string} tagname name to check
 * @returns {boolean} the negation of what util.doesNotMatch reports for the
 *   name against the startsWith pattern
 */
function validateTagName(tagname) {
  const isRejected = util.doesNotMatch(tagname, startsWith);
  return !isRejected;
}