fast-xml-parser
Version:
Validate XML, Parse XML, Build XML without C/C++ based libraries
903 lines (787 loc) • 31.8 kB
JavaScript
'use strict';
///@ts-check
import { getAllMatches, isExist, DANGEROUS_PROPERTY_NAMES, criticalProperties } from '../util.js';
import xmlNode from './xmlNode.js';
import DocTypeReader from './DocTypeReader.js';
import toNumber from "strnum";
import getIgnoreAttributesFn from "../ignoreAttributes.js";
import { Expression, Matcher } from 'path-expression-matcher';
// const regx =
// '<((!\\[CDATA\\[([\\s\\S]*?)(]]>))|((NAME:)?(NAME))([^>]*)>|((\\/)(NAME)\\s*>))([^<]*)'
// .replace(/NAME/g, util.nameRegexp);
//const tagsRegx = new RegExp("<(\\/?[\\w:\\-\._]+)([^>]*)>(\\s*"+cdataRegx+")*([^<]+)?","g");
//const tagsRegx = new RegExp("<(\\/?)((\\w*:)?([\\w:\\-\._]+))([^>]*)>([^<]*)("+cdataRegx+"([^<]*))*([^<]+)?","g");
// Helper functions for attribute and namespace handling
/**
* Extract raw attributes (without prefix) from prefixed attribute map
* @param {object} prefixedAttrs - Attributes with prefix from buildAttributesMap
* @param {object} options - Parser options containing attributeNamePrefix
* @returns {object} Raw attributes for matcher
*/
function extractRawAttributes(prefixedAttrs, options) {
if (!prefixedAttrs) return {};
// Handle attributesGroupName option
const attrs = options.attributesGroupName
? prefixedAttrs[options.attributesGroupName]
: prefixedAttrs;
if (!attrs) return {};
const rawAttrs = {};
for (const key in attrs) {
// Remove the attribute prefix to get raw name
if (key.startsWith(options.attributeNamePrefix)) {
const rawName = key.substring(options.attributeNamePrefix.length);
rawAttrs[rawName] = attrs[key];
} else {
// Attribute without prefix (shouldn't normally happen, but be safe)
rawAttrs[key] = attrs[key];
}
}
return rawAttrs;
}
/**
* Extract namespace from raw tag name
* @param {string} rawTagName - Tag name possibly with namespace (e.g., "soap:Envelope")
* @returns {string|undefined} Namespace or undefined
*/
function extractNamespace(rawTagName) {
if (!rawTagName || typeof rawTagName !== 'string') return undefined;
const colonIndex = rawTagName.indexOf(':');
if (colonIndex !== -1 && colonIndex > 0) {
const ns = rawTagName.substring(0, colonIndex);
// Don't treat xmlns as a namespace
if (ns !== 'xmlns') {
return ns;
}
}
return undefined;
}
export default class OrderedObjParser {
constructor(options) {
this.options = options;
this.currentNode = null;
this.tagsNodeStack = [];
this.docTypeEntities = {};
this.lastEntities = {
"apos": { regex: /&(apos|#39|#x27);/g, val: "'" },
"gt": { regex: /&(gt|#62|#x3E);/g, val: ">" },
"lt": { regex: /&(lt|#60|#x3C);/g, val: "<" },
"quot": { regex: /&(quot|#34|#x22);/g, val: "\"" },
};
this.ampEntity = { regex: /&(amp|#38|#x26);/g, val: "&" };
this.htmlEntities = {
"space": { regex: /&(nbsp|#160);/g, val: " " },
// "lt" : { regex: /&(lt|#60);/g, val: "<" },
// "gt" : { regex: /&(gt|#62);/g, val: ">" },
// "amp" : { regex: /&(amp|#38);/g, val: "&" },
// "quot" : { regex: /&(quot|#34);/g, val: "\"" },
// "apos" : { regex: /&(apos|#39);/g, val: "'" },
"cent": { regex: /&(cent|#162);/g, val: "¢" },
"pound": { regex: /&(pound|#163);/g, val: "£" },
"yen": { regex: /&(yen|#165);/g, val: "¥" },
"euro": { regex: /&(euro|#8364);/g, val: "€" },
"copyright": { regex: /&(copy|#169);/g, val: "©" },
"reg": { regex: /&(reg|#174);/g, val: "®" },
"inr": { regex: /&(inr|#8377);/g, val: "₹" },
"num_dec": { regex: /&#([0-9]{1,7});/g, val: (_, str) => fromCodePoint(str, 10, "&#") },
"num_hex": { regex: /&#x([0-9a-fA-F]{1,6});/g, val: (_, str) => fromCodePoint(str, 16, "&#x") },
};
this.addExternalEntities = addExternalEntities;
this.parseXml = parseXml;
this.parseTextData = parseTextData;
this.resolveNameSpace = resolveNameSpace;
this.buildAttributesMap = buildAttributesMap;
this.isItStopNode = isItStopNode;
this.replaceEntitiesValue = replaceEntitiesValue;
this.readStopNodeData = readStopNodeData;
this.saveTextToParentTag = saveTextToParentTag;
this.addChild = addChild;
this.ignoreAttributesFn = getIgnoreAttributesFn(this.options.ignoreAttributes)
this.entityExpansionCount = 0;
this.currentExpandedLength = 0;
// Initialize path matcher for path-expression-matcher
this.matcher = new Matcher();
// Live read-only proxy of matcher — PEM creates and caches this internally.
// All user callbacks receive this instead of the mutable matcher.
this.readonlyMatcher = this.matcher.readOnly();
// Flag to track if current node is a stop node (optimization)
this.isCurrentNodeStopNode = false;
// Pre-compile stopNodes expressions
if (this.options.stopNodes && this.options.stopNodes.length > 0) {
this.stopNodeExpressions = [];
for (let i = 0; i < this.options.stopNodes.length; i++) {
const stopNodeExp = this.options.stopNodes[i];
if (typeof stopNodeExp === 'string') {
// Convert string to Expression object
this.stopNodeExpressions.push(new Expression(stopNodeExp));
} else if (stopNodeExp instanceof Expression) {
// Already an Expression object
this.stopNodeExpressions.push(stopNodeExp);
}
}
}
}
}
function addExternalEntities(externalEntities) {
const entKeys = Object.keys(externalEntities);
for (let i = 0; i < entKeys.length; i++) {
const ent = entKeys[i];
const escaped = ent.replace(/[.\-+*:]/g, '\\.');
this.lastEntities[ent] = {
regex: new RegExp("&" + escaped + ";", "g"),
val: externalEntities[ent]
}
}
}
/**
* @param {string} val
* @param {string} tagName
* @param {string|Matcher} jPath - jPath string or Matcher instance based on options.jPath
* @param {boolean} dontTrim
* @param {boolean} hasAttributes
* @param {boolean} isLeafNode
* @param {boolean} escapeEntities
*/
function parseTextData(val, tagName, jPath, dontTrim, hasAttributes, isLeafNode, escapeEntities) {
if (val !== undefined) {
if (this.options.trimValues && !dontTrim) {
val = val.trim();
}
if (val.length > 0) {
if (!escapeEntities) val = this.replaceEntitiesValue(val, tagName, jPath);
// Pass jPath string or matcher based on options.jPath setting
const jPathOrMatcher = this.options.jPath ? jPath.toString() : jPath;
const newval = this.options.tagValueProcessor(tagName, val, jPathOrMatcher, hasAttributes, isLeafNode);
if (newval === null || newval === undefined) {
//don't parse
return val;
} else if (typeof newval !== typeof val || newval !== val) {
//overwrite
return newval;
} else if (this.options.trimValues) {
return parseValue(val, this.options.parseTagValue, this.options.numberParseOptions);
} else {
const trimmedVal = val.trim();
if (trimmedVal === val) {
return parseValue(val, this.options.parseTagValue, this.options.numberParseOptions);
} else {
return val;
}
}
}
}
}
function resolveNameSpace(tagname) {
if (this.options.removeNSPrefix) {
const tags = tagname.split(':');
const prefix = tagname.charAt(0) === '/' ? '/' : '';
if (tags[0] === 'xmlns') {
return '';
}
if (tags.length === 2) {
tagname = prefix + tags[1];
}
}
return tagname;
}
//TODO: change regex to capture NS
//const attrsRegx = new RegExp("([\\w\\-\\.\\:]+)\\s*=\\s*(['\"])((.|\n)*?)\\2","gm");
const attrsRegx = new RegExp('([^\\s=]+)\\s*(=\\s*([\'"])([\\s\\S]*?)\\3)?', 'gm');
function buildAttributesMap(attrStr, jPath, tagName) {
if (this.options.ignoreAttributes !== true && typeof attrStr === 'string') {
// attrStr = attrStr.replace(/\r?\n/g, ' ');
//attrStr = attrStr || attrStr.trim();
const matches = getAllMatches(attrStr, attrsRegx);
const len = matches.length; //don't make it inline
const attrs = {};
// Pre-process values once: trim + entity replacement
// Reused in both matcher update and second pass
const processedVals = new Array(len);
let hasRawAttrs = false;
const rawAttrsForMatcher = {};
for (let i = 0; i < len; i++) {
const attrName = this.resolveNameSpace(matches[i][1]);
const oldVal = matches[i][4];
if (attrName.length && oldVal !== undefined) {
let val = oldVal;
if (this.options.trimValues) val = val.trim();
val = this.replaceEntitiesValue(val, tagName, this.readonlyMatcher);
processedVals[i] = val;
rawAttrsForMatcher[attrName] = val;
hasRawAttrs = true;
}
}
// Update matcher ONCE before second pass, if applicable
if (hasRawAttrs && typeof jPath === 'object' && jPath.updateCurrent) {
jPath.updateCurrent(rawAttrsForMatcher);
}
// Hoist toString() once — path doesn't change during attribute processing
const jPathStr = this.options.jPath ? jPath.toString() : this.readonlyMatcher;
// Second pass: apply processors, build final attrs
let hasAttrs = false;
for (let i = 0; i < len; i++) {
const attrName = this.resolveNameSpace(matches[i][1]);
if (this.ignoreAttributesFn(attrName, jPathStr)) continue;
let aName = this.options.attributeNamePrefix + attrName;
if (attrName.length) {
if (this.options.transformAttributeName) {
aName = this.options.transformAttributeName(aName);
}
aName = sanitizeName(aName, this.options);
if (matches[i][4] !== undefined) {
// Reuse already-processed value — no double entity replacement
const oldVal = processedVals[i];
const newVal = this.options.attributeValueProcessor(attrName, oldVal, jPathStr);
if (newVal === null || newVal === undefined) {
attrs[aName] = oldVal;
} else if (typeof newVal !== typeof oldVal || newVal !== oldVal) {
attrs[aName] = newVal;
} else {
attrs[aName] = parseValue(oldVal, this.options.parseAttributeValue, this.options.numberParseOptions);
}
hasAttrs = true;
} else if (this.options.allowBooleanAttributes) {
attrs[aName] = true;
hasAttrs = true;
}
}
}
if (!hasAttrs) return;
if (this.options.attributesGroupName) {
const attrCollection = {};
attrCollection[this.options.attributesGroupName] = attrs;
return attrCollection;
}
return attrs;
}
}
const parseXml = function (xmlData) {
xmlData = xmlData.replace(/\r\n?/g, "\n"); //TODO: remove this line
const xmlObj = new xmlNode('!xml');
let currentNode = xmlObj;
let textData = "";
// Reset matcher for new document
this.matcher.reset();
// Reset entity expansion counters for this document
this.entityExpansionCount = 0;
this.currentExpandedLength = 0;
const docTypeReader = new DocTypeReader(this.options.processEntities);
for (let i = 0; i < xmlData.length; i++) {//for each char in XML data
const ch = xmlData[i];
if (ch === '<') {
// const nextIndex = i+1;
// const _2ndChar = xmlData[nextIndex];
if (xmlData[i + 1] === '/') {//Closing Tag
const closeIndex = findClosingIndex(xmlData, ">", i, "Closing Tag is not closed.")
let tagName = xmlData.substring(i + 2, closeIndex).trim();
if (this.options.removeNSPrefix) {
const colonIndex = tagName.indexOf(":");
if (colonIndex !== -1) {
tagName = tagName.substr(colonIndex + 1);
}
}
tagName = transformTagName(this.options.transformTagName, tagName, "", this.options).tagName;
if (currentNode) {
textData = this.saveTextToParentTag(textData, currentNode, this.readonlyMatcher);
}
//check if last tag of nested tag was unpaired tag
const lastTagName = this.matcher.getCurrentTag();
if (tagName && this.options.unpairedTags.indexOf(tagName) !== -1) {
throw new Error(`Unpaired tag can not be used as closing tag: </${tagName}>`);
}
if (lastTagName && this.options.unpairedTags.indexOf(lastTagName) !== -1) {
// Pop the unpaired tag
this.matcher.pop();
this.tagsNodeStack.pop();
}
// Pop the closing tag
this.matcher.pop();
this.isCurrentNodeStopNode = false; // Reset flag when closing tag
currentNode = this.tagsNodeStack.pop();//avoid recursion, set the parent tag scope
textData = "";
i = closeIndex;
} else if (xmlData[i + 1] === '?') {
let tagData = readTagExp(xmlData, i, false, "?>");
if (!tagData) throw new Error("Pi Tag is not closed.");
textData = this.saveTextToParentTag(textData, currentNode, this.readonlyMatcher);
if ((this.options.ignoreDeclaration && tagData.tagName === "?xml") || this.options.ignorePiTags) {
//do nothing
} else {
const childNode = new xmlNode(tagData.tagName);
childNode.add(this.options.textNodeName, "");
if (tagData.tagName !== tagData.tagExp && tagData.attrExpPresent) {
childNode[":@"] = this.buildAttributesMap(tagData.tagExp, this.matcher, tagData.tagName);
}
this.addChild(currentNode, childNode, this.readonlyMatcher, i);
}
i = tagData.closeIndex + 1;
} else if (xmlData.substr(i + 1, 3) === '!--') {
const endIndex = findClosingIndex(xmlData, "-->", i + 4, "Comment is not closed.")
if (this.options.commentPropName) {
const comment = xmlData.substring(i + 4, endIndex - 2);
textData = this.saveTextToParentTag(textData, currentNode, this.readonlyMatcher);
currentNode.add(this.options.commentPropName, [{ [this.options.textNodeName]: comment }]);
}
i = endIndex;
} else if (xmlData.substr(i + 1, 2) === '!D') {
const result = docTypeReader.readDocType(xmlData, i);
this.docTypeEntities = result.entities;
i = result.i;
} else if (xmlData.substr(i + 1, 2) === '![') {
const closeIndex = findClosingIndex(xmlData, "]]>", i, "CDATA is not closed.") - 2;
const tagExp = xmlData.substring(i + 9, closeIndex);
textData = this.saveTextToParentTag(textData, currentNode, this.readonlyMatcher);
let val = this.parseTextData(tagExp, currentNode.tagname, this.readonlyMatcher, true, false, true, true);
if (val == undefined) val = "";
//cdata should be set even if it is 0 length string
if (this.options.cdataPropName) {
currentNode.add(this.options.cdataPropName, [{ [this.options.textNodeName]: tagExp }]);
} else {
currentNode.add(this.options.textNodeName, val);
}
i = closeIndex + 2;
} else {//Opening tag
let result = readTagExp(xmlData, i, this.options.removeNSPrefix);
// Safety check: readTagExp can return undefined
if (!result) {
// Log context for debugging
const context = xmlData.substring(Math.max(0, i - 50), Math.min(xmlData.length, i + 50));
throw new Error(`readTagExp returned undefined at position ${i}. Context: "${context}"`);
}
let tagName = result.tagName;
const rawTagName = result.rawTagName;
let tagExp = result.tagExp;
let attrExpPresent = result.attrExpPresent;
let closeIndex = result.closeIndex;
({ tagName, tagExp } = transformTagName(this.options.transformTagName, tagName, tagExp, this.options));
if (this.options.strictReservedNames &&
(tagName === this.options.commentPropName
|| tagName === this.options.cdataPropName
|| tagName === this.options.textNodeName
|| tagName === this.options.attributesGroupName
)) {
throw new Error(`Invalid tag name: ${tagName}`);
}
//save text as child node
if (currentNode && textData) {
if (currentNode.tagname !== '!xml') {
//when nested tag is found
textData = this.saveTextToParentTag(textData, currentNode, this.readonlyMatcher, false);
}
}
//check if last tag was unpaired tag
const lastTag = currentNode;
if (lastTag && this.options.unpairedTags.indexOf(lastTag.tagname) !== -1) {
currentNode = this.tagsNodeStack.pop();
this.matcher.pop();
}
// Clean up self-closing syntax BEFORE processing attributes
// This is where tagExp gets the trailing / removed
let isSelfClosing = false;
if (tagExp.length > 0 && tagExp.lastIndexOf("/") === tagExp.length - 1) {
isSelfClosing = true;
if (tagName[tagName.length - 1] === "/") {
tagName = tagName.substr(0, tagName.length - 1);
tagExp = tagName;
} else {
tagExp = tagExp.substr(0, tagExp.length - 1);
}
// Re-check attrExpPresent after cleaning
attrExpPresent = (tagName !== tagExp);
}
// Now process attributes with CLEAN tagExp (no trailing /)
let prefixedAttrs = null;
let rawAttrs = {};
let namespace = undefined;
// Extract namespace from rawTagName
namespace = extractNamespace(rawTagName);
// Push tag to matcher FIRST (with empty attrs for now) so callbacks see correct path
if (tagName !== xmlObj.tagname) {
this.matcher.push(tagName, {}, namespace);
}
// Now build attributes - callbacks will see correct matcher state
if (tagName !== tagExp && attrExpPresent) {
// Build attributes (returns prefixed attributes for the tree)
// Note: buildAttributesMap now internally updates the matcher with raw attributes
prefixedAttrs = this.buildAttributesMap(tagExp, this.matcher, tagName);
if (prefixedAttrs) {
// Extract raw attributes (without prefix) for our use
rawAttrs = extractRawAttributes(prefixedAttrs, this.options);
}
}
// Now check if this is a stop node (after attributes are set)
if (tagName !== xmlObj.tagname) {
this.isCurrentNodeStopNode = this.isItStopNode(this.stopNodeExpressions, this.matcher);
}
const startIndex = i;
if (this.isCurrentNodeStopNode) {
let tagContent = "";
// For self-closing tags, content is empty
if (isSelfClosing) {
i = result.closeIndex;
}
//unpaired tag
else if (this.options.unpairedTags.indexOf(tagName) !== -1) {
i = result.closeIndex;
}
//normal tag
else {
//read until closing tag is found
const result = this.readStopNodeData(xmlData, rawTagName, closeIndex + 1);
if (!result) throw new Error(`Unexpected end of ${rawTagName}`);
i = result.i;
tagContent = result.tagContent;
}
const childNode = new xmlNode(tagName);
if (prefixedAttrs) {
childNode[":@"] = prefixedAttrs;
}
// For stop nodes, store raw content as-is without any processing
childNode.add(this.options.textNodeName, tagContent);
this.matcher.pop(); // Pop the stop node tag
this.isCurrentNodeStopNode = false; // Reset flag
this.addChild(currentNode, childNode, this.readonlyMatcher, startIndex);
} else {
//selfClosing tag
if (isSelfClosing) {
({ tagName, tagExp } = transformTagName(this.options.transformTagName, tagName, tagExp, this.options));
const childNode = new xmlNode(tagName);
if (prefixedAttrs) {
childNode[":@"] = prefixedAttrs;
}
this.addChild(currentNode, childNode, this.readonlyMatcher, startIndex);
this.matcher.pop(); // Pop self-closing tag
this.isCurrentNodeStopNode = false; // Reset flag
}
else if (this.options.unpairedTags.indexOf(tagName) !== -1) {//unpaired tag
const childNode = new xmlNode(tagName);
if (prefixedAttrs) {
childNode[":@"] = prefixedAttrs;
}
this.addChild(currentNode, childNode, this.readonlyMatcher, startIndex);
this.matcher.pop(); // Pop unpaired tag
this.isCurrentNodeStopNode = false; // Reset flag
i = result.closeIndex;
// Continue to next iteration without changing currentNode
continue;
}
//opening tag
else {
const childNode = new xmlNode(tagName);
if (this.tagsNodeStack.length > this.options.maxNestedTags) {
throw new Error("Maximum nested tags exceeded");
}
this.tagsNodeStack.push(currentNode);
if (prefixedAttrs) {
childNode[":@"] = prefixedAttrs;
}
this.addChild(currentNode, childNode, this.readonlyMatcher, startIndex);
currentNode = childNode;
}
textData = "";
i = closeIndex;
}
}
} else {
textData += xmlData[i];
}
}
return xmlObj.child;
}
function addChild(currentNode, childNode, matcher, startIndex) {
// unset startIndex if not requested
if (!this.options.captureMetaData) startIndex = undefined;
// Pass jPath string or matcher based on options.jPath setting
const jPathOrMatcher = this.options.jPath ? matcher.toString() : matcher;
const result = this.options.updateTag(childNode.tagname, jPathOrMatcher, childNode[":@"])
if (result === false) {
//do nothing
} else if (typeof result === "string") {
childNode.tagname = result
currentNode.addChild(childNode, startIndex);
} else {
currentNode.addChild(childNode, startIndex);
}
}
/**
* @param {object} val - Entity object with regex and val properties
* @param {string} tagName - Tag name
* @param {string|Matcher} jPath - jPath string or Matcher instance based on options.jPath
*/
function replaceEntitiesValue(val, tagName, jPath) {
const entityConfig = this.options.processEntities;
if (!entityConfig || !entityConfig.enabled) {
return val;
}
// Check if tag is allowed to contain entities
if (entityConfig.allowedTags) {
const jPathOrMatcher = this.options.jPath ? jPath.toString() : jPath;
const allowed = Array.isArray(entityConfig.allowedTags)
? entityConfig.allowedTags.includes(tagName)
: entityConfig.allowedTags(tagName, jPathOrMatcher);
if (!allowed) {
return val;
}
}
// Apply custom tag filter if provided
if (entityConfig.tagFilter) {
const jPathOrMatcher = this.options.jPath ? jPath.toString() : jPath;
if (!entityConfig.tagFilter(tagName, jPathOrMatcher)) {
return val; // Skip based on custom filter
}
}
// Replace DOCTYPE entities
for (const entityName of Object.keys(this.docTypeEntities)) {
const entity = this.docTypeEntities[entityName];
const matches = val.match(entity.regx);
if (matches) {
// Track expansions
this.entityExpansionCount += matches.length;
// Check expansion limit
if (entityConfig.maxTotalExpansions &&
this.entityExpansionCount > entityConfig.maxTotalExpansions) {
throw new Error(
`Entity expansion limit exceeded: ${this.entityExpansionCount} > ${entityConfig.maxTotalExpansions}`
);
}
// Store length before replacement
const lengthBefore = val.length;
val = val.replace(entity.regx, entity.val);
// Check expanded length immediately after replacement
if (entityConfig.maxExpandedLength) {
this.currentExpandedLength += (val.length - lengthBefore);
if (this.currentExpandedLength > entityConfig.maxExpandedLength) {
throw new Error(
`Total expanded content size exceeded: ${this.currentExpandedLength} > ${entityConfig.maxExpandedLength}`
);
}
}
}
}
if (val.indexOf('&') === -1) return val;
// Replace standard entities
for (const entityName of Object.keys(this.lastEntities)) {
const entity = this.lastEntities[entityName];
const matches = val.match(entity.regex);
if (matches) {
this.entityExpansionCount += matches.length;
if (entityConfig.maxTotalExpansions &&
this.entityExpansionCount > entityConfig.maxTotalExpansions) {
throw new Error(
`Entity expansion limit exceeded: ${this.entityExpansionCount} > ${entityConfig.maxTotalExpansions}`
);
}
}
val = val.replace(entity.regex, entity.val);
}
if (val.indexOf('&') === -1) return val;
// Replace HTML entities if enabled
if (this.options.htmlEntities) {
for (const entityName of Object.keys(this.htmlEntities)) {
const entity = this.htmlEntities[entityName];
const matches = val.match(entity.regex);
if (matches) {
//console.log(matches);
this.entityExpansionCount += matches.length;
if (entityConfig.maxTotalExpansions &&
this.entityExpansionCount > entityConfig.maxTotalExpansions) {
throw new Error(
`Entity expansion limit exceeded: ${this.entityExpansionCount} > ${entityConfig.maxTotalExpansions}`
);
}
}
val = val.replace(entity.regex, entity.val);
}
}
// Replace ampersand entity last
val = val.replace(this.ampEntity.regex, this.ampEntity.val);
return val;
}
function saveTextToParentTag(textData, parentNode, matcher, isLeafNode) {
if (textData) { //store previously collected data as textNode
if (isLeafNode === undefined) isLeafNode = parentNode.child.length === 0
textData = this.parseTextData(textData,
parentNode.tagname,
matcher,
false,
parentNode[":@"] ? Object.keys(parentNode[":@"]).length !== 0 : false,
isLeafNode);
if (textData !== undefined && textData !== "")
parentNode.add(this.options.textNodeName, textData);
textData = "";
}
return textData;
}
//TODO: use jPath to simplify the logic
/**
* @param {Array<Expression>} stopNodeExpressions - Array of compiled Expression objects
* @param {Matcher} matcher - Current path matcher
*/
function isItStopNode(stopNodeExpressions, matcher) {
if (!stopNodeExpressions || stopNodeExpressions.length === 0) return false;
for (let i = 0; i < stopNodeExpressions.length; i++) {
if (matcher.matches(stopNodeExpressions[i])) {
return true;
}
}
return false;
}
/**
* Returns the tag Expression and where it is ending handling single-double quotes situation
* @param {string} xmlData
* @param {number} i starting index
* @returns
*/
function tagExpWithClosingIndex(xmlData, i, closingChar = ">") {
let attrBoundary;
let tagExp = "";
for (let index = i; index < xmlData.length; index++) {
let ch = xmlData[index];
if (attrBoundary) {
if (ch === attrBoundary) attrBoundary = "";//reset
} else if (ch === '"' || ch === "'") {
attrBoundary = ch;
} else if (ch === closingChar[0]) {
if (closingChar[1]) {
if (xmlData[index + 1] === closingChar[1]) {
return {
data: tagExp,
index: index
}
}
} else {
return {
data: tagExp,
index: index
}
}
} else if (ch === '\t') {
ch = " "
}
tagExp += ch;
}
}
function findClosingIndex(xmlData, str, i, errMsg) {
const closingIndex = xmlData.indexOf(str, i);
if (closingIndex === -1) {
throw new Error(errMsg)
} else {
return closingIndex + str.length - 1;
}
}
function readTagExp(xmlData, i, removeNSPrefix, closingChar = ">") {
const result = tagExpWithClosingIndex(xmlData, i + 1, closingChar);
if (!result) return;
let tagExp = result.data;
const closeIndex = result.index;
const separatorIndex = tagExp.search(/\s/);
let tagName = tagExp;
let attrExpPresent = true;
if (separatorIndex !== -1) {//separate tag name and attributes expression
tagName = tagExp.substring(0, separatorIndex);
tagExp = tagExp.substring(separatorIndex + 1).trimStart();
}
const rawTagName = tagName;
if (removeNSPrefix) {
const colonIndex = tagName.indexOf(":");
if (colonIndex !== -1) {
tagName = tagName.substr(colonIndex + 1);
attrExpPresent = tagName !== result.data.substr(colonIndex + 1);
}
}
return {
tagName: tagName,
tagExp: tagExp,
closeIndex: closeIndex,
attrExpPresent: attrExpPresent,
rawTagName: rawTagName,
}
}
/**
* find paired tag for a stop node
* @param {string} xmlData
* @param {string} tagName
* @param {number} i
*/
function readStopNodeData(xmlData, tagName, i) {
const startIndex = i;
// Starting at 1 since we already have an open tag
let openTagCount = 1;
for (; i < xmlData.length; i++) {
if (xmlData[i] === "<") {
if (xmlData[i + 1] === "/") {//close tag
const closeIndex = findClosingIndex(xmlData, ">", i, `${tagName} is not closed`);
let closeTagName = xmlData.substring(i + 2, closeIndex).trim();
if (closeTagName === tagName) {
openTagCount--;
if (openTagCount === 0) {
return {
tagContent: xmlData.substring(startIndex, i),
i: closeIndex
}
}
}
i = closeIndex;
} else if (xmlData[i + 1] === '?') {
const closeIndex = findClosingIndex(xmlData, "?>", i + 1, "StopNode is not closed.")
i = closeIndex;
} else if (xmlData.substr(i + 1, 3) === '!--') {
const closeIndex = findClosingIndex(xmlData, "-->", i + 3, "StopNode is not closed.")
i = closeIndex;
} else if (xmlData.substr(i + 1, 2) === '![') {
const closeIndex = findClosingIndex(xmlData, "]]>", i, "StopNode is not closed.") - 2;
i = closeIndex;
} else {
const tagData = readTagExp(xmlData, i, '>')
if (tagData) {
const openTagName = tagData && tagData.tagName;
if (openTagName === tagName && tagData.tagExp[tagData.tagExp.length - 1] !== "/") {
openTagCount++;
}
i = tagData.closeIndex;
}
}
}
}//end for loop
}
function parseValue(val, shouldParse, options) {
if (shouldParse && typeof val === 'string') {
//console.log(options)
const newval = val.trim();
if (newval === 'true') return true;
else if (newval === 'false') return false;
else return toNumber(val, options);
} else {
if (isExist(val)) {
return val;
} else {
return '';
}
}
}
function fromCodePoint(str, base, prefix) {
const codePoint = Number.parseInt(str, base);
if (codePoint >= 0 && codePoint <= 0x10FFFF) {
return String.fromCodePoint(codePoint);
} else {
return prefix + str + ";";
}
}
function transformTagName(fn, tagName, tagExp, options) {
if (fn) {
const newTagName = fn(tagName);
if (tagExp === tagName) {
tagExp = newTagName
}
tagName = newTagName;
}
tagName = sanitizeName(tagName, options);
return { tagName, tagExp };
}
function sanitizeName(name, options) {
if (criticalProperties.includes(name)) {
throw new Error(`[SECURITY] Invalid name: "${name}" is a reserved JavaScript keyword that could cause prototype pollution`);
} else if (DANGEROUS_PROPERTY_NAMES.includes(name)) {
return options.onDangerousProperty(name);
}
return name;
}