xml-parser.ts
Version:
A lightweight, zero-dependency XML parser for TypeScript/JavaScript that converts XML strings to JavaScript objects and vice versa.
349 lines (348 loc) • 10.9 kB
JavaScript
;
// CommonJS export table.
// The `__esModule` flag is set on the exports object, then the public functions
// are attached. The functions are referenced before their definitions appear in
// the file; this works because function declarations are hoisted.
Object.defineProperty(exports, "__esModule", { value: true });
exports.xml_to_json = xml_to_json;
exports.json_to_xml = json_to_xml;
exports.parse_xml_element = parse_xml_element;
exports.remove_xml_comments = remove_xml_comments;
exports.remove_xml_metadata = remove_xml_metadata;
/**
 * Parse XML string to javascript object.
 *
 * Supported:
 * - Nested elements
 * - Text content (as string or number)
 * - Multiple root elements
 * - CDATA
 * - Self-closing elements
 *
 * Ignored (stripped before parsing):
 * - Metadata (`<? ... ?>`)
 * - Comments (`<!-- ... -->`)
 *
 * Not supported:
 * - Attributes
 * - Element with both child elements and text content
 * - Comment / metadata markers inside CDATA are not guaranteed to survive:
 *   stripping is delegated to `remove_xml_comments` and `remove_xml_metadata`.
 *
 * @description
 * - Nested elements are parsed as object properties using tag name as property name.
 * - Element with text content is parsed as string or number. This also applies
 *   to a root element that contains only text (e.g. `<count>3</count>` gives
 *   `{ count: 3 }`).
 * - A root element without children and without text (e.g. `<a></a>` or `<a/>`)
 *   is parsed as an empty object.
 * - Element appears multiple times is parsed as array.
 *
 * @param {string} xml - XML source text.
 * @returns {Object} One property per distinct root tag name.
 * @throws {Error} When no element is found, or when an element is malformed
 *   (errors raised by the helper functions are propagated unchanged).
 *
 * @example
 * XML content:
 * ```xml
 * <annotation>
 *   <folder>images</folder>
 *   <filename>maksssksksss0.png</filename>
 *   <size>
 *     <width>512</width>
 *     <height>366</height>
 *   </size>
 *   <object>
 *     <x>79</x>
 *     <y>105</y>
 *   </object>
 *   <object>
 *     <x>185</x>
 *     <y>100</y>
 *   </object>
 * </annotation>
 * ```
 *
 * Parsed javascript object:
 * ```javascript
 * {
 *   annotation: {
 *     folder: "images",
 *     filename: "maksssksksss0.png",
 *     size: {
 *       width: 512,
 *       height: 366,
 *     },
 *     object: [
 *       { x: 79, y: 105 },
 *       { x: 185, y: 100 }
 *     ]
 *   }
 * }
 * ```
 */
function xml_to_json(xml) {
    // Work on a cleaned copy instead of reassigning the parameter.
    const cleaned = remove_xml_metadata(remove_xml_comments(xml));
    if (!cleaned.includes('<')) {
        throw new Error('Invalid XML: no element found');
    }
    const roots = new Map();
    let offset = 0;
    for (;;) {
        const element = parse_xml_element(cleaned, offset);
        // A root that holds only text used to be reported as `{}`, silently
        // dropping its content. Convert it the same way child elements are
        // converted. Roots with children, and empty roots, keep yielding
        // their properties object.
        const has_children = Object.keys(element.properties).length > 0;
        const has_text = element.text_content != null && element.text_content !== '';
        const value = !has_children && has_text
            ? parse_text_content(element.text_content)
            : element.properties;
        add_property(roots, element.tag_name, value);
        offset = element.offset;
        // Stop once only whitespace (or nothing) is left after the last root.
        if (cleaned.slice(offset).trim() === '') {
            break;
        }
    }
    return Object.fromEntries(roots);
}
/**
 * Convert JavaScript object to XML string.
 *
 * @description
 * - Object properties become XML elements using property names as tag names.
 * - String, number and boolean values become text content.
 * - Arrays create multiple elements with the same tag name.
 * - Nested objects create nested XML elements.
 * - Empty objects, `null` and `undefined` create self-closing elements
 *   (also when they appear as array items).
 * - Text is written verbatim: no escaping of `<` or `&` is performed.
 *
 * @param {*} object - Value to serialize (normally a plain object).
 * @param {{initial_indent?: string, indent_step?: string}} [options]
 *   `initial_indent` is prepended to every top-level line (default `''`);
 *   `indent_step` is added once per nesting level (default `' '`).
 * @returns {string} XML text; every element line ends with `\n`.
 *
 * @example
 * JavaScript object:
 * ```javascript
 * {
 *   annotation: {
 *     folder: "images",
 *     filename: "maksssksksss0.png",
 *     size: {
 *       width: 512,
 *       height: 366,
 *     },
 *     object: [
 *       { x: 79, y: 105 },
 *       { x: 185, y: 100 }
 *     ]
 *   }
 * }
 * ```
 *
 * Generated XML:
 * ```xml
 * <annotation>
 *   <folder>images</folder>
 *   <filename>maksssksksss0.png</filename>
 *   <size>
 *     <width>512</width>
 *     <height>366</height>
 *   </size>
 *   <object>
 *     <x>79</x>
 *     <y>105</y>
 *   </object>
 *   <object>
 *     <x>185</x>
 *     <y>100</y>
 *   </object>
 * </annotation>
 * ```
 */
function json_to_xml(object, options = {}) {
    return recursive_to_xml(object, {
        initial_indent: options.initial_indent ?? '',
        indent_step: options.indent_step ?? ' ',
    });
}
/**
 * Serialize one value at the indentation level given by `options`.
 *
 * - `null` / `undefined` give an empty string.
 * - Strings, numbers and booleans give their `String(...)` form.
 * - Arrays give the concatenation of their serialized items.
 * - Objects give one element line (or nested element) per own enumerable property.
 * - Anything else gives an empty string.
 *
 * @param {*} object - Value to serialize.
 * @param {{initial_indent: string, indent_step: string}} options - Resolved
 *   indentation settings (both fields required).
 * @returns {string}
 */
function recursive_to_xml(object, options) {
    if (object === null || object === undefined) {
        return '';
    }
    switch (typeof object) {
        case 'string':
        case 'number':
        case 'boolean':
            return String(object);
    }
    if (Array.isArray(object)) {
        return object.map(item => recursive_to_xml(item, options)).join('');
    }
    if (typeof object !== 'object') {
        return '';
    }
    const current_indent = options.initial_indent;
    const child_options = {
        initial_indent: current_indent + options.indent_step,
        indent_step: options.indent_step,
    };
    const self_closing = key => `${current_indent}<${key} />\n`;
    const text_element = (key, text) => `${current_indent}<${key}>${text}</${key}>\n`;
    // An object whose serialized body is empty collapses to `<key />`, as the
    // public documentation of json_to_xml promises for empty objects.
    const nested_element = (key, value) => {
        const body = recursive_to_xml(value, child_options);
        if (body === '') {
            return self_closing(key);
        }
        return `${current_indent}<${key}>\n${body}${current_indent}</${key}>\n`;
    };
    const is_text = value => typeof value === 'string' ||
        typeof value === 'number' ||
        typeof value === 'boolean';
    let xml = '';
    for (const [key, value] of Object.entries(object)) {
        if (value === null || value === undefined) {
            xml += self_closing(key);
        }
        else if (is_text(value)) {
            xml += text_element(key, value);
        }
        else if (Array.isArray(value)) {
            for (const item of value) {
                if (item === null || item === undefined) {
                    // Same rendering as a null/undefined property value,
                    // instead of the literal text "null" / "undefined".
                    xml += self_closing(key);
                }
                else if (typeof item === 'object') {
                    xml += nested_element(key, item);
                }
                else {
                    xml += text_element(key, item);
                }
            }
        }
        else if (typeof value === 'object') {
            xml += nested_element(key, value);
        }
        // Other value types (e.g. functions) are skipped.
    }
    return xml;
}
let cdata_start_pattern = '<![CDATA[';
let cdata_end_pattern = ']]>';
/**
 * @description Parse a single element (and, recursively, its children) from XML string.
 * - Metadata and comments should be removed before passing to this function.
 * - The element's end is the closing tag found at the position where the next
 *   child would start. Nested elements with the same tag name, and CDATA
 *   containing the text of the closing tag, are therefore handled.
 * - Text that precedes or follows CDATA / child elements is ignored; text is
 *   only taken from the raw content when there are no children and no CDATA.
 *
 * @param {string} xml - XML text.
 * @param {number} offset - Index at which to start looking for the opening `<`.
 * @returns {{tag_name: string, properties: Object, text_content: (string|null), offset: number}}
 *   `properties` holds the converted children, `text_content` the CDATA or
 *   trimmed text (null for self-closing elements and elements with children
 *   but no CDATA), and `offset` the index just past the element.
 * @throws {Error} When a delimiter is missing or a closing tag appears where
 *   an element was expected.
 */
function parse_xml_element(xml, offset) {
    const tag_start_index = xml.indexOf('<', offset);
    if (tag_start_index === -1) {
        throw new Error(`Invalid XML: symbol "<" not found for element opening, offset: ${offset}`);
    }
    const tag_end_index = xml.indexOf('>', tag_start_index + 1);
    if (tag_end_index === -1) {
        throw new Error(`Invalid XML: symbol ">" not found for element opening, offset: ${tag_start_index + 1}`);
    }
    const tag_content = xml.slice(tag_start_index + 1, tag_end_index);
    const content_start_index = tag_end_index + 1;
    // A closing tag here means it does not match the element being parsed by
    // the caller. Report it directly instead of treating "/name" as a tag name.
    if (tag_content.startsWith('/')) {
        throw new Error(`Invalid XML: unexpected closing tag "<${tag_content}>", offset: ${tag_start_index}`);
    }
    // Self-closing element (e.g. '<box/>')
    if (tag_content.endsWith('/')) {
        return {
            tag_name: tag_content.slice(0, -1).trim(),
            properties: {},
            text_content: null,
            offset: content_start_index,
        };
    }
    const tag_name = tag_content.trim();
    const closing_tag = `</${tag_name}>`;
    if (xml.indexOf(closing_tag, content_start_index) === -1) {
        throw new Error(`Invalid XML: symbol "</${tag_name}>" not found for element closing, offset: ${content_start_index}`);
    }
    const properties = new Map();
    let text_content = null;
    let cursor = content_start_index;
    let element_end_index;
    for (;;) {
        const next_tag_index = xml.indexOf('<', cursor);
        if (next_tag_index === -1) {
            throw new Error(`Invalid XML: symbol "<" not found for element closing or child element of element <${tag_name}>, offset: ${cursor}`);
        }
        // The element ends at the first closing tag met at this nesting level.
        // Occurrences inside children or CDATA were already skipped over.
        if (xml.startsWith(closing_tag, next_tag_index)) {
            element_end_index = next_tag_index;
            cursor = next_tag_index + closing_tag.length;
            break;
        }
        if (xml.startsWith(cdata_start_pattern, next_tag_index)) {
            const cdata_content_start = next_tag_index + cdata_start_pattern.length;
            const cdata_end_index = xml.indexOf(cdata_end_pattern, cdata_content_start);
            if (cdata_end_index === -1) {
                throw new Error(`Invalid XML: symbol "]]>" not found for CDATA closing, offset: ${next_tag_index}`);
            }
            // Several CDATA sections in one element are concatenated.
            text_content = (text_content ?? '') + xml.slice(cdata_content_start, cdata_end_index);
            cursor = cdata_end_index + cdata_end_pattern.length;
            continue;
        }
        // Regular child element: text-bearing children become string/number,
        // all others become their properties object.
        const child = parse_xml_element(xml, next_tag_index);
        const value = child.text_content != null
            ? parse_text_content(child.text_content)
            : child.properties;
        add_property(properties, child.tag_name, value);
        cursor = child.offset;
    }
    if (properties.size === 0 && text_content == null) {
        text_content = xml.slice(content_start_index, element_end_index).trim();
    }
    return {
        tag_name,
        properties: Object.fromEntries(properties),
        text_content,
        offset: cursor,
    };
}
let comment_start_pattern = '<!--';
let comment_end_pattern = '-->';
/**
 * Remove every `<!-- ... -->` comment from the XML text.
 * Comment markers inside a terminated CDATA section are left untouched.
 *
 * @param {string} xml
 * @returns {string} The text without comments.
 * @throws {Error} When a comment is opened but never closed; the reported
 *   offset is the index of the opening marker in the input.
 */
function remove_xml_comments(xml) {
    return remove_delimited_sections(xml, comment_start_pattern, comment_end_pattern, 'comment');
}
let metadata_start_pattern = '<?';
let metadata_end_pattern = '?>';
/**
 * Remove every `<? ... ?>` section (XML declaration / processing instruction)
 * from the XML text. Markers inside a terminated CDATA section are left untouched.
 *
 * @param {string} xml
 * @returns {string} The text without metadata sections.
 * @throws {Error} When a section is opened but never closed; the reported
 *   offset is the index of the opening marker in the input.
 */
function remove_xml_metadata(xml) {
    return remove_delimited_sections(xml, metadata_start_pattern, metadata_end_pattern, 'metadata');
}
/**
 * Single left-to-right pass that drops every `start_pattern ... end_pattern`
 * section and copies everything else, including whole CDATA sections, verbatim.
 *
 * @param {string} xml
 * @param {string} start_pattern - Opening delimiter of the sections to drop.
 * @param {string} end_pattern - Closing delimiter of the sections to drop.
 * @param {string} kind - Word used in the error message ("comment", "metadata").
 * @returns {string}
 */
function remove_delimited_sections(xml, start_pattern, end_pattern, kind) {
    const cdata_open = '<![CDATA[';
    const cdata_close = ']]>';
    const kept = [];
    let cursor = 0;
    // Cached search results; each is refreshed only after the cursor has moved
    // past it, so the whole input is scanned a bounded number of times.
    let section_index = xml.indexOf(start_pattern);
    let cdata_index = xml.indexOf(cdata_open);
    for (;;) {
        if (section_index !== -1 && section_index < cursor) {
            section_index = xml.indexOf(start_pattern, cursor);
        }
        if (cdata_index !== -1 && cdata_index < cursor) {
            cdata_index = xml.indexOf(cdata_open, cursor);
        }
        if (section_index === -1) {
            kept.push(xml.slice(cursor));
            return kept.join('');
        }
        if (cdata_index !== -1 && cdata_index < section_index) {
            const cdata_end = xml.indexOf(cdata_close, cdata_index + cdata_open.length);
            if (cdata_end === -1) {
                // Unterminated CDATA: nothing to protect, so stop looking for CDATA.
                cdata_index = -1;
            }
            else {
                // Copy the CDATA section as-is, whatever markers it contains.
                const resume_index = cdata_end + cdata_close.length;
                kept.push(xml.slice(cursor, resume_index));
                cursor = resume_index;
            }
            continue;
        }
        // Search after the opening delimiter so the two delimiters cannot overlap
        // (e.g. "<!-->" is not a complete comment).
        const end_index = xml.indexOf(end_pattern, section_index + start_pattern.length);
        if (end_index === -1) {
            throw new Error(`Invalid XML: symbol "${end_pattern}" not found for ${kind} closing, offset: ${section_index}`);
        }
        kept.push(xml.slice(cursor, section_index));
        cursor = end_index + end_pattern.length;
    }
}
/**
 * Record `value` under `tag_name` in the `properties` map.
 *
 * - First occurrence of a tag name: the value is stored as-is.
 * - Second occurrence: the stored value and the new one are wrapped in an array.
 * - Later occurrences: the new value is appended to that array.
 *
 * @param {Map} properties - Map that is updated in place.
 * @param {string} tag_name
 * @param {*} value
 */
function add_property(properties, tag_name, value) {
    if (properties.has(tag_name)) {
        const previous = properties.get(tag_name);
        if (!Array.isArray(previous)) {
            // second occurrence: promote the single value to a list
            properties.set(tag_name, [previous, value]);
            return;
        }
        // third and later occurrences: grow the existing list
        previous.push(value);
        return;
    }
    // first occurrence: keep the bare value
    properties.set(tag_name, value);
}
/**
 * Convert element text to a number when it is numeric, otherwise keep the string.
 *
 * Empty and whitespace-only text is returned unchanged. Numeric conversion
 * would turn such text into `0`, and whitespace-only text can reach this
 * function through CDATA, whose content is not trimmed.
 *
 * @param {string} text
 * @returns {string|number} The numeric value when `Number(text)` is not NaN,
 *   otherwise the original text.
 */
function parse_text_content(text) {
    if (typeof text === 'string' && text.trim() === '') {
        return text;
    }
    const number = Number(text);
    return Number.isNaN(number) ? text : number;
}