UNPKG

partial-xml-stream-parser

Version:

A lenient XML stream parser for Node.js and browsers that can handle incomplete or malformed XML data, with depth control, CDATA support for XML serialization and round-trip parsing, wildcard pattern support for stopNodes, and CDATA handling within stopNodes.

1,629 lines (1,517 loc) 91.4 kB
import { describe, it, expect, beforeEach } from "vitest" import { PartialXMLStreamParser, xmlObjectToString } from "../index" describe("PartialXMLStreamParser", () => { let parser: PartialXMLStreamParser beforeEach(() => { // Default parser for most tests, now implies alwaysCreateTextNode: true parser = new PartialXMLStreamParser({ textNodeName: "#text" }) }) it("should parse a stream chunk by chunk correctly", () => { let streamResult streamResult = parser.parseStream("<read>") expect(streamResult).toEqual({ metadata: { partial: true }, xml: [{ read: {} }], }) streamResult = parser.parseStream("<args>") expect(streamResult).toEqual({ metadata: { partial: true }, xml: [{ read: { args: {} } }], }) streamResult = parser.parseStream("<file><name>as") expect(streamResult).toEqual({ metadata: { partial: true }, xml: [{ read: { args: { file: { name: { "#text": "as" } } } } }], }) streamResult = parser.parseStream("d</name>") expect(streamResult).toEqual({ metadata: { partial: true }, xml: [{ read: { args: { file: { name: { "#text": "asd" } } } } }], }) streamResult = parser.parseStream("</file></args>") expect(streamResult).toEqual({ metadata: { partial: true }, xml: [{ read: { args: { file: { name: { "#text": "asd" } } } } }], }) streamResult = parser.parseStream("</read>") expect(streamResult).toEqual({ metadata: { partial: false }, xml: [{ read: { args: { file: { name: { "#text": "asd" } } } } }], }) streamResult = parser.parseStream(null) // Signal end of stream expect(streamResult).toEqual({ metadata: { partial: false }, xml: [{ read: { args: { file: { name: { "#text": "asd" } } } } }], }) }) it("should handle a single incomplete chunk, then completion", () => { let streamResult const singleChunk = "<request><id>123</id><data>value<da" streamResult = parser.parseStream(singleChunk) expect(streamResult).toEqual({ metadata: { partial: true }, xml: [{ request: { id: { "#text": "123" }, data: { "#text": "value<da" } } }], }) streamResult = 
parser.parseStream("ta></request>") expect(streamResult).toEqual({ metadata: { partial: false }, xml: [ { request: { id: { "#text": "123" }, data: { "#text": "value<data>" }, }, }, ], }) streamResult = parser.parseStream(null) // Signal end expect(streamResult).toEqual({ metadata: { partial: false }, xml: [ { request: { id: { "#text": "123" }, data: { "#text": "value<data>" }, }, }, ], }) }) it("should handle a text-only stream", () => { let streamResult streamResult = parser.parseStream("Just some text") expect(streamResult).toEqual({ metadata: { partial: false }, xml: ["Just some text"], }) streamResult = parser.parseStream(null) // End stream expect(streamResult).toEqual({ metadata: { partial: false }, xml: ["Just some text"], }) }) it("should handle self-closing tags and mixed content", () => { let streamResult streamResult = parser.parseStream("<root><item/>Text after item<another/></root>") expect(streamResult).toEqual({ metadata: { partial: false }, xml: [{ root: { item: {}, "#text": "Text after item", another: {} } }], }) streamResult = parser.parseStream(null) expect(streamResult).toEqual({ metadata: { partial: false }, xml: [{ root: { item: {}, "#text": "Text after item", another: {} } }], }) }) it("should handle XML entities in text nodes", () => { parser = new PartialXMLStreamParser({ textNodeName: "#text" }) // Re-init to be sure about options let streamResult = parser.parseStream("<doc>Hello & \"World\" 'Test'</doc>") expect(streamResult).toEqual({ metadata: { partial: false }, xml: [{ doc: { "#text": "Hello & \"World\" 'Test'" } }], }) streamResult = parser.parseStream(null) expect(streamResult).toEqual({ metadata: { partial: false }, xml: [{ doc: { "#text": "Hello & \"World\" 'Test'" } }], }) }) it("should handle XML entities in attribute values", () => { parser = new PartialXMLStreamParser({ textNodeName: "#text", attributeNamePrefix: "@", }) let streamResult = parser.parseStream('<doc val="&lt;value&gt;" />') expect(streamResult).toEqual({ 
metadata: { partial: false }, xml: [{ doc: { "@val": "<value>" } }], }) streamResult = parser.parseStream(null) expect(streamResult).toEqual({ metadata: { partial: false }, xml: [{ doc: { "@val": "<value>" } }], }) }) it("should handle numeric XML entities (decimal and hex)", () => { parser = new PartialXMLStreamParser({ textNodeName: "#text" }) let streamResult = parser.parseStream("<doc>&#60;Hello&#x26;&#32;World&#x3E;</doc>") // <Hello& World> expect(streamResult).toEqual({ metadata: { partial: false }, xml: [{ doc: { "#text": "<Hello& World>" } }], }) streamResult = parser.parseStream(null) expect(streamResult).toEqual({ metadata: { partial: false }, xml: [{ doc: { "#text": "<Hello& World>" } }], }) }) it("should correctly parse multiple chunks that form a complete XML", () => { parser = new PartialXMLStreamParser({ textNodeName: "#text", attributeNamePrefix: "@", }) parser.parseStream("<data><item") parser.parseStream(' key="value">Te') parser.parseStream("st</item><item2") let streamResult = parser.parseStream("/></data>") expect(streamResult).toEqual({ metadata: { partial: false }, xml: [{ data: { item: { "@key": "value", "#text": "Test" }, item2: {} } }], }) streamResult = parser.parseStream(null) expect(streamResult).toEqual({ metadata: { partial: false }, xml: [{ data: { item: { "@key": "value", "#text": "Test" }, item2: {} } }], }) }) it("should return empty array xml for empty stream", () => { parser = new PartialXMLStreamParser() let streamResult = parser.parseStream("") expect(streamResult).toEqual({ metadata: { partial: true }, xml: [] }) streamResult = parser.parseStream(null) expect(streamResult).toEqual({ metadata: { partial: false }, xml: [] }) }) it("should handle stream with only XML declaration and comments", () => { parser = new PartialXMLStreamParser() let streamResult = parser.parseStream('<?xml version="1.0"?><!-- comment -->') expect(streamResult).toEqual({ metadata: { partial: false }, xml: [] }) streamResult = parser.parseStream(null) 
expect(streamResult).toEqual({ metadata: { partial: false }, xml: [] }) }) it("should handle custom attributeNamePrefix", () => { parser = new PartialXMLStreamParser({ attributeNamePrefix: "_" }) let streamResult = parser.parseStream('<doc attr="val" />') expect(streamResult).toEqual({ metadata: { partial: false }, xml: [{ doc: { _attr: "val" } }], }) streamResult = parser.parseStream(null) expect(streamResult).toEqual({ metadata: { partial: false }, xml: [{ doc: { _attr: "val" } }], }) parser = new PartialXMLStreamParser({ attributeNamePrefix: "" }) streamResult = parser.parseStream('<doc attr="val" />') expect(streamResult).toEqual({ metadata: { partial: false }, xml: [{ doc: { attr: "val" } }], }) streamResult = parser.parseStream(null) expect(streamResult).toEqual({ metadata: { partial: false }, xml: [{ doc: { attr: "val" } }], }) }) it("should parse CDATA sections correctly", () => { parser = new PartialXMLStreamParser({ textNodeName: "#text" }) let streamResult = parser.parseStream("<root><![CDATA[This is <CDATA> text with & special chars]]></root>") expect(streamResult).toEqual({ metadata: { partial: false }, xml: [{ root: { "#text": "This is <CDATA> text with & special chars" } }], }) streamResult = parser.parseStream(null) expect(streamResult).toEqual({ metadata: { partial: false }, xml: [{ root: { "#text": "This is <CDATA> text with & special chars" } }], }) }) it("should handle unterminated CDATA section", () => { parser = new PartialXMLStreamParser({ textNodeName: "#text" }) let streamResult = parser.parseStream("<root><![CDATA[Unterminated cdata") expect(streamResult).toEqual({ metadata: { partial: true }, xml: [{ root: { "#text": "Unterminated cdata" } }], }) streamResult = parser.parseStream(null) expect(streamResult).toEqual({ metadata: { partial: true }, xml: [{ root: { "#text": "Unterminated cdata" } }], }) }) it("should handle CDATA at root level if it is the only content", () => { parser = new PartialXMLStreamParser() let streamResult = 
parser.parseStream("<![CDATA[Root CDATA]]>") expect(streamResult).toEqual({ metadata: { partial: false }, xml: ["Root CDATA"], }) streamResult = parser.parseStream(null) expect(streamResult).toEqual({ metadata: { partial: false }, xml: ["Root CDATA"], }) }) it("should handle unterminated comments", () => { parser = new PartialXMLStreamParser() let streamResult = parser.parseStream("<root><!-- This is an unterminated comment") expect(streamResult).toEqual({ metadata: { partial: true }, xml: [{ root: {} }], }) streamResult = parser.parseStream(null) expect(streamResult).toEqual({ metadata: { partial: true }, xml: [{ root: {} }], }) }) it("should handle unterminated DOCTYPE", () => { parser = new PartialXMLStreamParser() let streamResult = parser.parseStream('<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"') expect(streamResult).toEqual({ metadata: { partial: true }, xml: [] }) streamResult = parser.parseStream(null) expect(streamResult).toEqual({ metadata: { partial: false }, xml: [] }) }) it("should handle unterminated XML declaration", () => { parser = new PartialXMLStreamParser() let streamResult = parser.parseStream('<?xml version="1.0" encoding="UTF-8"') expect(streamResult).toEqual({ metadata: { partial: true }, xml: [] }) streamResult = parser.parseStream(null) expect(streamResult).toEqual({ metadata: { partial: false }, xml: [] }) }) it("should leniently handle mismatched closing tags", () => { parser = new PartialXMLStreamParser({ textNodeName: "#text" }) let streamResult = parser.parseStream("<root><item>text</mismatched></item></root>") expect(streamResult).toEqual({ metadata: { partial: false }, xml: [{ root: { item: { "#text": "text</mismatched>" } } }], }) streamResult = parser.parseStream(null) expect(streamResult).toEqual({ metadata: { partial: false }, xml: [{ root: { item: { "#text": "text</mismatched>" } } }], }) }) it("should handle attributes without explicit values (boolean attributes) as true", () => { parser = new 
PartialXMLStreamParser({ attributeNamePrefix: "@" }) let streamResult = parser.parseStream('<input disabled checked="checked" required />') expect(streamResult).toEqual({ metadata: { partial: false }, xml: [ { input: { "@disabled": true, "@checked": "checked", "@required": true, }, }, ], }) streamResult = parser.parseStream(null) expect(streamResult).toEqual({ metadata: { partial: false }, xml: [ { input: { "@disabled": true, "@checked": "checked", "@required": true, }, }, ], }) }) it("should correctly simplify text-only elements", () => { // This test now reflects alwaysCreateTextNode: true behavior from beforeEach let streamResult = parser.parseStream("<parent><child>simple text</child></parent>") expect(streamResult).toEqual({ metadata: { partial: false }, xml: [{ parent: { child: { "#text": "simple text" } } }], }) streamResult = parser.parseStream(null) expect(streamResult).toEqual({ metadata: { partial: false }, xml: [{ parent: { child: { "#text": "simple text" } } }], }) }) it("should not simplify elements with attributes even if they also have text", () => { // This test already aligns with alwaysCreateTextNode: true behavior parser = new PartialXMLStreamParser({ textNodeName: "#text", attributeNamePrefix: "@", }) let streamResult = parser.parseStream('<parent><child attr="val">text content</child></parent>') expect(streamResult).toEqual({ metadata: { partial: false }, xml: [{ parent: { child: { "@attr": "val", "#text": "text content" } } }], }) streamResult = parser.parseStream(null) expect(streamResult).toEqual({ metadata: { partial: false }, xml: [{ parent: { child: { "@attr": "val", "#text": "text content" } } }], }) }) it("should not simplify elements with child elements", () => { // This test's expectation doesn't change with alwaysCreateTextNode parser = new PartialXMLStreamParser() // Uses new default alwaysCreateTextNode: true let streamResult = parser.parseStream("<parent><child><grandchild/></child></parent>") expect(streamResult).toEqual({ 
metadata: { partial: false }, xml: [{ parent: { child: { grandchild: {} } } }], }) streamResult = parser.parseStream(null) expect(streamResult).toEqual({ metadata: { partial: false }, xml: [{ parent: { child: { grandchild: {} } } }], }) }) it("should ignore text nodes containing only whitespace by default", () => { // Expectation changes due to alwaysCreateTextNode: true from beforeEach let streamResult = parser.parseStream("<root> <item>text</item> </root>") expect(streamResult).toEqual({ metadata: { partial: false }, xml: [{ root: { item: { "#text": "text" } } }], // Whitespace around item is trimmed, text inside item gets #text }) streamResult = parser.parseStream(null) expect(streamResult).toEqual({ metadata: { partial: false }, xml: [{ root: { item: { "#text": "text" } } }], }) }) it("should omit whitespace text nodes even if alwaysCreateTextNode is true", () => { parser = new PartialXMLStreamParser({ textNodeName: "#text", alwaysCreateTextNode: true, }) let streamResult = parser.parseStream("<root> <item>text</item> </root>") expect(streamResult).toEqual({ metadata: { partial: false }, xml: [{ root: { item: { "#text": "text" } } }], // Whitespace-only nodes between tags are omitted }) streamResult = parser.parseStream(null) expect(streamResult).toEqual({ metadata: { partial: false }, xml: [{ root: { item: { "#text": "text" } } }], }) }) it("should handle text at root level before any tags", () => { parser = new PartialXMLStreamParser() // Uses new default let streamResult = parser.parseStream("Leading text<root/>") expect(streamResult).toEqual({ metadata: { partial: false }, xml: ["Leading text", { root: {} }], }) streamResult = parser.parseStream(null) expect(streamResult).toEqual({ metadata: { partial: false }, xml: ["Leading text", { root: {} }], }) }) it("should handle text at root level after all tags are closed", () => { parser = new PartialXMLStreamParser() // Uses new default let streamResult = parser.parseStream("<root/>Trailing text") 
expect(streamResult).toEqual({ metadata: { partial: false }, xml: [{ root: {} }, "Trailing text"], }) streamResult = parser.parseStream(null) expect(streamResult).toEqual({ metadata: { partial: false }, xml: [{ root: {} }, "Trailing text"], }) }) it("should handle multiple root elements", () => { // Expectation changes due to alwaysCreateTextNode: true from beforeEach let streamResult = parser.parseStream("<rootA/><rootB>text</rootB>") expect(streamResult).toEqual({ metadata: { partial: false }, xml: [{ rootA: {} }, { rootB: { "#text": "text" } }], }) streamResult = parser.parseStream(null) expect(streamResult).toEqual({ metadata: { partial: false }, xml: [{ rootA: {} }, { rootB: { "#text": "text" } }], }) }) it("should handle multiple root elements in specific order", () => { // Expectation changes due to alwaysCreateTextNode: true from beforeEach const xml = "<thinking>a</thinking><some-tool></some-tool>" let streamResult = parser.parseStream(xml) expect(streamResult).toEqual({ metadata: { partial: false }, xml: [{ thinking: { "#text": "a" } }, { "some-tool": {} }], }) streamResult = parser.parseStream(null) expect(streamResult).toEqual({ metadata: { partial: false }, xml: [{ thinking: { "#text": "a" } }, { "some-tool": {} }], }) }) it("should handle Buffer input", () => { // Expectation changes due to alwaysCreateTextNode: true from beforeEach let streamResult = parser.parseStream(Buffer.from("<data>value</data>")) expect(streamResult).toEqual({ metadata: { partial: false }, xml: [{ data: { "#text": "value" } }], }) streamResult = parser.parseStream(null) expect(streamResult).toEqual({ metadata: { partial: false }, xml: [{ data: { "#text": "value" } }], }) }) it("should handle multiple attributes correctly", () => { parser = new PartialXMLStreamParser({ attributeNamePrefix: "@" }) let streamResult = parser.parseStream("<tag attr1=\"val1\" attr2='val2' attr3=val3 />") expect(streamResult).toEqual({ metadata: { partial: false }, xml: [{ tag: { "@attr1": "val1", 
"@attr2": "val2", "@attr3": "val3" } }], }) streamResult = parser.parseStream(null) expect(streamResult).toEqual({ metadata: { partial: false }, xml: [{ tag: { "@attr1": "val1", "@attr2": "val2", "@attr3": "val3" } }], }) }) it("should handle incomplete tags at end of chunk and then completed", () => { parser = new PartialXMLStreamParser({ // Uses new default alwaysCreateTextNode: true textNodeName: "#text", attributeNamePrefix: "@", }) parser.parseStream("<root><item") let streamResult = parser.parseStream(" attr='1'>Text</item></r") expect(streamResult!.xml![0].root.item).toEqual({ "@attr": "1", "#text": "Text", }) expect(streamResult!.xml![0].root["#text"]).toBe("</r") // This part becomes text expect(streamResult!.metadata.partial).toBe(true) parser = new PartialXMLStreamParser({ textNodeName: "#text", attributeNamePrefix: "@", }) parser.parseStream("<root><item") streamResult = parser.parseStream(" attr='1'>Text</item></root>") expect(streamResult).toEqual({ metadata: { partial: false }, xml: [{ root: { item: { "@attr": "1", "#text": "Text" } } }], }) streamResult = parser.parseStream(null) expect(streamResult).toEqual({ metadata: { partial: false }, xml: [{ root: { item: { "@attr": "1", "#text": "Text" } } }], }) }) it("should handle empty string chunks in midst of stream", () => { // Expectation changes due to alwaysCreateTextNode: true from beforeEach parser.parseStream("<doc>") parser.parseStream("") let streamResult = parser.parseStream("<content>Hello</content>") expect(streamResult!.xml![0].doc.content).toEqual({ "#text": "Hello" }) expect(streamResult!.metadata.partial).toBe(true) let finalDocStreamResult = parser.parseStream("</doc>") expect(finalDocStreamResult!.xml![0].doc.content).toEqual({ "#text": "Hello", }) expect(finalDocStreamResult!.metadata.partial).toBe(false) streamResult = parser.parseStream(null) expect(streamResult).toEqual({ metadata: { partial: false }, xml: [{ doc: { content: { "#text": "Hello" } } }], }) }) it("should set 
partial:true when stream ends with an incomplete tag", () => { parser = new PartialXMLStreamParser({ textNodeName: "#text" }) let streamResult = parser.parseStream("<root><incompleteTag") streamResult = parser.parseStream(null) // End stream expect(streamResult).toEqual({ metadata: { partial: true }, xml: [{ root: { "#text": "<incompleteTag" } }], // The fragment is treated as text of parent }) parser.reset() parser = new PartialXMLStreamParser({ textNodeName: "#text" }) streamResult = parser.parseStream("<root><item>Text</item></incompleteCl") streamResult = parser.parseStream(null) // End stream expect(streamResult).toEqual({ metadata: { partial: true }, xml: [{ root: { item: { "#text": "Text" }, "#text": "</incompleteCl" } }], // Fragment as text }) parser.reset() parser = new PartialXMLStreamParser({ textNodeName: "#text" }) streamResult = parser.parseStream("<root><item>Text</item><") // Just '<' streamResult = parser.parseStream(null) // End stream expect(streamResult).toEqual({ metadata: { partial: true }, xml: [{ root: { item: { "#text": "Text" }, "#text": "<" } }], // Fragment as text }) parser.reset() parser = new PartialXMLStreamParser({ textNodeName: "#text" }) streamResult = parser.parseStream("<root attr='val") streamResult = parser.parseStream(null) // End stream expect(streamResult).toEqual({ metadata: { partial: true }, // Depending on how strictly attributes are parsed before '>', // this might be an empty root or root with partial text. 
// Current behavior treats "<root attr='val" as text if not closed by ">" xml: [{ "#text": "<root attr='val" }], }) }) describe("maxDepth feature", () => { it("should treat tags beyond maxDepth as stopNodes", () => { parser = new PartialXMLStreamParser({ maxDepth: 2, textNodeName: "#text", }) let streamResult = parser.parseStream( "<root><level1><level2><level3>content</level3></level2></level1></root>", ) expect(streamResult).toEqual({ metadata: { partial: false }, xml: [ { root: { level1: { level2: { "#text": "<level3>content</level3>" }, }, }, }, ], }) streamResult = parser.parseStream(null) expect(streamResult).toEqual({ metadata: { partial: false }, xml: [ { root: { level1: { level2: { "#text": "<level3>content</level3>" }, }, }, }, ], }) }) it("should handle maxDepth with nested tags and attributes", () => { parser = new PartialXMLStreamParser({ maxDepth: 3, textNodeName: "#text", attributeNamePrefix: "@", }) let streamResult = parser.parseStream('<root><a><b><c id="test"><d>deep content</d></c></b></a></root>') expect(streamResult).toEqual({ metadata: { partial: false }, xml: [ { root: { a: { b: { c: { "#text": "<d>deep content</d>", "@id": "test", }, }, }, }, }, ], }) }) it("should work with maxDepth 1 (only root level allowed)", () => { parser = new PartialXMLStreamParser({ maxDepth: 1, textNodeName: "#text", }) let streamResult = parser.parseStream("<root><child>content</child></root>") expect(streamResult).toEqual({ metadata: { partial: false }, xml: [ { root: { child: { "#text": "content" }, }, }, ], }) }) it("should work with maxDepth 0 (treat everything as text)", () => { parser = new PartialXMLStreamParser({ maxDepth: 0, textNodeName: "#text", }) let streamResult = parser.parseStream("<root><child>content</child></root>") expect(streamResult).toEqual({ metadata: { partial: false }, xml: ["<root><child>content</child></root>"], }) }) it("should combine maxDepth with existing stopNodes", () => { parser = new PartialXMLStreamParser({ maxDepth: 3, 
stopNodes: ["root.level1.script"], textNodeName: "#text", }) let streamResult = parser.parseStream( "<root><level1><script>code</script><level2><level3>content</level3></level2></level1></root>", ) expect(streamResult).toEqual({ metadata: { partial: false }, xml: [ { root: { level1: { script: { "#text": "code" }, level2: { level3: { "#text": "content" }, }, }, }, }, ], }) }) it("should handle self-closing tags at max depth", () => { parser = new PartialXMLStreamParser({ maxDepth: 2, textNodeName: "#text", }) let streamResult = parser.parseStream( "<root><level1><selfclosing/><level2>content</level2></level1></root>", ) expect(streamResult).toEqual({ metadata: { partial: false }, xml: [ { root: { level1: { selfclosing: {}, level2: { "#text": "content" }, }, }, }, ], }) }) it("should handle null maxDepth (no depth limit)", () => { parser = new PartialXMLStreamParser({ maxDepth: null, textNodeName: "#text", }) let streamResult = parser.parseStream("<root><a><b><c><d><e>deep content</e></d></c></b></a></root>") expect(streamResult).toEqual({ metadata: { partial: false }, xml: [ { root: { a: { b: { c: { d: { e: { "#text": "deep content" }, }, }, }, }, }, }, ], }) }) }) describe("stopNodes feature", () => { it("should treat content of a stopNode as text", () => { parser = new PartialXMLStreamParser({ stopNodes: ["script"], textNodeName: "#text", // alwaysCreateTextNode is true by default }) let streamResult = parser.parseStream("<root><script>let a = 1; console.log(a);</script></root>") expect(streamResult).toEqual({ metadata: { partial: false }, xml: [{ root: { script: { "#text": "let a = 1; console.log(a);" } } }], }) streamResult = parser.parseStream(null) expect(streamResult).toEqual({ metadata: { partial: false }, xml: [{ root: { script: { "#text": "let a = 1; console.log(a);" } } }], }) }) it("should parse attributes of a stopNode", () => { parser = new PartialXMLStreamParser({ stopNodes: ["script"], attributeNamePrefix: "@", textNodeName: "#text", // 
alwaysCreateTextNode is true by default }) let streamResult = parser.parseStream( '<root><script type="text/javascript" src="app.js">let b = 2;</script></root>', ) expect(streamResult).toEqual({ metadata: { partial: false }, xml: [ { root: { script: { "@type": "text/javascript", "@src": "app.js", "#text": "let b = 2;", }, }, }, ], }) streamResult = parser.parseStream(null) expect(streamResult).toEqual({ metadata: { partial: false }, xml: [ { root: { script: { "@type": "text/javascript", "@src": "app.js", "#text": "let b = 2;", }, }, }, ], }) }) it("should not parse XML tags inside a stopNode", () => { parser = new PartialXMLStreamParser({ stopNodes: ["data"], textNodeName: "#text", // alwaysCreateTextNode is true by default }) let streamResult = parser.parseStream("<root><data><item>one</item><value>100</value></data></root>") expect(streamResult).toEqual({ metadata: { partial: false }, xml: [ { root: { data: { "#text": "<item>one</item><value>100</value>" } }, }, ], }) streamResult = parser.parseStream(null) expect(streamResult).toEqual({ metadata: { partial: false }, xml: [ { root: { data: { "#text": "<item>one</item><value>100</value>" } }, }, ], }) }) it("should handle multiple stopNode types", () => { parser = new PartialXMLStreamParser({ stopNodes: ["script", "style"], textNodeName: "#text", // alwaysCreateTextNode is true by default }) let streamResult = parser.parseStream( "<root><script>var c=3;</script><style>.cls{color:red}</style></root>", ) expect(streamResult).toEqual({ metadata: { partial: false }, xml: [ { root: { script: { "#text": "var c=3;" }, style: { "#text": ".cls{color:red}" }, }, }, ], }) streamResult = parser.parseStream(null) expect(streamResult).toEqual({ metadata: { partial: false }, xml: [ { root: { script: { "#text": "var c=3;" }, style: { "#text": ".cls{color:red}" }, }, }, ], }) }) it("should handle self-closing tags within stopNode content", () => { parser = new PartialXMLStreamParser({ stopNodes: ["htmlData"], textNodeName: 
"#text", // alwaysCreateTextNode is true by default }) let streamResult = parser.parseStream( '<doc><htmlData>Some text <br/> and more <img src="test.png"/></htmlData></doc>', ) expect(streamResult).toEqual({ metadata: { partial: false }, xml: [ { doc: { htmlData: { "#text": 'Some text <br/> and more <img src="test.png"/>', }, }, }, ], }) streamResult = parser.parseStream(null) expect(streamResult).toEqual({ metadata: { partial: false }, xml: [ { doc: { htmlData: { "#text": 'Some text <br/> and more <img src="test.png"/>', }, }, }, ], }) }) it("should handle unterminated stopNode at end of stream", () => { parser = new PartialXMLStreamParser({ stopNodes: ["raw"], textNodeName: "#text", // alwaysCreateTextNode is true by default }) let streamResult = parser.parseStream("<root><raw>This content is not closed") expect(streamResult).toEqual({ metadata: { partial: true }, xml: [{ root: { raw: { "#text": "This content is not closed" } } }], }) streamResult = parser.parseStream(null) expect(streamResult).toEqual({ metadata: { partial: true }, xml: [{ root: { raw: { "#text": "This content is not closed" } } }], }) }) it("should correctly handle nested stopNodes of the same name", () => { parser = new PartialXMLStreamParser({ stopNodes: ["codeblock"], textNodeName: "#text", // alwaysCreateTextNode is true by default }) const xml = "<doc><codeblock>Outer <codeblock>Inner</codeblock> Content</codeblock></doc>" let streamResult = parser.parseStream(xml) expect(streamResult).toEqual({ metadata: { partial: false }, xml: [ { doc: { codeblock: { "#text": "Outer <codeblock>Inner</codeblock> Content", }, }, }, ], }) streamResult = parser.parseStream(null) expect(streamResult).toEqual({ metadata: { partial: false }, xml: [ { doc: { codeblock: { "#text": "Outer <codeblock>Inner</codeblock> Content", }, }, }, ], }) }) it("should handle stopNode as the root element", () => { parser = new PartialXMLStreamParser({ stopNodes: ["rawhtml"], textNodeName: "#text", // alwaysCreateTextNode is 
true by default }) let streamResult = parser.parseStream("<rawhtml><head></head><body><p>Hello</p></body></rawhtml>") expect(streamResult).toEqual({ metadata: { partial: false }, xml: [{ rawhtml: { "#text": "<head></head><body><p>Hello</p></body>" } }], }) streamResult = parser.parseStream(null) expect(streamResult).toEqual({ metadata: { partial: false }, xml: [{ rawhtml: { "#text": "<head></head><body><p>Hello</p></body>" } }], }) }) it("should handle empty stopNode", () => { parser = new PartialXMLStreamParser({ stopNodes: ["emptyContent"], textNodeName: "#text", // alwaysCreateTextNode is true by default }) let streamResult = parser.parseStream("<data><emptyContent></emptyContent></data>") expect(streamResult).toEqual({ metadata: { partial: false }, xml: [{ data: { emptyContent: { "#text": "" } } }], }) streamResult = parser.parseStream(null) expect(streamResult).toEqual({ metadata: { partial: false }, xml: [{ data: { emptyContent: { "#text": "" } } }], }) }) it("should handle stopNode with only whitespace content", () => { parser = new PartialXMLStreamParser({ stopNodes: ["whitespaceNode"], textNodeName: "#text", // alwaysCreateTextNode is true by default }) let streamResult = parser.parseStream("<data><whitespaceNode> \n\t </whitespaceNode></data>") expect(streamResult).toEqual({ metadata: { partial: false }, xml: [{ data: { whitespaceNode: { "#text": " \n\t " } } }], }) streamResult = parser.parseStream(null) expect(streamResult).toEqual({ metadata: { partial: false }, xml: [{ data: { whitespaceNode: { "#text": " \n\t " } } }], }) }) it("should handle stopNode content split across multiple chunks", () => { parser = new PartialXMLStreamParser({ stopNodes: ["log"], textNodeName: "#text", // alwaysCreateTextNode is true by default }) parser.parseStream("<system><log>Part 1 data ") let streamResult = parser.parseStream("Part 2 data <inner>tag</inner> and more") expect(streamResult).toEqual({ metadata: { partial: true }, xml: [ { system: { log: { "#text": "Part 1 
data Part 2 data <inner>tag</inner> and more",
              },
            },
          },
        ],
      })

      streamResult = parser.parseStream(" final part.</log></system>")
      expect(streamResult).toEqual({
        metadata: { partial: false },
        xml: [
          {
            system: {
              log: {
                "#text": "Part 1 data Part 2 data <inner>tag</inner> and more final part.",
              },
            },
          },
        ],
      })

      // Signalling end of stream must not change an already complete result.
      streamResult = parser.parseStream(null)
      expect(streamResult).toEqual({
        metadata: { partial: false },
        xml: [
          {
            system: {
              log: {
                "#text": "Part 1 data Part 2 data <inner>tag</inner> and more final part.",
              },
            },
          },
        ],
      })
    })

    // The opening tag (with its attributes) and the raw body arrive in three separate chunks.
    it("should handle stopNode with attributes and content split across chunks", () => {
      parser = new PartialXMLStreamParser({
        stopNodes: ["customTag"],
        attributeNamePrefix: "@",
        textNodeName: "#text", // alwaysCreateTextNode is true by default
      })
      parser.parseStream('<root><customTag id="123" ')
      parser.parseStream('name="test">This is the ')
      let streamResult = parser.parseStream("content with wewnętrzny tag <tag/>.</customTag></root>")
      expect(streamResult).toEqual({
        metadata: { partial: false },
        xml: [
          {
            root: {
              customTag: {
                "@id": "123",
                "@name": "test",
                "#text": "This is the content with wewnętrzny tag <tag/>.",
              },
            },
          },
        ],
      })

      streamResult = parser.parseStream(null)
      expect(streamResult).toEqual({
        metadata: { partial: false },
        xml: [
          {
            root: {
              customTag: {
                "@id": "123",
                "@name": "test",
                "#text": "This is the content with wewnętrzny tag <tag/>.",
              },
            },
          },
        ],
      })
    })

    // stopNodes is given as a bare string here instead of an array.
    it("should handle stop node when stopNodes option is a string", () => {
      parser = new PartialXMLStreamParser({
        stopNodes: "script",
        textNodeName: "#text", // alwaysCreateTextNode is true by default
      })
      let streamResult = parser.parseStream('<root><script>alert("hello");</script></root>')
      expect(streamResult).toEqual({
        metadata: { partial: false },
        xml: [{ root: { script: { "#text": 'alert("hello");' } } }],
      })
    })

    // Only the <metadata> reached via read.file is kept raw; the <metadata>
    // directly under <read> is expected to be parsed normally.
    it("should handle path-based stopNode correctly", () => {
      parser = new PartialXMLStreamParser({
        stopNodes: ["read.file.metadata"],
        textNodeName: "#text", // alwaysCreateTextNode is true by default
      })
      const xml =
        "<read><metadata><item>one</item></metadata><file><metadata><item>two</item><subitem>three</subitem></metadata><other>data</other></file></read>"
      let streamResult = parser.parseStream(xml)
      expect(streamResult).toEqual({
        metadata: { partial: false },
        xml: [
          {
            read: {
              metadata: { item: { "#text": "one" } }, // Not a stopNode
              file: {
                metadata: {
                  "#text": "<item>two</item><subitem>three</subitem>",
                }, // Is a stopNode
                other: { "#text": "data" }, // Not a stopNode
              },
            },
          },
        ],
      })

      streamResult = parser.parseStream(null)
      expect(streamResult).toEqual({
        metadata: { partial: false },
        xml: [
          {
            read: {
              metadata: { item: { "#text": "one" } },
              file: {
                metadata: {
                  "#text": "<item>two</item><subitem>three</subitem>",
                },
                other: { "#text": "data" },
              },
            },
          },
        ],
      })
    })

    it("should prioritize path-based stopNode over simple name if both could match", () => {
      parser = new PartialXMLStreamParser({
        stopNodes: ["read.file.metadata", "nomatch.metadata"], // read.file.metadata will match
        textNodeName: "#text", // alwaysCreateTextNode is true by default
      })
      const xml =
        "<read><metadata><item>one</item></metadata><file><metadata><item>two</item></metadata></file></read>"
      let streamResult = parser.parseStream(xml)
      expect(streamResult).toEqual({
        metadata: { partial: false },
        xml: [
          {
            read: {
              metadata: { item: { "#text": "one" } }, // Not a stopNode
              file: {
                metadata: { "#text": "<item>two</item>" }, // Is a stopNode due to path
              },
            },
          },
        ],
      })
    })

    it("should handle simple stopNode alongside path-based stopNode", () => {
      parser = new PartialXMLStreamParser({
        stopNodes: ["script", "app.config.settings.value"],
        textNodeName: "#text", // alwaysCreateTextNode is true by default
      })
      const xml =
        "<app><script>let x=1;</script><config><settings><value>secret</value><other>val</other></settings></config></app>"
      let streamResult = parser.parseStream(xml)
      expect(streamResult).toEqual({
        metadata: { partial: false },
        xml: [
          {
            app: {
              script: { "#text": "let x=1;" }, // Simple stopNode
              config: {
                settings: {
                  value: { "#text": "secret" }, // Path-based stopNode
                  other: { "#text": "val" }, // Not a stopNode
                },
              },
            },
          },
        ],
      })
    })

    it("should handle a stopNode whose text content contains its own closing tag string", () => {
      // Parser from beforeEach might have other defaults, re-initialize for clarity
      parser = new PartialXMLStreamParser({
        stopNodes: ["c"], // Make 'c' a stopNode
        textNodeName: "#text", // Consistent with other tests and default beforeEach
      })
      // The line breaks are written as explicit "\n" escapes so the fixture cannot
      // drift from the expected "#text" below when the file is reformatted.
      // The body carries the entity-escaped form of its own closing tag (&lt;/c&gt;),
      // which must be preserved verbatim and must not end the stop node.
      const message =
        "<a><b>src/file.ts</b><c>\n" +
        "\tfunction example() {\n" +
        "\t// This has XML-like content: &lt;/c&gt;\n" +
        "\treturn true;\n" +
        "\t}\n" +
        "\t</c></a>"
      const expectedOutput = {
        metadata: { partial: false },
        xml: [
          {
            a: {
              b: { "#text": "src/file.ts" },
              c: {
                "#text":
                  "\n\tfunction example() {\n\t// This has XML-like content: &lt;/c&gt;\n\treturn true;\n\t}\n\t",
              },
            },
          },
        ],
      }
      let streamResult = parser.parseStream(message)
      expect(streamResult).toEqual(expectedOutput)
      // Test with null to ensure final state is also correct and parser considers it complete
      streamResult = parser.parseStream(null)
      expect(streamResult).toEqual(expectedOutput)
    })

    // Wildcard stopNodes: "*" stands in for a path segment in the dotted pattern.
    // In every case below the matched element keeps its inner markup as raw text.
    describe("stopNode wildcard patterns", () => {
      it("should handle wildcard patterns with asterisk at end (prefix matching)", () => {
        parser = new PartialXMLStreamParser({
          stopNodes: ["app.*"],
          textNodeName: "#text",
        })
        const xml =
          "<root><app><config><item>not parsed</item><value>123</value></config></app><app><settings><option>also not parsed</option></settings></app></root>"
        let streamResult = parser.parseStream(xml)
        expect(streamResult).toEqual({
          metadata: { partial: false },
          xml: [
            {
              root: {
                // Two sibling <app> elements are collected into an array.
                app: [
                  { config: { "#text": "<item>not parsed</item><value>123</value>" } },
                  { settings: { "#text": "<option>also not parsed</option>" } },
                ],
              },
            },
          ],
        })
      })

      it("should handle wildcard patterns with asterisk at beginning (suffix matching)", () => {
        parser = new PartialXMLStreamParser({
          stopNodes: ["*.suggest"],
          textNodeName: "#text",
        })
        const xml =
          "<root><follow_up><suggest><option>Option 1</option><desc>Description</desc></suggest></follow_up><other><suggest><item>not parsed</item></suggest></other></root>"
        let streamResult = parser.parseStream(xml)
        expect(streamResult).toEqual({
          metadata: { partial: false },
          xml: [
            {
              root: {
                follow_up: {
                  suggest: { "#text": "<option>Option 1</option><desc>Description</desc>" },
                },
                other: { suggest: { "#text": "<item>not parsed</item>" } },
              },
            },
          ],
        })
      })

      it("should handle wildcard patterns in the middle", () => {
        parser = new PartialXMLStreamParser({
          stopNodes: ["app.*.value"],
          textNodeName: "#text",
        })
        const xml =
          "<root><app><config><value><item>not parsed</item><data>123</data></value></config><settings><value><option>also not parsed</option></value></settings></app></root>"
        let streamResult = parser.parseStream(xml)
        expect(streamResult).toEqual({
          metadata: { partial: false },
          xml: [
            {
              root: {
                app: {
                  config: { value: { "#text": "<item>not parsed</item><data>123</data>" } },
                  settings: { value: { "#text": "<option>also not parsed</option>" } },
                },
              },
            },
          ],
        })
      })

      it("should handle multiple wildcards in one pattern", () => {
        parser = new PartialXMLStreamParser({
          stopNodes: ["*.config.*"],
          textNodeName: "#text",
        })
        const xml =
          "<root><app><config><value><item>not parsed</item></value><setting><data>also not parsed</data></setting></config></app><other><config><data><nested>not parsed either</nested></data></config></other></root>"
        let streamResult = parser.parseStream(xml)
        expect(streamResult).toEqual({
          metadata: { partial: false },
          xml: [
            {
              root: {
                app: {
                  config: {
                    value: { "#text": "<item>not parsed</item>" },
                    setting: { "#text": "<data>also not parsed</data>" },
                  },
                },
                other: {
                  config: { data: { "#text": "<nested>not parsed either</nested>" } },
                },
              },
            },
          ],
        })
      })

      // The pattern has no leading wildcard, yet <follow_up> is matched one level
      // below the document root.
      it("should handle wildcard patterns with suffix matching for longer paths", () => {
        parser = new PartialXMLStreamParser({
          stopNodes: ["follow_up.*"],
          textNodeName: "#text",
        })
        const xml =
          "<ask_followup_question><question>What?</question><follow_up><suggest><option>Option 1</option></suggest><suggest><option>Option 2</option></suggest></follow_up></ask_followup_question>"
        let streamResult = parser.parseStream(xml)
        expect(streamResult).toEqual({
          metadata: { partial: false },
          xml: [
            {
              ask_followup_question: {
                question: { "#text": "What?" },
                follow_up: {
                  suggest: [
                    { "#text": "<option>Option 1</option>" },
                    { "#text": "<option>Option 2</option>" },
                  ],
                },
              },
            },
          ],
        })
      })

      // "a.*" is expected to match both <a> directly under <root> and the <a>
      // nested under <deep><nested>.
      it("should handle complex wildcard patterns with exact and suffix matching", () => {
        parser = new PartialXMLStreamParser({
          stopNodes: ["a.*"],
          textNodeName: "#text",
        })
        const xml =
          "<root><a><b><item>exact match not parsed</item></b></a><deep><nested><a><c><data>suffix match not parsed</data></c></a></nested></deep></root>"
        let streamResult = parser.parseStream(xml)
        expect(streamResult).toEqual({
          metadata: { partial: false },
          xml: [
            {
              root: {
                a: { b: { "#text": "<item>exact match not parsed</item>" } },
                deep: {
                  nested: {
                    a: { c: { "#text": "<data>suffix match not parsed</data>" } },
                  },
                },
              },
            },
          ],
        })
      })

      it("should handle wildcard patterns mixed with regular stopNodes", () => {
        parser = new PartialXMLStreamParser({
          stopNodes: ["script", "app.*", "*.config"],
          textNodeName: "#text",
        })
        const xml =
          "<root><script>let x = <tag>not parsed</tag>;</script><app><settings><item>not parsed</item></settings></app><other><config><data>also not parsed</data></config></other></root>"
        let streamResult = parser.parseStream(xml)
        expect(streamResult).toEqual({
          metadata: { partial: false },
          xml: [
            {
              root: {
                script: { "#text": "let x = <tag>not parsed</tag>;" },
                app: { settings: { "#text": "<item>not parsed</item>" } },
                other: { config: { "#text": "<data>also not parsed</data>" } },
              },
            },
          ],
        })
      })

      // Negative case: neither app.other nor different.config satisfies
      // "app.config.*", so the whole document is parsed normally.
      it("should not match partial wildcard patterns", () => {
        parser = new PartialXMLStreamParser({
          stopNodes: ["app.config.*"],
          textNodeName: "#text",
        })
        const xml =
          "<root><app><other><item>normal parsing</item></other></app><different><config><value><data>also normal</data></value></config></different></root>"
        let streamResult = parser.parseStream(xml)
        expect(streamResult).toEqual({
          metadata: { partial: false },
          xml: [
            {
              root: {
                app: { other: { item: { "#text": "normal parsing" } } },
                different: {
                  config: { value: { data: { "#text": "also normal" } } },
                },
              },
            },
          ],
        })
      })

      // Attributes on the stop node itself are still parsed; attributes inside
      // its body stay part of the raw text.
      it("should handle wildcard patterns with attributes", () => {
        parser = new PartialXMLStreamParser({
          stopNodes: ["app.*"],
          textNodeName: "#text",
          attributeNamePrefix: "@",
        })
        const xml =
          '<root><app><config id="test"><item>not parsed</item><value attr="val">also not parsed</value></config></app></root>'
        let streamResult = parser.parseStream(xml)
        expect(streamResult).toEqual({
          metadata: { partial: false },
          xml: [
            {
              root: {
                app: {
                  config: {
                    "@id": "test",
                    "#text": '<item>not parsed</item><value attr="val">also not parsed</value>',
                  },
                },
              },
            },
          ],
        })
      })
    })

    // CDATA inside a stop node: the markers are stripped and the enclosed text is kept.
    describe("stopNode CDATA handling", () => {
      it("should handle CDATA content inside stopnodes", () => {
        parser = new PartialXMLStreamParser({
          stopNodes: ["script"],
          textNodeName: "#text",
        })
        // NOTE(review): whitespace inside this template literal and inside the expected
        // string below looks collapsed to single spaces — confirm both against the
        // parser's actual output before relying on this assertion.
        const input = `<root> <script type="text/javascript"> <![CDATA[ if (x < y && z > 0) { alert("Hello <world>!"); } ]]> </script> </root>`
        const result = parser.parseStream(input)
        // The CDATA content should be extracted and the CDATA markers should be removed
        expect(result!.xml![0].root.script["#text"]).toBe(` if (x < y && z > 0) { alert("Hello <world>!"); } `)
      })

      it("should handle multiple CDATA sections in stopnodes", () => {
        parser = new PartialXMLStreamParser({
          stopNodes: ["code"],
          textNodeName: "#text",
        })
        const input = `<root> <code> Some text before <![CDATA[<tag>content</tag>]]> Some text between <![CDATA[more & content]]> Some text after </code> </root>`
        const result = parser.parseStream(input)
        // CDATA markers should be removed, content should be preserved
        expect(result!.xml![0].root.code["#text"]).toBe(
          ` Some text before <tag>content</tag> Some text between more & content Some text after `,
        )
      })

      it("should handle partial CDATA in streaming stopnodes", () => {
        parser = new PartialXMLStreamParser({
          stopNodes: ["data"],
          textNodeName: "#text",
        })
        // Stream the content in chunks
        let result = parser.parseStream("<root><data><![CDATA[partial")
        expect(result!.metadata.partial).toBe(true)
        result = parser.parseStream(" content]]></data></root>")
        expect(result!.metadata.partial).toBe(false)
        // Should extract CDATA content properly
        expect(result!.xml![0].root.data["#text"]).toBe("partial content")
      })

      // The "</b>" inside the CDATA section must not be taken as the stop node's end tag.
      it("should handle CDATA spanning across tag boundaries in stopnodes", () => {
        parser = new PartialXMLStreamParser({
          stopNodes: ["a.b"],
          textNodeName: "#text",
        })
        const input = `<a><b><![CDATA[adsasdas</b>]]></b></a>`
        const result = parser.parseStream(input)
        // CDATA content should be extracted properly even when it spans across tag boundaries
        expect(result!.xml![0].a.b["#text"]).toBe("adsasdas</b>")
      })
    })
  })

  // With alwaysCreateTextNode set, text is always reported under the text node key,
  // including for elements that contain nothing but text.
  describe("alwaysCreateTextNode option", () => {
    it("should always create #text node when alwaysCreateTextNode is true for non-whitespace text", () => {
      parser = new PartialXMLStreamParser({
        alwaysCreateTextNode: true,
        textNodeName: "#text",
      })
      let streamResult = parser.parseStream("<doc>Text</doc>")
      expect(streamResult).toEqual({
        metadata: { partial: false },
        xml: [{ doc: { "#text": "Text" } }],
      })
    })

    it("should not simplify text-only elements if alwaysCreateTextNode is true", () => {
      parser = new PartialXMLStreamParser({
        alwaysCreateTextNode: true,
        textNodeName: "#text",
      })
      let streamResult = parser.parseStream("<parent><child>simple text</child></parent>")
      expect(streamResult).toEqual({
        metadata: { partial: false },
        xml: [{ parent: { child: { "#text": "simple text" } } }],
      })
    })

    it("should create #text for elements with attributes and text when alwaysCreateTextNode is true", () => {
      parser = new PartialXMLStreamParser({
        alwaysCreateTextNode: true,
        textNodeName: "#text",
        attributeNamePrefix: "@",
      })
      let streamResult = parser.parseStream('<parent><child attr="val">text content</child></parent>')
      expect(streamResult).toEqual({
        metadata: { partial: false },
        xml: [{ parent: { child: { "@attr": "val", "#text": "text content" } } }],
      })
    })

    it("should handle mixed content with alwaysCreateTextNode true, omitting whitespace-only nodes", () => {
      parser = new PartialXMLStreamParser({
        alwaysCreateTextNode: true,
        textNodeName: "#text",
      })
      const xml = "<root>text1 <item>itemtext</item> text2 <another/> text3</root>"
      let streamResult = parser.parseStream(xml)
      expect(streamResult).toEqual({
        metadata: { partial: false },
        xml: [
          {
            root: {
              // NOTE(review): the spacing between the merged text segments may have been
              // collapsed in this string — confirm against the parser's actual output.
              "#text": "text1 text2 text3",
              item: { "#text": "itemtext" },
              another: {},
            },
          },
        ],
      })
    })
  })

  describe("parsePrimitives option", () => {
    it("should parse numbers and booleans in text nodes if parsePrimitives is true", () => {
      parser = new PartialXMLStreamParser({
        parsePrimitives: true,
        textNodeName: "#text", // alwaysCreateTextNode is true by default
      })
      let streamResult = parser.parseStream(
        "<data><num>123</num><bool>tru