partial-xml-stream-parser
Version:
A lenient XML stream parser for Node.js and browsers that can handle incomplete or malformed XML data, with depth control, CDATA support for XML serialization and round-trip parsing, wildcard pattern support for stopNodes, and CDATA handling within stopNodes.
1,629 lines (1,517 loc) • 91.4 kB
text/typescript
import { describe, it, expect, beforeEach } from "vitest"
import { PartialXMLStreamParser, xmlObjectToString } from "../index"
describe("PartialXMLStreamParser", () => {
// Parser instance shared by the tests below; several tests replace it with a differently configured one.
let parser: PartialXMLStreamParser
beforeEach(() => {
	// Every test starts from a fresh parser that stores text under the "#text" key.
	const defaultOptions = { textNodeName: "#text" }
	parser = new PartialXMLStreamParser(defaultOptions)
})
it("should parse a stream chunk by chunk correctly", () => {
	// Tree expected from the moment the <name> text is complete; later closing tags do not alter it.
	const finishedTree = [{ read: { args: { file: { name: { "#text": "asd" } } } } }]
	// Ordered string chunks, each paired with the snapshot expected right after it is fed.
	const steps = [
		{ chunk: "<read>", partial: true, xml: [{ read: {} }] },
		{ chunk: "<args>", partial: true, xml: [{ read: { args: {} } }] },
		{
			chunk: "<file><name>as",
			partial: true,
			xml: [{ read: { args: { file: { name: { "#text": "as" } } } } }],
		},
		{ chunk: "d</name>", partial: true, xml: finishedTree },
		{ chunk: "</file></args>", partial: true, xml: finishedTree },
		{ chunk: "</read>", partial: false, xml: finishedTree },
	]
	for (const { chunk, partial, xml } of steps) {
		expect(parser.parseStream(chunk)).toEqual({ metadata: { partial }, xml })
	}
	// Passing null signals end of stream; the completed result is expected unchanged.
	expect(parser.parseStream(null)).toEqual({
		metadata: { partial: false },
		xml: finishedTree,
	})
})
it("should handle a single incomplete chunk, then completion", () => {
	// Snapshot expected after the second chunk and again after the end-of-stream signal.
	const completed = {
		metadata: { partial: false },
		xml: [
			{
				request: {
					id: { "#text": "123" },
					data: { "#text": "value<data>" },
				},
			},
		],
	}
	// The first chunk stops in the middle of a tag; the "<da" fragment is expected inside the text of <data>.
	const afterFirstChunk = parser.parseStream("<request><id>123</id><data>value<da")
	expect(afterFirstChunk).toEqual({
		metadata: { partial: true },
		xml: [{ request: { id: { "#text": "123" }, data: { "#text": "value<da" } } }],
	})
	const afterSecondChunk = parser.parseStream("ta></request>")
	expect(afterSecondChunk).toEqual(completed)
	// Signal end of stream.
	expect(parser.parseStream(null)).toEqual(completed)
})
it("should handle a text-only stream", () => {
	// Input without any markup is expected as a single root-level string entry.
	const expected = { metadata: { partial: false }, xml: ["Just some text"] }
	expect(parser.parseStream("Just some text")).toEqual(expected)
	// Ending the stream must not alter the result.
	expect(parser.parseStream(null)).toEqual(expected)
})
it("should handle self-closing tags and mixed content", () => {
	// Self-closing children are expected as empty objects, the text between them under "#text".
	const expected = {
		metadata: { partial: false },
		xml: [{ root: { item: {}, "#text": "Text after item", another: {} } }],
	}
	const parsed = parser.parseStream("<root><item/>Text after item<another/></root>")
	expect(parsed).toEqual(expected)
	const ended = parser.parseStream(null)
	expect(ended).toEqual(expected)
})
it("should handle XML entities in text nodes", () => {
	parser = new PartialXMLStreamParser({ textNodeName: "#text" }) // Re-init to be sure about options
	// The input carries the predefined entities in escaped form (&amp; &quot; &apos;) so that
	// entity decoding is actually exercised; previously the input held the already-decoded
	// characters and the test could pass without any decoding taking place.
	const expected = {
		metadata: { partial: false },
		xml: [{ doc: { "#text": "Hello & \"World\" 'Test'" } }],
	}
	let streamResult = parser.parseStream("<doc>Hello &amp; &quot;World&quot; &apos;Test&apos;</doc>")
	expect(streamResult).toEqual(expected)
	// Ending the stream must not alter the decoded text.
	streamResult = parser.parseStream(null)
	expect(streamResult).toEqual(expected)
})
it("should handle XML entities in attribute values", () => {
	parser = new PartialXMLStreamParser({
		textNodeName: "#text",
		attributeNamePrefix: "@",
	})
	// The attribute value is written with &lt; / &gt; so that the test covers entity decoding
	// inside attributes; a raw "<value>" would not contain any entity and its ">" could be
	// taken for the end of the tag.
	const expected = {
		metadata: { partial: false },
		xml: [{ doc: { "@val": "<value>" } }],
	}
	let streamResult = parser.parseStream('<doc val="&lt;value&gt;" />')
	expect(streamResult).toEqual(expected)
	// Ending the stream must not alter the decoded attribute.
	streamResult = parser.parseStream(null)
	expect(streamResult).toEqual(expected)
})
it("should handle numeric XML entities (decimal and hex)", () => {
	parser = new PartialXMLStreamParser({ textNodeName: "#text" })
	// Decimal (&#60; &#32;) and hexadecimal (&#x26; &#x3E;) character references are mixed on
	// purpose. The input must contain the references themselves: feeding the literal characters
	// would present "<Hello& World>" to the parser as markup instead of text.
	const expected = {
		metadata: { partial: false },
		xml: [{ doc: { "#text": "<Hello& World>" } }],
	}
	let streamResult = parser.parseStream("<doc>&#60;Hello&#x26;&#32;World&#x3E;</doc>") // decodes to <Hello& World>
	expect(streamResult).toEqual(expected)
	// Ending the stream must not alter the decoded text.
	streamResult = parser.parseStream(null)
	expect(streamResult).toEqual(expected)
})
it("should correctly parse multiple chunks that form a complete XML", () => {
	parser = new PartialXMLStreamParser({
		textNodeName: "#text",
		attributeNamePrefix: "@",
	})
	// Chunk boundaries fall inside a tag name, inside a text node and before a self-closing slash.
	const leadingChunks = ["<data><item", ' key="value">Te', "st</item><item2"]
	for (const piece of leadingChunks) {
		parser.parseStream(piece)
	}
	const expected = {
		metadata: { partial: false },
		xml: [{ data: { item: { "@key": "value", "#text": "Test" }, item2: {} } }],
	}
	// Only the result after the last chunk is inspected.
	expect(parser.parseStream("/></data>")).toEqual(expected)
	expect(parser.parseStream(null)).toEqual(expected)
})
it("should return empty array xml for empty stream", () => {
	parser = new PartialXMLStreamParser()
	// An empty chunk is expected to report partial: true with no nodes...
	const afterEmptyChunk = parser.parseStream("")
	expect(afterEmptyChunk).toEqual({ metadata: { partial: true }, xml: [] })
	// ...while the end-of-stream signal is expected to flip partial to false.
	const afterEnd = parser.parseStream(null)
	expect(afterEnd).toEqual({ metadata: { partial: false }, xml: [] })
})
it("should handle stream with only XML declaration and comments", () => {
	parser = new PartialXMLStreamParser()
	// Neither the declaration nor the comment is expected to produce an entry in xml.
	const nothingParsed = { metadata: { partial: false }, xml: [] }
	expect(parser.parseStream('<?xml version="1.0"?><!-- comment -->')).toEqual(nothingParsed)
	expect(parser.parseStream(null)).toEqual(nothingParsed)
})
it("should handle custom attributeNamePrefix", () => {
	// Each case pairs a configured prefix with the object key expected for the "attr" attribute.
	const cases = [
		{ prefix: "_", key: "_attr" },
		{ prefix: "", key: "attr" },
	]
	for (const { prefix, key } of cases) {
		parser = new PartialXMLStreamParser({ attributeNamePrefix: prefix })
		const expected = {
			metadata: { partial: false },
			xml: [{ doc: { [key]: "val" } }],
		}
		expect(parser.parseStream('<doc attr="val" />')).toEqual(expected)
		// The result must be the same after the end-of-stream signal.
		expect(parser.parseStream(null)).toEqual(expected)
	}
})
it("should parse CDATA sections correctly", () => {
	parser = new PartialXMLStreamParser({ textNodeName: "#text" })
	// Markup characters inside the CDATA section are expected verbatim in the text node.
	const expected = {
		metadata: { partial: false },
		xml: [{ root: { "#text": "This is <CDATA> text with & special chars" } }],
	}
	const input = "<root><![CDATA[This is <CDATA> text with & special chars]]></root>"
	expect(parser.parseStream(input)).toEqual(expected)
	expect(parser.parseStream(null)).toEqual(expected)
})
it("should handle unterminated CDATA section", () => {
	parser = new PartialXMLStreamParser({ textNodeName: "#text" })
	// The CDATA section is never closed, so partial is expected to stay true even after the stream ends.
	const expected = {
		metadata: { partial: true },
		xml: [{ root: { "#text": "Unterminated cdata" } }],
	}
	const whileOpen = parser.parseStream("<root><![CDATA[Unterminated cdata")
	expect(whileOpen).toEqual(expected)
	const atEnd = parser.parseStream(null)
	expect(atEnd).toEqual(expected)
})
it("should handle CDATA at root level if it is the only content", () => {
	parser = new PartialXMLStreamParser()
	// A CDATA section outside any element is expected as a plain string entry in xml.
	const expected = { metadata: { partial: false }, xml: ["Root CDATA"] }
	expect(parser.parseStream("<![CDATA[Root CDATA]]>")).toEqual(expected)
	expect(parser.parseStream(null)).toEqual(expected)
})
it("should handle unterminated comments", () => {
	parser = new PartialXMLStreamParser()
	// The open comment contributes nothing to <root>, and the result is expected to remain partial.
	const expected = { metadata: { partial: true }, xml: [{ root: {} }] }
	const whileOpen = parser.parseStream("<root><!-- This is an unterminated comment")
	expect(whileOpen).toEqual(expected)
	const atEnd = parser.parseStream(null)
	expect(atEnd).toEqual(expected)
})
it("should handle unterminated DOCTYPE", () => {
	parser = new PartialXMLStreamParser()
	// Builds the expected empty result for a given partial flag.
	const emptyResult = (partial: boolean) => ({ metadata: { partial }, xml: [] })
	const whileOpen = parser.parseStream('<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"')
	expect(whileOpen).toEqual(emptyResult(true))
	// After the end-of-stream signal the result is expected to be non-partial and still empty.
	const atEnd = parser.parseStream(null)
	expect(atEnd).toEqual(emptyResult(false))
})
it("should handle unterminated XML declaration", () => {
	parser = new PartialXMLStreamParser()
	// Builds the expected empty result for a given partial flag.
	const emptyResult = (partial: boolean) => ({ metadata: { partial }, xml: [] })
	const whileOpen = parser.parseStream('<?xml version="1.0" encoding="UTF-8"')
	expect(whileOpen).toEqual(emptyResult(true))
	// After the end-of-stream signal the result is expected to be non-partial and still empty.
	const atEnd = parser.parseStream(null)
	expect(atEnd).toEqual(emptyResult(false))
})
it("should leniently handle mismatched closing tags", () => {
	parser = new PartialXMLStreamParser({ textNodeName: "#text" })
	// The closing tag that matches no open element is expected to be kept as text of <item>.
	const expected = {
		metadata: { partial: false },
		xml: [{ root: { item: { "#text": "text</mismatched>" } } }],
	}
	const parsed = parser.parseStream("<root><item>text</mismatched></item></root>")
	expect(parsed).toEqual(expected)
	const ended = parser.parseStream(null)
	expect(ended).toEqual(expected)
})
it("should handle attributes without explicit values (boolean attributes) as true", () => {
	parser = new PartialXMLStreamParser({ attributeNamePrefix: "@" })
	// Valueless attributes are expected as boolean true; attributes with a value keep their string.
	const expected = {
		metadata: { partial: false },
		xml: [
			{
				input: {
					"@disabled": true,
					"@checked": "checked",
					"@required": true,
				},
			},
		],
	}
	expect(parser.parseStream('<input disabled checked="checked" required />')).toEqual(expected)
	expect(parser.parseStream(null)).toEqual(expected)
})
it("should correctly simplify text-only elements", () => {
	// Uses the parser from beforeEach; the text of <child> is expected under a "#text" key
	// rather than as a bare string.
	const expected = {
		metadata: { partial: false },
		xml: [{ parent: { child: { "#text": "simple text" } } }],
	}
	const parsed = parser.parseStream("<parent><child>simple text</child></parent>")
	expect(parsed).toEqual(expected)
	const ended = parser.parseStream(null)
	expect(ended).toEqual(expected)
})
it("should not simplify elements with attributes even if they also have text", () => {
	parser = new PartialXMLStreamParser({
		textNodeName: "#text",
		attributeNamePrefix: "@",
	})
	// Attribute and text are expected side by side on the same element object.
	const expected = {
		metadata: { partial: false },
		xml: [{ parent: { child: { "@attr": "val", "#text": "text content" } } }],
	}
	const input = '<parent><child attr="val">text content</child></parent>'
	expect(parser.parseStream(input)).toEqual(expected)
	expect(parser.parseStream(null)).toEqual(expected)
})
it("should not simplify elements with child elements", () => {
	// Parser built with no options at all.
	parser = new PartialXMLStreamParser()
	// Nested elements are expected as nested objects; the self-closing leaf as an empty object.
	const expected = {
		metadata: { partial: false },
		xml: [{ parent: { child: { grandchild: {} } } }],
	}
	const parsed = parser.parseStream("<parent><child><grandchild/></child></parent>")
	expect(parsed).toEqual(expected)
	const ended = parser.parseStream(null)
	expect(ended).toEqual(expected)
})
it("should ignore text nodes containing only whitespace by default", () => {
	// Uses the parser from beforeEach. The blanks around <item> are expected to vanish,
	// while the text inside <item> is kept under "#text".
	const expected = {
		metadata: { partial: false },
		xml: [{ root: { item: { "#text": "text" } } }],
	}
	expect(parser.parseStream("<root> <item>text</item> </root>")).toEqual(expected)
	expect(parser.parseStream(null)).toEqual(expected)
})
it("should omit whitespace text nodes even if alwaysCreateTextNode is true", () => {
	parser = new PartialXMLStreamParser({
		textNodeName: "#text",
		alwaysCreateTextNode: true,
	})
	// Whitespace-only runs between tags are expected to be left out despite the explicit option.
	const expected = {
		metadata: { partial: false },
		xml: [{ root: { item: { "#text": "text" } } }],
	}
	const parsed = parser.parseStream("<root> <item>text</item> </root>")
	expect(parsed).toEqual(expected)
	const ended = parser.parseStream(null)
	expect(ended).toEqual(expected)
})
it("should handle text at root level before any tags", () => {
	parser = new PartialXMLStreamParser()
	// Root-level text is expected as its own string entry ahead of the element entry.
	const expected = {
		metadata: { partial: false },
		xml: ["Leading text", { root: {} }],
	}
	expect(parser.parseStream("Leading text<root/>")).toEqual(expected)
	expect(parser.parseStream(null)).toEqual(expected)
})
it("should handle text at root level after all tags are closed", () => {
	parser = new PartialXMLStreamParser()
	// Root-level text is expected as its own string entry following the element entry.
	const expected = {
		metadata: { partial: false },
		xml: [{ root: {} }, "Trailing text"],
	}
	expect(parser.parseStream("<root/>Trailing text")).toEqual(expected)
	expect(parser.parseStream(null)).toEqual(expected)
})
it("should handle multiple root elements", () => {
	// Uses the parser from beforeEach; each root element is expected as a separate xml entry.
	const expected = {
		metadata: { partial: false },
		xml: [{ rootA: {} }, { rootB: { "#text": "text" } }],
	}
	const parsed = parser.parseStream("<rootA/><rootB>text</rootB>")
	expect(parsed).toEqual(expected)
	const ended = parser.parseStream(null)
	expect(ended).toEqual(expected)
})
it("should handle multiple root elements in specific order", () => {
	// Uses the parser from beforeEach; entries are expected in document order,
	// and the element with no content as an empty object.
	const expected = {
		metadata: { partial: false },
		xml: [{ thinking: { "#text": "a" } }, { "some-tool": {} }],
	}
	const parsed = parser.parseStream("<thinking>a</thinking><some-tool></some-tool>")
	expect(parsed).toEqual(expected)
	const ended = parser.parseStream(null)
	expect(ended).toEqual(expected)
})
it("should handle Buffer input", () => {
	// Uses the parser from beforeEach; the chunk is handed over as a Buffer instead of a string.
	const chunk = Buffer.from("<data>value</data>")
	const expected = {
		metadata: { partial: false },
		xml: [{ data: { "#text": "value" } }],
	}
	expect(parser.parseStream(chunk)).toEqual(expected)
	expect(parser.parseStream(null)).toEqual(expected)
})
it("should handle multiple attributes correctly", () => {
	parser = new PartialXMLStreamParser({ attributeNamePrefix: "@" })
	// The three attributes use double quotes, single quotes and no quotes respectively.
	const input = "<tag attr1=\"val1\" attr2='val2' attr3=val3 />"
	const expected = {
		metadata: { partial: false },
		xml: [{ tag: { "@attr1": "val1", "@attr2": "val2", "@attr3": "val3" } }],
	}
	expect(parser.parseStream(input)).toEqual(expected)
	expect(parser.parseStream(null)).toEqual(expected)
})
it("should handle incomplete tags at end of chunk and then completed", () => {
	// Both scenarios start from an identically configured, brand-new parser.
	const freshParser = () =>
		new PartialXMLStreamParser({
			textNodeName: "#text",
			attributeNamePrefix: "@",
		})

	// Scenario 1: the second chunk stops inside the closing tag of <root>.
	parser = freshParser()
	parser.parseStream("<root><item")
	const truncated = parser.parseStream(" attr='1'>Text</item></r")
	expect(truncated!.xml![0].root.item).toEqual({
		"@attr": "1",
		"#text": "Text",
	})
	// The dangling "</r" fragment is expected as text of <root>.
	expect(truncated!.xml![0].root["#text"]).toBe("</r")
	expect(truncated!.metadata.partial).toBe(true)

	// Scenario 2: the second chunk closes <root> properly.
	parser = freshParser()
	parser.parseStream("<root><item")
	const expected = {
		metadata: { partial: false },
		xml: [{ root: { item: { "@attr": "1", "#text": "Text" } } }],
	}
	expect(parser.parseStream(" attr='1'>Text</item></root>")).toEqual(expected)
	expect(parser.parseStream(null)).toEqual(expected)
})
it("should handle empty string chunks in midst of stream", () => {
	// Uses the parser from beforeEach; an empty chunk is fed between two real ones.
	parser.parseStream("<doc>")
	parser.parseStream("")
	const withContent = parser.parseStream("<content>Hello</content>")
	expect(withContent!.xml![0].doc.content).toEqual({ "#text": "Hello" })
	// <doc> is still open at this point.
	expect(withContent!.metadata.partial).toBe(true)
	const closed = parser.parseStream("</doc>")
	expect(closed!.xml![0].doc.content).toEqual({
		"#text": "Hello",
	})
	expect(closed!.metadata.partial).toBe(false)
	const ended = parser.parseStream(null)
	expect(ended).toEqual({
		metadata: { partial: false },
		xml: [{ doc: { content: { "#text": "Hello" } } }],
	})
})
it("should set partial:true when stream ends with an incomplete tag", () => {
	// Installs a brand-new parser, feeds one chunk, then returns the result of the end-of-stream call.
	const resultAtEndOfStream = (input: string) => {
		parser = new PartialXMLStreamParser({ textNodeName: "#text" })
		parser.parseStream(input)
		return parser.parseStream(null)
	}

	// Unfinished opening tag: the fragment is expected as text of the parent.
	expect(resultAtEndOfStream("<root><incompleteTag")).toEqual({
		metadata: { partial: true },
		xml: [{ root: { "#text": "<incompleteTag" } }],
	})
	parser.reset()

	// Unfinished closing tag: the fragment is expected as text next to the parsed <item>.
	expect(resultAtEndOfStream("<root><item>Text</item></incompleteCl")).toEqual({
		metadata: { partial: true },
		xml: [{ root: { item: { "#text": "Text" }, "#text": "</incompleteCl" } }],
	})
	parser.reset()

	// A lone "<" at the very end.
	expect(resultAtEndOfStream("<root><item>Text</item><")).toEqual({
		metadata: { partial: true },
		xml: [{ root: { item: { "#text": "Text" }, "#text": "<" } }],
	})
	parser.reset()

	// Root tag cut off inside an attribute value: no <root> element is expected,
	// the whole fragment is expected as text.
	expect(resultAtEndOfStream("<root attr='val")).toEqual({
		metadata: { partial: true },
		xml: [{ "#text": "<root attr='val" }],
	})
})
describe("maxDepth feature", () => {
	it("should treat tags beyond maxDepth as stopNodes", () => {
		parser = new PartialXMLStreamParser({
			maxDepth: 2,
			textNodeName: "#text",
		})
		// Markup nested below <level2> is expected back as raw text of <level2>.
		const expected = {
			metadata: { partial: false },
			xml: [
				{
					root: {
						level1: {
							level2: { "#text": "<level3>content</level3>" },
						},
					},
				},
			],
		}
		const parsed = parser.parseStream(
			"<root><level1><level2><level3>content</level3></level2></level1></root>",
		)
		expect(parsed).toEqual(expected)
		// Same snapshot after the end-of-stream signal.
		expect(parser.parseStream(null)).toEqual(expected)
	})
	it("should handle maxDepth with nested tags and attributes", () => {
		parser = new PartialXMLStreamParser({
			maxDepth: 3,
			textNodeName: "#text",
			attributeNamePrefix: "@",
		})
		const input = '<root><a><b><c id="test"><d>deep content</d></c></b></a></root>'
		// <c> is expected to keep its attribute while its children come back as raw text.
		const expected = {
			metadata: { partial: false },
			xml: [
				{
					root: {
						a: {
							b: {
								c: {
									"#text": "<d>deep content</d>",
									"@id": "test",
								},
							},
						},
					},
				},
			],
		}
		expect(parser.parseStream(input)).toEqual(expected)
	})
	it("should work with maxDepth 1 (only root level allowed)", () => {
		parser = new PartialXMLStreamParser({
			maxDepth: 1,
			textNodeName: "#text",
		})
		const input = "<root><child>content</child></root>"
		const expected = {
			metadata: { partial: false },
			xml: [
				{
					root: {
						child: { "#text": "content" },
					},
				},
			],
		}
		expect(parser.parseStream(input)).toEqual(expected)
	})
	it("should work with maxDepth 0 (treat everything as text)", () => {
		parser = new PartialXMLStreamParser({
			maxDepth: 0,
			textNodeName: "#text",
		})
		const input = "<root><child>content</child></root>"
		// The complete document is expected back as one root-level string.
		const expected = {
			metadata: { partial: false },
			xml: ["<root><child>content</child></root>"],
		}
		expect(parser.parseStream(input)).toEqual(expected)
	})
	it("should combine maxDepth with existing stopNodes", () => {
		parser = new PartialXMLStreamParser({
			maxDepth: 3,
			stopNodes: ["root.level1.script"],
			textNodeName: "#text",
		})
		const input =
			"<root><level1><script>code</script><level2><level3>content</level3></level2></level1></root>"
		const expected = {
			metadata: { partial: false },
			xml: [
				{
					root: {
						level1: {
							script: { "#text": "code" },
							level2: {
								level3: { "#text": "content" },
							},
						},
					},
				},
			],
		}
		expect(parser.parseStream(input)).toEqual(expected)
	})
	it("should handle self-closing tags at max depth", () => {
		parser = new PartialXMLStreamParser({
			maxDepth: 2,
			textNodeName: "#text",
		})
		const input = "<root><level1><selfclosing/><level2>content</level2></level1></root>"
		// The self-closing element is expected as an empty object.
		const expected = {
			metadata: { partial: false },
			xml: [
				{
					root: {
						level1: {
							selfclosing: {},
							level2: { "#text": "content" },
						},
					},
				},
			],
		}
		expect(parser.parseStream(input)).toEqual(expected)
	})
	it("should handle null maxDepth (no depth limit)", () => {
		parser = new PartialXMLStreamParser({
			maxDepth: null,
			textNodeName: "#text",
		})
		const input = "<root><a><b><c><d><e>deep content</e></d></c></b></a></root>"
		// All six levels are expected as nested objects.
		const expected = {
			metadata: { partial: false },
			xml: [
				{
					root: {
						a: {
							b: {
								c: {
									d: {
										e: { "#text": "deep content" },
									},
								},
							},
						},
					},
				},
			],
		}
		expect(parser.parseStream(input)).toEqual(expected)
	})
})
describe("stopNodes feature", () => {
it("should treat content of a stopNode as text", () => {
	parser = new PartialXMLStreamParser({
		stopNodes: ["script"],
		textNodeName: "#text",
	})
	// The body of <script> is expected verbatim under "#text".
	const expected = {
		metadata: { partial: false },
		xml: [{ root: { script: { "#text": "let a = 1; console.log(a);" } } }],
	}
	const input = "<root><script>let a = 1; console.log(a);</script></root>"
	expect(parser.parseStream(input)).toEqual(expected)
	expect(parser.parseStream(null)).toEqual(expected)
})
it("should parse attributes of a stopNode", () => {
	parser = new PartialXMLStreamParser({
		stopNodes: ["script"],
		attributeNamePrefix: "@",
		textNodeName: "#text",
	})
	// Attributes of the stop node are expected as prefixed keys next to its raw text.
	const expected = {
		metadata: { partial: false },
		xml: [
			{
				root: {
					script: {
						"@type": "text/javascript",
						"@src": "app.js",
						"#text": "let b = 2;",
					},
				},
			},
		],
	}
	const parsed = parser.parseStream(
		'<root><script type="text/javascript" src="app.js">let b = 2;</script></root>',
	)
	expect(parsed).toEqual(expected)
	const ended = parser.parseStream(null)
	expect(ended).toEqual(expected)
})
it("should not parse XML tags inside a stopNode", () => {
	parser = new PartialXMLStreamParser({
		stopNodes: ["data"],
		textNodeName: "#text",
	})
	// The child markup of <data> is expected as one raw string, not as nested objects.
	const expected = {
		metadata: { partial: false },
		xml: [
			{
				root: { data: { "#text": "<item>one</item><value>100</value>" } },
			},
		],
	}
	const input = "<root><data><item>one</item><value>100</value></data></root>"
	expect(parser.parseStream(input)).toEqual(expected)
	expect(parser.parseStream(null)).toEqual(expected)
})
it("should handle multiple stopNode types", () => {
	parser = new PartialXMLStreamParser({
		stopNodes: ["script", "style"],
		textNodeName: "#text",
	})
	// Both configured tag names are expected to keep their content as raw text.
	const expected = {
		metadata: { partial: false },
		xml: [
			{
				root: {
					script: { "#text": "var c=3;" },
					style: { "#text": ".cls{color:red}" },
				},
			},
		],
	}
	const parsed = parser.parseStream(
		"<root><script>var c=3;</script><style>.cls{color:red}</style></root>",
	)
	expect(parsed).toEqual(expected)
	const ended = parser.parseStream(null)
	expect(ended).toEqual(expected)
})
it("should handle self-closing tags within stopNode content", () => {
	parser = new PartialXMLStreamParser({
		stopNodes: ["htmlData"],
		textNodeName: "#text",
	})
	// Self-closing tags inside the stop node are expected to stay part of its raw text.
	const expected = {
		metadata: { partial: false },
		xml: [
			{
				doc: {
					htmlData: {
						"#text": 'Some text <br/> and more <img src="test.png"/>',
					},
				},
			},
		],
	}
	const parsed = parser.parseStream(
		'<doc><htmlData>Some text <br/> and more <img src="test.png"/></htmlData></doc>',
	)
	expect(parsed).toEqual(expected)
	const ended = parser.parseStream(null)
	expect(ended).toEqual(expected)
})
it("should handle unterminated stopNode at end of stream", () => {
	parser = new PartialXMLStreamParser({
		stopNodes: ["raw"],
		textNodeName: "#text",
	})
	// <raw> is never closed, so the result is expected to remain partial even after the stream ends.
	const expected = {
		metadata: { partial: true },
		xml: [{ root: { raw: { "#text": "This content is not closed" } } }],
	}
	const whileOpen = parser.parseStream("<root><raw>This content is not closed")
	expect(whileOpen).toEqual(expected)
	const atEnd = parser.parseStream(null)
	expect(atEnd).toEqual(expected)
})
it("should correctly handle nested stopNodes of the same name", () => {
	parser = new PartialXMLStreamParser({
		stopNodes: ["codeblock"],
		textNodeName: "#text",
	})
	// The inner <codeblock> pair is expected inside the outer node's text;
	// only the outermost closing tag ends the stop node.
	const expected = {
		metadata: { partial: false },
		xml: [
			{
				doc: {
					codeblock: {
						"#text": "Outer <codeblock>Inner</codeblock> Content",
					},
				},
			},
		],
	}
	const input = "<doc><codeblock>Outer <codeblock>Inner</codeblock> Content</codeblock></doc>"
	expect(parser.parseStream(input)).toEqual(expected)
	expect(parser.parseStream(null)).toEqual(expected)
})
it("should handle stopNode as the root element", () => {
	parser = new PartialXMLStreamParser({
		stopNodes: ["rawhtml"],
		textNodeName: "#text",
	})
	// The stop node is the outermost element here; all of its markup is expected as raw text.
	const expected = {
		metadata: { partial: false },
		xml: [{ rawhtml: { "#text": "<head></head><body><p>Hello</p></body>" } }],
	}
	const input = "<rawhtml><head></head><body><p>Hello</p></body></rawhtml>"
	expect(parser.parseStream(input)).toEqual(expected)
	expect(parser.parseStream(null)).toEqual(expected)
})
it("should handle empty stopNode", () => {
	parser = new PartialXMLStreamParser({
		stopNodes: ["emptyContent"],
		textNodeName: "#text",
	})
	// A stop node without content is expected to carry an empty-string text entry.
	const expected = {
		metadata: { partial: false },
		xml: [{ data: { emptyContent: { "#text": "" } } }],
	}
	const parsed = parser.parseStream("<data><emptyContent></emptyContent></data>")
	expect(parsed).toEqual(expected)
	const ended = parser.parseStream(null)
	expect(ended).toEqual(expected)
})
it("should handle stopNode with only whitespace content", () => {
	parser = new PartialXMLStreamParser({
		stopNodes: ["whitespaceNode"],
		textNodeName: "#text",
	})
	// Whitespace inside the stop node is expected to be preserved exactly.
	const expected = {
		metadata: { partial: false },
		xml: [{ data: { whitespaceNode: { "#text": " \n\t " } } }],
	}
	const parsed = parser.parseStream("<data><whitespaceNode> \n\t </whitespaceNode></data>")
	expect(parsed).toEqual(expected)
	const ended = parser.parseStream(null)
	expect(ended).toEqual(expected)
})
it("should handle stopNode content split across multiple chunks", () => {
	parser = new PartialXMLStreamParser({
		stopNodes: ["log"],
		textNodeName: "#text",
	})
	// Wraps the accumulated <log> text in the expected result shape.
	const resultWithLog = (partial: boolean, text: string) => ({
		metadata: { partial },
		xml: [
			{
				system: {
					log: {
						"#text": text,
					},
				},
			},
		],
	})
	parser.parseStream("<system><log>Part 1 data ")
	// While <log> is still open, the text gathered so far is expected and the result is partial.
	const midStream = parser.parseStream("Part 2 data <inner>tag</inner> and more")
	expect(midStream).toEqual(resultWithLog(true, "Part 1 data Part 2 data <inner>tag</inner> and more"))
	const fullText = "Part 1 data Part 2 data <inner>tag</inner> and more final part."
	const afterClose = parser.parseStream(" final part.</log></system>")
	expect(afterClose).toEqual(resultWithLog(false, fullText))
	const ended = parser.parseStream(null)
	expect(ended).toEqual(resultWithLog(false, fullText))
})
it("should handle stopNode with attributes and content split across chunks", () => {
	parser = new PartialXMLStreamParser({
		stopNodes: ["customTag"],
		attributeNamePrefix: "@",
		textNodeName: "#text",
	})
	// The first boundary falls between two attributes, the second inside the stop node's content.
	for (const piece of ['<root><customTag id="123" ', 'name="test">This is the ']) {
		parser.parseStream(piece)
	}
	const expected = {
		metadata: { partial: false },
		xml: [
			{
				root: {
					customTag: {
						"@id": "123",
						"@name": "test",
						"#text": "This is the content with wewnętrzny tag <tag/>.",
					},
				},
			},
		],
	}
	const afterLastChunk = parser.parseStream("content with wewnętrzny tag <tag/>.</customTag></root>")
	expect(afterLastChunk).toEqual(expected)
	const ended = parser.parseStream(null)
	expect(ended).toEqual(expected)
})
it("should handle stop node when stopNodes option is a string", () => {
	// stopNodes is given as a bare string here rather than as an array.
	parser = new PartialXMLStreamParser({
		stopNodes: "script",
		textNodeName: "#text",
	})
	const input = '<root><script>alert("hello");</script></root>'
	const expected = {
		metadata: { partial: false },
		xml: [{ root: { script: { "#text": 'alert("hello");' } } }],
	}
	expect(parser.parseStream(input)).toEqual(expected)
})
it("should handle path-based stopNode correctly", () => {
	parser = new PartialXMLStreamParser({
		stopNodes: ["read.file.metadata"],
		textNodeName: "#text",
	})
	// <metadata> appears twice; only the one at read.file.metadata is expected as raw text.
	const expected = {
		metadata: { partial: false },
		xml: [
			{
				read: {
					// Directly under <read>: parsed normally.
					metadata: { item: { "#text": "one" } },
					file: {
						// Matches the configured path: raw text.
						metadata: {
							"#text": "<item>two</item><subitem>three</subitem>",
						},
						// Sibling of the stop node: parsed normally.
						other: { "#text": "data" },
					},
				},
			},
		],
	}
	const input =
		"<read><metadata><item>one</item></metadata><file><metadata><item>two</item><subitem>three</subitem></metadata><other>data</other></file></read>"
	expect(parser.parseStream(input)).toEqual(expected)
	expect(parser.parseStream(null)).toEqual(expected)
})
it("should prioritize path-based stopNode over simple name if both could match", () => {
  // NOTE(review): both configured entries are dotted paths and no bare "metadata"
  // name is listed, so the "simple name" case from the title does not appear to be
  // exercised here — confirm intent.
  parser = new PartialXMLStreamParser({
    stopNodes: ["read.file.metadata", "nomatch.metadata"], // read.file.metadata will match
    textNodeName: "#text", // alwaysCreateTextNode is true by default
  })

  const source =
    "<read><metadata><item>one</item></metadata><file><metadata><item>two</item></metadata></file></read>"

  const expectedXml = [
    {
      read: {
        metadata: { item: { "#text": "one" } }, // Not a stopNode
        file: {
          metadata: { "#text": "<item>two</item>" }, // Is a stopNode due to path
        },
      },
    },
  ]

  expect(parser.parseStream(source)).toEqual({
    metadata: { partial: false },
    xml: expectedXml,
  })
})
it("should handle simple stopNode alongside path-based stopNode", () => {
  // One entry is a bare tag name, the other a full dotted path.
  parser = new PartialXMLStreamParser({
    stopNodes: ["script", "app.config.settings.value"],
    textNodeName: "#text", // alwaysCreateTextNode is true by default
  })

  const source =
    "<app><script>let x=1;</script><config><settings><value>secret</value><other>val</other></settings></config></app>"

  const expectedXml = [
    {
      app: {
        script: { "#text": "let x=1;" }, // Simple stopNode
        config: {
          settings: {
            value: { "#text": "secret" }, // Path-based stopNode
            other: { "#text": "val" }, // Not a stopNode
          },
        },
      },
    },
  ]

  const parsed = parser.parseStream(source)
  expect(parsed).toEqual({ metadata: { partial: false }, xml: expectedXml })
})
it("should handle a stopNode whose text content contains its own closing tag string", () => {
  // Replace the beforeEach parser so that 'c' is explicitly a stopNode.
  parser = new PartialXMLStreamParser({
    stopNodes: ["c"], // Make 'c' a stopNode
    textNodeName: "#text", // Consistent with other tests and default beforeEach
  })

  // Raw body of <c>. It contains the literal text "</c>" inside a code comment,
  // ahead of the closing tag that ends the element in the input below.
  const cBody = "\n\tfunction example() {\n\t// This has XML-like content: </c>\n\treturn true;\n\t}\n\t"
  const input = "<a><b>src/file.ts</b><c>" + cBody + "</c></a>"

  const expected = {
    metadata: { partial: false },
    xml: [
      {
        a: {
          b: { "#text": "src/file.ts" },
          c: {
            "#text": cBody,
          },
        },
      },
    ],
  }

  expect(parser.parseStream(input)).toEqual(expected)
  // Test with null to ensure final state is also correct and parser considers it complete
  expect(parser.parseStream(null)).toEqual(expected)
})
describe("stopNode wildcard patterns", () => {
it("should handle wildcard patterns with asterisk at end (prefix matching)", () => {
  parser = new PartialXMLStreamParser({
    stopNodes: ["app.*"],
    textNodeName: "#text",
  })

  // Two sibling <app> elements under <root>, each with one differently named child.
  const source =
    "<root><app><config><item>not parsed</item><value>123</value></config></app><app><settings><option>also not parsed</option></settings></app></root>"

  // Each direct child of an <app> is expected to hold its markup as raw text;
  // the repeated <app> elements are collected into an array.
  const expectedApps = [
    { config: { "#text": "<item>not parsed</item><value>123</value>" } },
    { settings: { "#text": "<option>also not parsed</option>" } },
  ]

  const parsed = parser.parseStream(source)
  expect(parsed).toEqual({
    metadata: { partial: false },
    xml: [{ root: { app: expectedApps } }],
  })
})
it("should handle wildcard patterns with asterisk at beginning (suffix matching)", () => {
  parser = new PartialXMLStreamParser({
    stopNodes: ["*.suggest"],
    textNodeName: "#text",
  })

  // <suggest> appears under two different parents: <follow_up> and <other>.
  const source =
    "<root><follow_up><suggest><option>Option 1</option><desc>Description</desc></suggest></follow_up><other><suggest><item>not parsed</item></suggest></other></root>"

  // Both <suggest> elements are expected to keep their inner markup as text.
  const expectedRoot = {
    follow_up: {
      suggest: { "#text": "<option>Option 1</option><desc>Description</desc>" },
    },
    other: { suggest: { "#text": "<item>not parsed</item>" } },
  }

  const parsed = parser.parseStream(source)
  expect(parsed).toEqual({
    metadata: { partial: false },
    xml: [{ root: expectedRoot }],
  })
})
it("should handle wildcard patterns in the middle", () => {
  parser = new PartialXMLStreamParser({
    stopNodes: ["app.*.value"],
    textNodeName: "#text",
  })

  // <value> sits under two different children of <app>: <config> and <settings>.
  const source =
    "<root><app><config><value><item>not parsed</item><data>123</data></value></config><settings><value><option>also not parsed</option></value></settings></app></root>"

  // Both <value> elements are expected as raw text, while <config> and
  // <settings> themselves are parsed as ordinary elements.
  const expectedApp = {
    config: { value: { "#text": "<item>not parsed</item><data>123</data>" } },
    settings: { value: { "#text": "<option>also not parsed</option>" } },
  }

  const parsed = parser.parseStream(source)
  expect(parsed).toEqual({
    metadata: { partial: false },
    xml: [{ root: { app: expectedApp } }],
  })
})
it("should handle multiple wildcards in one pattern", () => {
  parser = new PartialXMLStreamParser({
    stopNodes: ["*.config.*"],
    textNodeName: "#text",
  })

  // <config> appears under <app> (with two children) and under <other> (with one).
  const source =
    "<root><app><config><value><item>not parsed</item></value><setting><data>also not parsed</data></setting></config></app><other><config><data><nested>not parsed either</nested></data></config></other></root>"

  // Every direct child of either <config> is expected to hold raw text.
  const expectedRoot = {
    app: {
      config: {
        value: { "#text": "<item>not parsed</item>" },
        setting: { "#text": "<data>also not parsed</data>" },
      },
    },
    other: {
      config: { data: { "#text": "<nested>not parsed either</nested>" } },
    },
  }

  const parsed = parser.parseStream(source)
  expect(parsed).toEqual({
    metadata: { partial: false },
    xml: [{ root: expectedRoot }],
  })
})
it("should handle wildcard patterns with suffix matching for longer paths", () => {
  parser = new PartialXMLStreamParser({
    stopNodes: ["follow_up.*"],
    textNodeName: "#text",
  })

  // <follow_up> is nested under <ask_followup_question> and has two <suggest> children.
  const source =
    "<ask_followup_question><question>What?</question><follow_up><suggest><option>Option 1</option></suggest><suggest><option>Option 2</option></suggest></follow_up></ask_followup_question>"

  // Each <suggest> is expected as raw text; the repeated tag becomes an array.
  // <question> is outside <follow_up> and is parsed normally.
  const expectedSuggestions = [
    { "#text": "<option>Option 1</option>" },
    { "#text": "<option>Option 2</option>" },
  ]

  const parsed = parser.parseStream(source)
  expect(parsed).toEqual({
    metadata: { partial: false },
    xml: [
      {
        ask_followup_question: {
          question: { "#text": "What?" },
          follow_up: { suggest: expectedSuggestions },
        },
      },
    ],
  })
})
it("should handle complex wildcard patterns with exact and suffix matching", () => {
  parser = new PartialXMLStreamParser({
    stopNodes: ["a.*"],
    textNodeName: "#text",
  })

  // <a> appears at two depths: directly under <root>, and under <root><deep><nested>.
  const source =
    "<root><a><b><item>exact match not parsed</item></b></a><deep><nested><a><c><data>suffix match not parsed</data></c></a></nested></deep></root>"

  // The child of <a> is expected as raw text in both positions.
  const shallowA = { b: { "#text": "<item>exact match not parsed</item>" } }
  const deepA = { c: { "#text": "<data>suffix match not parsed</data>" } }

  const parsed = parser.parseStream(source)
  expect(parsed).toEqual({
    metadata: { partial: false },
    xml: [
      {
        root: {
          a: shallowA,
          deep: {
            nested: {
              a: deepA,
            },
          },
        },
      },
    ],
  })
})
it("should handle wildcard patterns mixed with regular stopNodes", () => {
  // One bare tag name plus one trailing-wildcard and one leading-wildcard pattern.
  parser = new PartialXMLStreamParser({
    stopNodes: ["script", "app.*", "*.config"],
    textNodeName: "#text",
  })

  const source =
    "<root><script>let x = <tag>not parsed</tag>;</script><app><settings><item>not parsed</item></settings></app><other><config><data>also not parsed</data></config></other></root>"

  // Each of the three configured entries is expected to produce one raw-text node.
  const expectedRoot = {
    script: { "#text": "let x = <tag>not parsed</tag>;" },
    app: { settings: { "#text": "<item>not parsed</item>" } },
    other: { config: { "#text": "<data>also not parsed</data>" } },
  }

  const parsed = parser.parseStream(source)
  expect(parsed).toEqual({
    metadata: { partial: false },
    xml: [{ root: expectedRoot }],
  })
})
it("should not match partial wildcard patterns", () => {
  parser = new PartialXMLStreamParser({
    stopNodes: ["app.config.*"],
    textNodeName: "#text",
  })

  // Neither branch contains the full app.config prefix: <app> has no <config>
  // child, and the only <config> sits under <different>.
  const source =
    "<root><app><other><item>normal parsing</item></other></app><different><config><value><data>also normal</data></value></config></different></root>"

  // Everything is therefore expected to be parsed down to its leaf text nodes.
  const expectedRoot = {
    app: { other: { item: { "#text": "normal parsing" } } },
    different: {
      config: { value: { data: { "#text": "also normal" } } },
    },
  }

  const parsed = parser.parseStream(source)
  expect(parsed).toEqual({
    metadata: { partial: false },
    xml: [{ root: expectedRoot }],
  })
})
it("should handle wildcard patterns with attributes", () => {
  parser = new PartialXMLStreamParser({
    stopNodes: ["app.*"],
    textNodeName: "#text",
    attributeNamePrefix: "@",
  })

  // <config> carries an attribute of its own, and so does a tag inside its body.
  const source =
    '<root><app><config id="test"><item>not parsed</item><value attr="val">also not parsed</value></config></app></root>'

  // The attribute on <config> is expected as a prefixed key, while the attribute
  // on the inner <value> stays inside the raw text.
  const expectedConfig = {
    "@id": "test",
    "#text": '<item>not parsed</item><value attr="val">also not parsed</value>',
  }

  const parsed = parser.parseStream(source)
  expect(parsed).toEqual({
    metadata: { partial: false },
    xml: [{ root: { app: { config: expectedConfig } } }],
  })
})
})
describe("stopNode CDATA handling", () => {
it("should handle CDATA content inside stopnodes", () => {
// <script> is configured as a stopNode for this test.
parser = new PartialXMLStreamParser({
stopNodes: ["script"],
textNodeName: "#text",
})
// The <script> body is a single CDATA section whose payload contains "<", ">",
// "&&" and a tag-like "<world>". Whitespace inside this template literal is part
// of the input and therefore significant to the assertion below.
const input = `<root>
<script type="text/javascript">
<![CDATA[
if (x < y && z > 0) {
alert("Hello <world>!");
}
]]>
</script>
</root>`
// The whole document is parsed in one call.
const result = parser.parseStream(input)
// The CDATA content should be extracted and the CDATA markers should be removed
expect(result!.xml![0].root.script["#text"]).toBe(`
if (x < y && z > 0) {
alert("Hello <world>!");
}
`)
})
it("should handle multiple CDATA sections in stopnodes", () => {
// <code> is configured as a stopNode for this test.
parser = new PartialXMLStreamParser({
stopNodes: ["code"],
textNodeName: "#text",
})
// The <code> body interleaves plain text with two CDATA sections; the first
// holds tag-like markup and the second a bare "&". Whitespace inside this
// template literal is part of the input and significant to the assertion below.
const input = `<root>
<code>
Some text before
<![CDATA[<tag>content</tag>]]>
Some text between
<![CDATA[more & content]]>
Some text after
</code>
</root>`
// The whole document is parsed in one call.
const result = parser.parseStream(input)
// CDATA markers should be removed, content should be preserved
expect(result!.xml![0].root.code["#text"]).toBe(`
Some text before
<tag>content</tag>
Some text between
more & content
Some text after
`)
})
it("should handle partial CDATA in streaming stopnodes", () => {
  parser = new PartialXMLStreamParser({
    stopNodes: ["data"],
    textNodeName: "#text",
  })

  // First chunk stops in the middle of the CDATA payload, so the result is partial.
  const afterFirstChunk = parser.parseStream("<root><data><![CDATA[partial")
  expect(afterFirstChunk!.metadata.partial).toBe(true)

  // Second chunk finishes the CDATA section and closes every open element.
  const afterSecondChunk = parser.parseStream(" content]]></data></root>")
  expect(afterSecondChunk!.metadata.partial).toBe(false)

  // The two payload halves are expected joined, without the CDATA markers.
  const dataText = afterSecondChunk!.xml![0].root.data["#text"]
  expect(dataText).toBe("partial content")
})
it("should handle CDATA spanning across tag boundaries in stopnodes", () => {
  parser = new PartialXMLStreamParser({
    stopNodes: ["a.b"],
    textNodeName: "#text",
  })

  // The CDATA payload itself contains "</b>", the stopNode's own closing tag.
  const parsed = parser.parseStream("<a><b><![CDATA[adsasdas</b>]]></b></a>")

  // Only the CDATA payload — including its embedded "</b>" — is expected as the text.
  const bText = parsed!.xml![0].a.b["#text"]
  expect(bText).toBe("adsasdas</b>")
})
})
})
describe("alwaysCreateTextNode option", () => {
it("should always create #text node when alwaysCreateTextNode is true for non-whitespace text", () => {
  // The option is set explicitly here instead of relying on the default.
  parser = new PartialXMLStreamParser({
    alwaysCreateTextNode: true,
    textNodeName: "#text",
  })

  // A text-only element is expected as an object with a "#text" key.
  const expectedXml = [{ doc: { "#text": "Text" } }]
  expect(parser.parseStream("<doc>Text</doc>")).toEqual({
    metadata: { partial: false },
    xml: expectedXml,
  })
})
it("should not simplify text-only elements if alwaysCreateTextNode is true", () => {
  parser = new PartialXMLStreamParser({
    alwaysCreateTextNode: true,
    textNodeName: "#text",
  })

  // The nested text-only <child> is expected as { "#text": ... }, not a bare string.
  const expectedXml = [{ parent: { child: { "#text": "simple text" } } }]
  const parsed = parser.parseStream("<parent><child>simple text</child></parent>")
  expect(parsed).toEqual({ metadata: { partial: false }, xml: expectedXml })
})
it("should create #text for elements with attributes and text when alwaysCreateTextNode is true", () => {
  parser = new PartialXMLStreamParser({
    alwaysCreateTextNode: true,
    textNodeName: "#text",
    attributeNamePrefix: "@",
  })

  // The prefixed attribute and the text node are expected side by side on <child>.
  const expectedChild = { "@attr": "val", "#text": "text content" }
  const parsed = parser.parseStream('<parent><child attr="val">text content</child></parent>')
  expect(parsed).toEqual({
    metadata: { partial: false },
    xml: [{ parent: { child: expectedChild } }],
  })
})
it("should handle mixed content with alwaysCreateTextNode true, omitting whitespace-only nodes", () => {
  parser = new PartialXMLStreamParser({
    alwaysCreateTextNode: true,
    textNodeName: "#text",
  })

  // <root> mixes three runs of text with a text-only child and a self-closing child.
  const source = "<root>text1 <item>itemtext</item> text2 <another/> text3</root>"

  // The text runs of <root> are expected in a single "#text" entry next to the
  // child elements; the self-closing <another/> is expected as an empty object.
  const expectedRoot = {
    "#text": "text1 text2 text3",
    item: { "#text": "itemtext" },
    another: {},
  }

  const parsed = parser.parseStream(source)
  expect(parsed).toEqual({
    metadata: { partial: false },
    xml: [{ root: expectedRoot }],
  })
})
})
describe("parsePrimitives option", () => {
it("should parse numbers and booleans in text nodes if parsePrimitives is true", () => {
parser = new PartialXMLStreamParser({
parsePrimitives: true,
textNodeName: "#text", // alwaysCreateTextNode is true by default
})
let streamResult = parser.parseStream(
"<data><num>123</num><bool>tru