partial-xml-stream-parser
A lenient XML stream parser for Node.js and browsers that can handle incomplete or malformed XML data, with depth control, CDATA support for XML serialization and round-trip parsing, wildcard pattern support for stopNodes, and CDATA handling within stopNodes.
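A minimal usage sketch, assuming the package exposes PartialXMLStreamParser from its main entry point; it uses only the parseStream API exercised in the benchmark file below (string chunks are fed as they arrive, and passing null finalizes the stream). How the parsed result is retrieved is not shown in that file.

// Assumption: the class is exported from the package's main entry point.
import { PartialXMLStreamParser } from "partial-xml-stream-parser"

// Treat <script> elements as stop nodes, i.e. leave their contents unparsed.
const parser = new PartialXMLStreamParser({ stopNodes: ["script"] })

// Feed partial chunks as they arrive; the parser tolerates incomplete XML.
parser.parseStream('<root><item id="1">Te')
parser.parseStream("xt1</item></root>")

// Passing null signals the end of the stream (the benchmarks below finalize this way).
parser.parseStream(null)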
321 lines (289 loc) • 9.52 kB
text/typescript
import { describe, test } from "vitest"
import { PartialXMLStreamParser } from "../index"
import { performance } from "perf_hooks"

/**
 * Calculates statistical measures for a list of durations.
 */
function calculateStats(durations: number[]) {
	if (!durations || durations.length === 0) {
		return { min: 0, max: 0, median: 0, p95: 0, p99: 0, mean: 0, stdDev: 0 }
	}
	const sorted = [...durations].sort((a, b) => a - b)
	const N = sorted.length
	const mean = durations.reduce((a, b) => a + b, 0) / N
	const p95Index = Math.max(0, Math.min(N - 1, Math.floor(N * 0.95)))
	const p99Index = Math.max(0, Math.min(N - 1, Math.floor(N * 0.99)))
	const medianIndex = Math.max(0, Math.min(N - 1, Math.floor(N / 2)))
	return {
		min: sorted[0],
		max: sorted[N - 1],
		median: sorted[medianIndex],
		p95: sorted[p95Index],
		p99: sorted[p99Index],
		mean,
		stdDev: N > 0 ? Math.sqrt(durations.map((x) => Math.pow(x - mean, 2)).reduce((a, b) => a + b, 0) / N) : 0,
	}
}

/**
 * Gets an appropriate iteration count for benchmarks based on input size.
 */
function getIterationCount(size: number) {
	if (size <= 10000) return 100 // ~10KB
	if (size <= 100000) return 20 // ~100KB
	if (size <= 500000) return 10 // ~500KB
	return 5 // For very large tests
}

/**
 * Runs a benchmark for a given operation and logs time and heap statistics.
 */
function runParserBenchmark(name: string, benchmarkFn: () => void, xmlInputString?: string) {
	const inputLength = xmlInputString ? xmlInputString.length : 1000
	const iterations = getIterationCount(inputLength)
	console.log(`\n--- Benchmark: ${name} (${iterations} iterations) ---`)
	// Warm-up
	try {
		benchmarkFn()
	} catch (e) {
		console.error(`Warm-up for "${name}" failed:`, e)
		return
	}
	// global.gc is only defined when Node is started with --expose-gc
	if (global.gc) {
		global.gc()
	}
	const initialHeap = process.memoryUsage().heapUsed
	const durations: number[] = []
	for (let i = 0; i < iterations; i++) {
		try {
			const startTime = performance.now()
			benchmarkFn()
			const endTime = performance.now()
			durations.push(endTime - startTime)
		} catch (e) {
			console.error(`Iteration ${i + 1} for "${name}" failed:`, e)
			durations.push(NaN)
		}
	}
	const validDurations = durations.filter((d) => !isNaN(d))
	if (global.gc) {
		global.gc()
	}
	const finalHeap = process.memoryUsage().heapUsed
	if (validDurations.length === 0 && durations.length > 0) {
		console.log(" All iterations failed. No time statistics available.")
	} else if (validDurations.length < durations.length) {
		console.log(
			` ${durations.length - validDurations.length} iterations failed. Statistics from ${validDurations.length} successful iterations:`,
		)
	}
	const stats = calculateStats(validDurations)
	const totalTime = validDurations.reduce((a, b) => a + b, 0)
	console.log(" Time Metrics:")
	console.log(` Total: ${totalTime.toFixed(3)} ms (for ${validDurations.length} successful iterations)`)
	console.log(` Mean: ${stats.mean.toFixed(3)} ms`)
	console.log(
		` Min: ${stats.min.toFixed(3)} ms, Max: ${stats.max.toFixed(3)} ms, Median: ${stats.median.toFixed(3)} ms`,
	)
	console.log(
		` P95: ${stats.p95.toFixed(3)} ms, P99: ${stats.p99.toFixed(3)} ms, StdDev: ${stats.stdDev.toFixed(3)} ms`,
	)
	console.log(" Heap Metrics:")
	const toMB = (bytes: number) => (bytes / 1024 / 1024).toFixed(3)
	console.log(` Initial: ${toMB(initialHeap)} MB`)
	console.log(` Final: ${toMB(finalHeap)} MB`)
	console.log(` Delta: ${toMB(finalHeap - initialHeap)} MB`)
}

describe("PartialXMLStreamParser Benchmarks", () => {
	const simpleXML = '<root><item id="1">Text1</item><item id="2">Text2</item></root>'
	const complexXML = `
		<data store="main">
			<book category="COOKING">
				<title lang="en">Everyday Italian</title>
				<author>Giada De Laurentiis</author>
				<year>2005</year>
				<price>30.00</price>
			</book>
			<book category="CHILDREN">
				<title lang="en">Harry Potter</title>
				<author>J K. Rowling</author>
				<year>2005</year>
				<price>29.99</price>
			</book>
			<book category="WEB">
				<title lang="en">Learning XML</title>
				<author>Erik T. Ray</author>
				<year>2003</year>
				<price>39.95</price>
			</book>
			<misc type="info">Some additional info here</misc>
		</data>
	`
	const multiRootXML = "<item>A</item><item>B</item><item>C</item><item>D</item><item>E</item>"
	const cdataXML =
		"<root><data><![CDATA[This is some <CDATA> content with & symbols that should be preserved.]]></data></root>"
	const stopNodeXML =
		'<root><script type="text/javascript">function greet(){ console.log("hello <world>"); }</script><content>Parse me</content></root>'
	const generateLargeXML = (numItems: number) => {
		let xml = "<largeRoot>"
		for (let i = 0; i < numItems; i++) {
			xml += `<item id="${i}">`
			xml += `<name>Item Name ${i}</name>`
			xml += `<value>${Math.random() * 1000}</value>`
			xml += `<description>This is a description for item ${i}. It contains some text to make it a bit larger.</description>`
			xml += `<nested><subitem attr="sub${i}">Sub Value ${i}</subitem></nested>`
			xml += "</item>"
		}
		xml += "</largeRoot>"
		return xml
	}
	const largeXMLString = generateLargeXML(1000) // 1000 items
test("Parse simple XML (single chunk)", () => {
runParserBenchmark(
"Parse simple XML (single chunk)",
() => {
const parser = new PartialXMLStreamParser()
parser.parseStream(simpleXML)
parser.parseStream(null)
},
simpleXML,
)
})
test("Parse complex XML (single chunk)", () => {
runParserBenchmark(
"Parse complex XML (single chunk)",
() => {
const parser = new PartialXMLStreamParser()
parser.parseStream(complexXML)
parser.parseStream(null)
},
complexXML,
)
})
test("Parse simple XML (multiple chunks)", () => {
runParserBenchmark(
"Parse simple XML (multiple chunks)",
() => {
const parser = new PartialXMLStreamParser()
const chunks = simpleXML.match(/.{1,10}/g) || [simpleXML]
chunks.forEach((chunk) => parser.parseStream(chunk))
parser.parseStream(null)
},
simpleXML,
)
})
test("Parse complex XML (multiple chunks)", () => {
runParserBenchmark(
"Parse complex XML (multiple chunks)",
() => {
const parser = new PartialXMLStreamParser()
const chunks = complexXML.match(/.{1,50}/g) || [complexXML]
chunks.forEach((chunk) => parser.parseStream(chunk))
parser.parseStream(null)
},
complexXML,
)
})
test("Parse XML with multiple root elements", () => {
runParserBenchmark(
"Parse XML with multiple root elements",
() => {
const parser = new PartialXMLStreamParser()
parser.parseStream(multiRootXML)
parser.parseStream(null)
},
multiRootXML,
)
})
test("Parse XML with CDATA", () => {
runParserBenchmark(
"Parse XML with CDATA",
() => {
const parser = new PartialXMLStreamParser()
parser.parseStream(cdataXML)
parser.parseStream(null)
},
cdataXML,
)
})
test("Parse XML with stop nodes", () => {
runParserBenchmark(
"Parse XML with stop nodes",
() => {
const parser = new PartialXMLStreamParser({ stopNodes: ["script"] })
parser.parseStream(stopNodeXML)
parser.parseStream(null)
},
stopNodeXML,
)
})
test("Parse XML with alwaysCreateTextNode true", () => {
runParserBenchmark(
"Parse XML with alwaysCreateTextNode true",
() => {
const parser = new PartialXMLStreamParser({
alwaysCreateTextNode: true,
})
parser.parseStream(complexXML)
parser.parseStream(null)
},
complexXML,
)
})
test("Parse XML with parsePrimitives true", () => {
runParserBenchmark(
"Parse XML with parsePrimitives true",
() => {
const parser = new PartialXMLStreamParser({ parsePrimitives: true })
parser.parseStream(complexXML)
parser.parseStream(null)
},
complexXML,
)
})
test("Parse large XML (1000 items, single chunk)", () => {
runParserBenchmark(
"Parse large XML (1000 items, single chunk)",
() => {
const parser = new PartialXMLStreamParser()
parser.parseStream(largeXMLString)
parser.parseStream(null)
},
largeXMLString,
)
})
test("Parse large XML (1000 items, multiple chunks)", () => {
runParserBenchmark(
"Parse large XML (1000 items, multiple chunks)",
() => {
const parser = new PartialXMLStreamParser()
const chunks = largeXMLString.match(/.{1,1024}/g) || [largeXMLString]
chunks.forEach((chunk) => parser.parseStream(chunk))
parser.parseStream(null)
},
largeXMLString,
)
})
test("Parse very large XML (5000 items, single chunk)", () => {
const veryLargeXMLString = generateLargeXML(5000)
runParserBenchmark(
"Parse very large XML (5000 items, single chunk)",
() => {
const parser = new PartialXMLStreamParser()
parser.parseStream(veryLargeXMLString)
parser.parseStream(null)
},
veryLargeXMLString,
)
})
test("Parse very large XML (5000 items, multiple chunks)", () => {
const veryLargeXMLString = generateLargeXML(5000)
runParserBenchmark(
"Parse very large XML (5000 items, multiple chunks)",
() => {
const parser = new PartialXMLStreamParser()
const chunks = veryLargeXMLString.match(/.{1,1024}/g) || [veryLargeXMLString]
chunks.forEach((chunk) => parser.parseStream(chunk))
parser.parseStream(null)
},
veryLargeXMLString,
)
})
})