UNPKG

partial-xml-stream-parser

Version:

A lenient XML stream parser for Node.js and browsers that can handle incomplete or malformed XML data, with depth control, CDATA support for XML serialization and round-trip parsing, wildcard pattern support for stopNodes, and CDATA handling within stopNodes.

github.com/samhvw8/partial-xml-stream-parser

samhvw8/partial-xml-stream-parser

150 lines • 6.69 kB

JavaScript

"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
exports.handleIncompleteState = handleIncompleteState;
const dom_builder_1 = require("./dom-builder");
const utils_1 = require("./utils");

/** Matches the tail of a self-closing tag such as `<x />`. Non-global, so `.test` is stateless. */
const SELF_CLOSING_TAG_END = /\/\s*>$/;

/** Characters that have special meaning inside a regular-expression source. */
const REGEXP_METACHARACTERS = /[.*+?^${}()|[\]\\]/g;

/**
 * Handles text content for CDATA and stop node sections.
 *
 * Runs `content` through `processCDATAInStopnode`, optionally through
 * `tryParsePrimitive` (when `customOptions.parsePrimitives` is truthy), and
 * stores the result on `target` under `textNodeName` via `addValueToObject`.
 * Empty or missing content is ignored.
 *
 * @param {object} context - Parser context; only `customOptions` is read here.
 * @param {string} content - Raw text to store.
 * @param {object} target - Object that receives the text value.
 * @param {string} textNodeName - Key under which the text is stored.
 * @returns {void}
 */
function handleTextContent(context, content, target, textNodeName) {
  if (!content || content.length === 0) {
    return;
  }
  // Process CDATA content in stopnodes
  const processedContent = (0, utils_1.processCDATAInStopnode)(content);
  const textToAdd = context.customOptions.parsePrimitives
    ? (0, utils_1.tryParsePrimitive)(processedContent)
    : processedContent;
  (0, dom_builder_1.addValueToObject)(target, textNodeName, textToAdd, context.customOptions);
}

/**
 * Escapes regular-expression metacharacters so `text` matches literally.
 *
 * @param {string} text - Text to embed in a RegExp source.
 * @returns {string} The escaped text.
 */
function escapeForRegExp(text) {
  return String(text).replace(REGEXP_METACHARACTERS, "\\$&");
}

/**
 * Creates and caches a global regex that finds opening (`<tag ...>`) and
 * closing (`</tag>`) tags of `tagName`.
 *
 * The tag name is escaped before interpolation: XML names may contain `.`,
 * which would otherwise match any character and miscount nesting depth.
 * Only own properties of `cache` count as hits, so tag names such as
 * `constructor` or `toString` do not resolve to inherited members.
 *
 * The returned regex carries the `g` flag and is shared between calls;
 * callers must set `lastIndex` before using it.
 *
 * @param {string} tagName - Element name to search for.
 * @param {object} cache - Object used as a tagName -> RegExp cache.
 * @returns {RegExp} The cached or newly created pattern.
 */
function getOrCreateTagPattern(tagName, cache) {
  const hasOwnEntry = Object.prototype.hasOwnProperty.call(cache, tagName);
  if (hasOwnEntry && cache[tagName]) {
    return cache[tagName];
  }
  const escapedName = escapeForRegExp(tagName);
  const pattern = `<\\s*${escapedName}(?:\\s[^>]*)?>|<\\/\\s*${escapedName}\\s*>`;
  const tagPattern = new RegExp(pattern, "g");
  // defineProperty (rather than plain assignment) guarantees an own data
  // property even for keys like "__proto__".
  Object.defineProperty(cache, tagName, {
    value: tagPattern,
    writable: true,
    enumerable: true,
    configurable: true,
  });
  return tagPattern;
}

/**
 * Processes incomplete XML parsing states.
 *
 * Looks at `parserContext.incompleteStructureState` (left over from earlier
 * input) and tries to finish that structure using the current
 * `streamingBuffer`. Depending on the state type it advances
 * `parserContext.parsingIndex`, writes text into the DOM being built, sets
 * `parserContext.reparsedSegmentContext`, and clears
 * `incompleteStructureState` once the structure is resolved.
 *
 * @param {object} parserContext - Mutable parser state. Fields read here:
 *   `incompleteStructureState`, `streamingBuffer`, `customOptions`,
 *   `parsingIndex`, `tagStack`, `currentPointer`, `accumulator`,
 *   `stopNodeRegexCache`.
 * @returns {{ shouldReturn: boolean }} `shouldReturn` is `true` when the
 *   terminator of the pending structure was not found in the current buffer,
 *   `false` when there was no pending state or it was resolved.
 */
function handleIncompleteState(parserContext) {
  const {
    incompleteStructureState: state,
    streamingBuffer: buffer,
    customOptions,
    parsingIndex,
    tagStack,
    currentPointer,
    accumulator,
  } = parserContext;
  if (!state) {
    return { shouldReturn: false };
  }
  const searchStartIndex = Math.max(parsingIndex, state.at || 0);
  const bufferLength = buffer.length;
  const textNodeName = customOptions.textNodeName;

  switch (state.type) {
    case "comment":
    case "doctype":
    case "xmldecl": {
      // These structures carry no content for the DOM: just skip past the
      // terminator once it shows up.
      const endIdx = buffer.indexOf(state.lookingFor, searchStartIndex);
      if (endIdx === -1 || endIdx < (state.at || 0)) {
        return { shouldReturn: true };
      }
      parserContext.parsingIndex = endIdx + state.lookingFor.length;
      parserContext.incompleteStructureState = null;
      break;
    }
    case "cdata": {
      const cdataCloseMarker = state.lookingFor;
      const endIdx = buffer.indexOf(cdataCloseMarker, parsingIndex);
      if (endIdx === -1) {
        // Still open: consume everything available as CDATA text.
        // NOTE(review): the chunk is both appended to state.partialData and
        // written to currentPointer here, and the completion branch below
        // writes partialData + tail again — confirm this does not duplicate
        // text in the result. Also, a close marker split across two buffers
        // would be consumed as text here — verify against the caller.
        const newContent = buffer.substring(parsingIndex, bufferLength);
        if (newContent.length > 0) {
          state.partialData = (state.partialData || "") + newContent;
          if (tagStack.length > 0 && currentPointer) {
            handleTextContent(parserContext, newContent, currentPointer, textNodeName);
          }
        }
        parserContext.parsingIndex = bufferLength;
        return { shouldReturn: true };
      }
      const newSegment = buffer.substring(parsingIndex, endIdx);
      const fullContent = (state.partialData || "") + newSegment;
      if (fullContent.length > 0) {
        if (tagStack.length > 0 && currentPointer) {
          handleTextContent(parserContext, fullContent, currentPointer, textNodeName);
        } else if (tagStack.length === 0) {
          // No open element: the CDATA text goes straight to the accumulator.
          accumulator.push(
            customOptions.parsePrimitives
              ? (0, utils_1.tryParsePrimitive)(fullContent)
              : fullContent,
          );
        }
      }
      parserContext.parsingIndex = endIdx + cdataCloseMarker.length;
      parserContext.incompleteStructureState = null;
      break;
    }
    case "tag_start_incomplete": {
      // Resolved as soon as at least one more character follows the index.
      if (parserContext.parsingIndex + 1 < bufferLength) {
        parserContext.incompleteStructureState = null;
      } else {
        return { shouldReturn: true };
      }
      break;
    }
    case "opening_tag_incomplete":
    case "closing_tag_incomplete": {
      if (state.at !== undefined) {
        // Rewind to where the partial tag began and record what was seen.
        const tagType = state.type === "opening_tag_incomplete" ? "opening" : "closing";
        parserContext.parsingIndex = state.at;
        parserContext.reparsedSegmentContext = {
          originalIndex: state.at,
          partialText: state.partial || "",
          parentContext: state.parentOfPartial,
          tagType,
        };
      }
      parserContext.incompleteStructureState = null;
      break;
    }
    case "stop_node_content": {
      const { tagName: stopNodeTagName, stopNodeObjectRef } = state;
      let { depth: stopNodeDepth } = state;
      let currentSearchPos = parsingIndex;
      const contentSearchRegex = getOrCreateTagPattern(
        stopNodeTagName,
        parserContext.stopNodeRegexCache,
      );
      // The regex is shared and global: always position it explicitly.
      contentSearchRegex.lastIndex = currentSearchPos;
      let rawContentEnd = -1;
      let closingTagLength = 0;
      let match;
      // Walk same-named tags, tracking nesting, until the closing tag that
      // brings the depth back to zero.
      while (currentSearchPos < bufferLength && (match = contentSearchRegex.exec(buffer))) {
        const matchedTag = match[0];
        const isClosingTag = matchedTag.startsWith("</");
        const isSelfClosing = SELF_CLOSING_TAG_END.test(matchedTag);
        if (isClosingTag && --stopNodeDepth === 0) {
          rawContentEnd = match.index;
          closingTagLength = matchedTag.length;
          break;
        } else if (!isClosingTag && !isSelfClosing) {
          stopNodeDepth++;
        }
        currentSearchPos = contentSearchRegex.lastIndex;
      }
      if (rawContentEnd === -1) {
        // Closing tag not in this buffer: store what we have as raw text and
        // remember the depth reached so far.
        // NOTE(review): a closing tag split across two buffers would be
        // stored as text here — verify against how the caller feeds chunks.
        const newContent = buffer.substring(parsingIndex, bufferLength);
        if (newContent.length > 0) {
          handleTextContent(parserContext, newContent, stopNodeObjectRef, textNodeName);
        }
        parserContext.parsingIndex = bufferLength;
        if (parserContext.incompleteStructureState) {
          parserContext.incompleteStructureState.depth = stopNodeDepth;
        }
        return { shouldReturn: true };
      }
      const newSegment = buffer.substring(parsingIndex, rawContentEnd);
      handleTextContent(parserContext, newSegment, stopNodeObjectRef, textNodeName);
      parserContext.parsingIndex = rawContentEnd + closingTagLength;
      parserContext.incompleteStructureState = null;
      break;
    }
    default:
      // Unknown state type: drop it so parsing can continue.
      parserContext.incompleteStructureState = null;
  }
  return { shouldReturn: false };
}
//# sourceMappingURL=state-processor.js.map