UNPKG

typesxml

Version:

Open source XML library written in TypeScript

1,166 lines 74.8 kB
/******************************************************************************* * Copyright (c) 2023-2026 Maxprograms. * * This program and the accompanying materials * are made available under the terms of the Eclipse Public License 1.0 * which accompanies this distribution, and is available at * https://www.eclipse.org/org/documents/epl-v10.html * * Contributors: * Maxprograms - initial API and implementation *******************************************************************************/ import { closeSync, openSync, readSync, statSync } from "node:fs"; import { dirname, sep } from "node:path"; import { FileReader } from "../FileReader.js"; import { XMLUtils } from "../XMLUtils.js"; import { AttListDecl } from "./AttListDecl.js"; import { DTDGrammar } from "./DTDGrammar.js"; import { ElementDecl } from "./ElementDecl.js"; import { EntityDecl } from "./EntityDecl.js"; import { NotationDecl } from "./NotationDecl.js"; export class DTDParser { grammar; catalog; pointer = 0; source = ''; currentFile = ''; baseDirectory = ''; validating = false; overrideExistingDeclarations = false; preexistingEntityKeys = new Set(); preexistingAttributeKeys = new Map(); unresolvedExternalEntities = new Map(); parsingInternalSubset = false; xmlVersion = '1.0'; openConditionalSections = 0; constructor(grammar, baseDirectory) { if (grammar) { this.grammar = grammar; } else { this.grammar = new DTDGrammar(); } if (baseDirectory) { this.baseDirectory = baseDirectory; } } setGrammar(grammar) { this.grammar = grammar; } setOverrideExistingDeclarations(override) { this.overrideExistingDeclarations = override; } setValidating(validating) { this.validating = validating; } setCatalog(catalog) { this.catalog = catalog; } setXmlVersion(version) { if (version === '1.1') { this.xmlVersion = '1.1'; return; } this.xmlVersion = '1.0'; } parseDTD(file) { this.parseFile(file); this.grammar.processModels(); if (this.openConditionalSections !== 0) { throw new Error("Malformed conditional section: missing closing ']]>'"); } return this.grammar; } parseFile(file) { const previousInternalSubsetFlag = this.parsingInternalSubset; this.parsingInternalSubset = false; try { this.source = ''; let stats = statSync(file, { bigint: false, throwIfNoEntry: true }); this.currentFile = file; let blockSize = stats.blksize; let fileHandle = openSync(file, 'r'); let buffer = Buffer.alloc(blockSize); let bytesRead = readSync(fileHandle, buffer, 0, blockSize, 0); while (bytesRead > 0) { this.source += buffer.toString('utf8', 0, bytesRead); bytesRead = readSync(fileHandle, buffer, 0, blockSize, this.source.length); } closeSync(fileHandle); return this.parse(); } finally { this.parsingInternalSubset = previousInternalSubsetFlag; } } parseString(source) { const previousInternalSubsetFlag = this.parsingInternalSubset; this.parsingInternalSubset = true; try { this.source = source; this.parse(); this.grammar.processModels(); return this.grammar; } finally { this.parsingInternalSubset = previousInternalSubsetFlag; } } parse() { this.pointer = 0; this.preexistingEntityKeys = new Set(); this.preexistingAttributeKeys = new Map(); this.unresolvedExternalEntities.clear(); this.openConditionalSections = 0; if (this.overrideExistingDeclarations) { for (const key of this.grammar.getEntitiesMap().keys()) { this.preexistingEntityKeys.add(key); } this.grammar.getAttributesMap().forEach((attributes, element) => { const attributeNames = new Set(); attributes.forEach((_value, name) => { attributeNames.add(name); }); this.preexistingAttributeKeys.set(element, attributeNames); }); } while (this.pointer < this.source.length) { if (this.lookingAt('<!ELEMENT')) { let index = this.findDeclarationEnd(this.pointer); if (index === -1) { throw new Error('Malformed element declaration'); } let elementText = this.source.substring(this.pointer, index + '>'.length); let length = elementText.length; let elementDecl = this.parseElementDeclaration(elementText); this.grammar.addElement(elementDecl, this.overrideExistingDeclarations); this.pointer += length; continue; } if (this.lookingAt('<!ATTLIST')) { let index = this.findDeclarationEnd(this.pointer); if (index === -1) { throw new Error('Malformed attribute declaration'); } let attListText = this.source.substring(this.pointer, index + '>'.length); let length = attListText.length; let attList = this.parseAttributesListDeclaration(attListText); const preexisting = this.overrideExistingDeclarations ? this.preexistingAttributeKeys.get(attList.getName()) : undefined; this.grammar.addAttributes(attList.getName(), attList.getAttributes(), this.overrideExistingDeclarations, preexisting); this.pointer += length; continue; } if (this.lookingAt('<!ENTITY')) { let index = this.findDeclarationEnd(this.pointer); if (index === -1) { throw new Error('Malformed entity declaration'); } let entityDeclText = this.source.substring(this.pointer, index + '>'.length); let entityDecl = this.parseEntityDeclaration(entityDeclText); const entityKey = entityDecl.isParameterEntity() ? `%${entityDecl.getName()}` : entityDecl.getName(); const alreadyDeclared = this.grammar.getEntitiesMap().has(entityKey); const existedBeforeParse = this.preexistingEntityKeys.has(entityKey); if (alreadyDeclared && this.overrideExistingDeclarations && !existedBeforeParse) { this.pointer += entityDeclText.length; continue; } this.grammar.addEntity(entityDecl, this.overrideExistingDeclarations && existedBeforeParse); this.pointer += entityDeclText.length; continue; } if (this.lookingAt('<!NOTATION')) { let index = this.findDeclarationEnd(this.pointer); if (index === -1) { throw new Error('Malformed notation declaration'); } let notationDeclText = this.source.substring(this.pointer, index + '>'.length); if (XMLUtils.hasParameterEntity(notationDeclText)) { notationDeclText = this.resolveEntities(notationDeclText); } let notation = this.parseNotationDeclaration(notationDeclText); this.grammar.addNotation(notation, this.overrideExistingDeclarations); this.pointer += notationDeclText.length; continue; } if (this.lookingAt('<![')) { this.parseConditionalSection(); continue; } if (this.lookingAt(']]>')) { this.endConditionalSection(); continue; } if (this.lookingAt('<?')) { let index = this.source.indexOf('?>', this.pointer); if (index === -1) { throw new Error('Malformed processing instruction'); } // skip processing instructions this.pointer = index + '?>'.length; continue; } if (this.lookingAt('<!--')) { let index = this.source.indexOf('-->', this.pointer); if (index === -1) { throw new Error('Malformed comment'); } // skip comments this.pointer = index + '-->'.length; continue; } if (this.lookingAt('%')) { let index = this.source.indexOf(';', this.pointer); if (index == -1) { throw new Error('Malformed entity reference'); } let entityName = this.source.substring(this.pointer + '%'.length, index); let entity = this.grammar.getParameterEntity(entityName); if (!entity && this.catalog) { let entityLocation = this.catalog.matchPublic(entityName); if (entityLocation) { try { // For external entity references like %xs-datatypes;, we need to create // an entity that contains the entire external file content let externalContent = this.readFileContent(entityLocation); let externalEntity = new EntityDecl(entityName, true, externalContent, '', '', ''); this.grammar.addEntity(externalEntity, this.overrideExistingDeclarations); entity = externalEntity; // Also extract any entity declarations from the external file // for potential future use this.extractAndImportEntities(entityLocation); } catch (parseError) { console.warn(`Warning: Could not extract entities from ${entityLocation}: ${parseError.message}`); // Continue without the external entities - they might be defined elsewhere } } else { console.warn('entity not found in catalog: ' + entityName); } } if (entity === undefined) { throw new Error('Unknown entity: ' + entityName + ' in parsing loop'); } let value = entity.getValue(); if (value !== '') { let start = this.source.substring(0, this.pointer); let end = this.source.substring(index + ';'.length); this.source = start + value + end; } else if (entity.getSystemId() !== '' || entity.getPublicId() !== '') { let location = this.resolveEntity(entity.getPublicId(), entity.getSystemId()); let parser = new DTDParser(this.grammar); parser.setXmlVersion(this.xmlVersion); parser.setValidating(this.validating); if (this.catalog) { parser.setCatalog(this.catalog); } let externalGrammar = parser.parseFile(location); this.grammar.merge(externalGrammar); this.pointer = index + ';'.length; } else { // empty entity, ignore this.pointer = index + ';'.length; } continue; } let char = this.source.charAt(this.pointer); if (XMLUtils.isXmlSpace(char)) { this.pointer++; continue; } throw new Error('Error parsing ' + this.currentFile + ' at ' + this.source.substring(this.pointer - 10, this.pointer) + ' @ ' + this.source.substring(this.pointer, this.pointer + 30)); } return this.grammar; } importAllEntities(sourceGrammar, targetGrammar) { // Import all regular entities sourceGrammar.getEntitiesMap().forEach((entity, name) => { if (!targetGrammar.getEntity(name)) { targetGrammar.addEntity(entity); } }); // Import elements if they don't conflict sourceGrammar.getElementDeclMap().forEach((element, name) => { if (!targetGrammar.getElementDeclMap().has(name)) { targetGrammar.addElement(element); } }); // Import attribute lists sourceGrammar.getAttributesMap().forEach((attributes, elementName) => { let existingAttributes = targetGrammar.getAttributesMap().get(elementName); if (!existingAttributes || existingAttributes.size === 0) { targetGrammar.addAttributes(elementName, attributes); } }); // Import notations sourceGrammar.getNotationsMap().forEach((notation, name) => { if (!targetGrammar.getNotationsMap().has(name)) { targetGrammar.addNotation(notation); } }); } extractAndImportEntities(filePath) { try { // Read the external DTD content and process it in the current parser context // This ensures parameter entities from the main DTD are available let content = this.readFileContent(filePath); let originalFile = this.currentFile; let originalSource = this.source; let originalPointer = this.pointer; // Temporarily switch context to external file this.currentFile = filePath; this.source = content; this.pointer = 0; try { // Parse the external DTD content in the current context this.parse(); } finally { // Restore original context this.currentFile = originalFile; this.source = originalSource; this.pointer = originalPointer; } } catch (error) { if (this.validating) { throw error; } console.warn(`Warning: Could not parse external DTD file ${filePath}: ${error.message}`); } } readFileContent(filePath) { let stats = statSync(filePath, { bigint: false, throwIfNoEntry: true }); let blockSize = stats.blksize; let fileHandle = openSync(filePath, 'r'); let buffer = Buffer.alloc(blockSize); let content = ''; let bytesRead = readSync(fileHandle, buffer, 0, blockSize, 0); while (bytesRead > 0) { content += buffer.toString('utf8', 0, bytesRead); bytesRead = readSync(fileHandle, buffer, 0, blockSize, content.length); } closeSync(fileHandle); return content; } endConditionalSection() { if (this.openConditionalSections === 0) { throw new Error("Malformed conditional section: unexpected closing ']]>' without matching '<!['"); } this.openConditionalSections--; // jump over ]]> this.pointer += ']]>'.length; } parseConditionalSection() { this.pointer += '<!['.length; // skip spaces before section keyword for (; this.pointer < this.source.length; this.pointer++) { let char = this.source.charAt(this.pointer); if (!XMLUtils.isXmlSpace(char)) { break; } } // read section keyword let keyword = ''; for (; this.pointer < this.source.length; this.pointer++) { let char = this.source.charAt(this.pointer); if (XMLUtils.isXmlSpace(char) || char === '[') { break; } keyword += char; } if (XMLUtils.hasParameterEntity(keyword)) { let resolvedKeyword = this.resolveEntities(keyword); const bracketIndex = resolvedKeyword.indexOf('['); if (bracketIndex !== -1) { const remainder = resolvedKeyword.substring(bracketIndex + 1); resolvedKeyword = resolvedKeyword.substring(0, bracketIndex); if (this.source.charAt(this.pointer) !== '[') { this.source = this.source.substring(0, this.pointer) + '[' + remainder + this.source.substring(this.pointer); } else if (remainder.length > 0) { const insertionIndex = this.pointer + 1; this.source = this.source.substring(0, insertionIndex) + remainder + this.source.substring(insertionIndex); } } keyword = resolvedKeyword.trim(); } if ('INCLUDE' === keyword) { // jump to the start of the content for (; this.pointer < this.source.length; this.pointer++) { let char = this.source.charAt(this.pointer); if (char === '[') { break; } } this.pointer++; this.openConditionalSections++; } else if ('IGNORE' === keyword) { this.skipIgnoreSection(); } else { throw new Error('Malformed conditional section'); } } skipIgnoreSection() { let stack = 1; while (this.pointer < this.source.length) { if (this.lookingAt('<![')) { stack++; this.pointer += '<!['.length; } else if (this.lookingAt(']]>')) { stack--; this.pointer += ']]>'.length; if (stack === 0) { return; } } else { this.pointer++; } } throw new Error("Malformed conditional section: conditional IGNORE section not closed with ']]>'"); } resolveEntities(fragment, depth = 0) { if (depth > 50) { throw new Error('Parameter entity resolution depth exceeded (possible recursion in parameter entities)'); } let result = ''; let inQuotes = false; let quoteChar = ''; let index = 0; while (index < fragment.length) { const char = fragment.charAt(index); if (char === '%') { if (inQuotes && depth === 0) { result += char; index++; continue; } const end = fragment.indexOf(';', index + 1); if (end === -1) { throw new Error('Malformed parameter entity reference while resolving "' + fragment + '"'); } const entityName = fragment.substring(index + 1, end).trim(); if (entityName.length === 0) { result += fragment.substring(index, end + 1); index = end + 1; continue; } const entity = this.grammar.getParameterEntity(entityName); if (entity === undefined) { const context = fragment.substring(index, Math.min(fragment.length, index + 80)); throw new Error('Unknown entity: ' + entityName + ' in resolveEntities while processing "' + context + '"'); } if (entity.isExternal() && !entity.isExternalContentLoaded()) { const externalText = this.loadExternalEntity(entity.getPublicId(), entity.getSystemId(), true, entity.getName(), entity.isParameterEntity()); entity.setValue(externalText); } let replacement = entity.getValue(); if (replacement !== '') { replacement = this.resolveEntities(replacement, depth + 1); const beforeChar = result.length > 0 ? result.charAt(result.length - 1) : ''; const afterChar = (end + 1) < fragment.length ? fragment.charAt(end + 1) : ''; const originalBeforeChar = index > 0 ? fragment.charAt(index - 1) : ''; const originalAfterChar = afterChar; if (this.needsSeparatorBefore(beforeChar, replacement, originalBeforeChar)) { replacement = ' ' + replacement; } if (this.needsSeparatorAfter(afterChar, replacement, originalAfterChar)) { replacement = replacement + ' '; } result += replacement; } index = end + 1; continue; } if (inQuotes) { result += char; index++; if (char === quoteChar) { inQuotes = false; quoteChar = ''; } continue; } if (char === '"' || char === "'") { inQuotes = true; quoteChar = char; result += char; index++; continue; } result += char; index++; } return result; } parseEntityDeclaration(declaration) { this.requireWhitespaceAfterKeyword(declaration, '<!ENTITY', 'ENTITY declaration'); let name = ''; let i = '<!ENTITY'.length; let char = declaration.charAt(i); // skip spaces before % or entity name for (; i < declaration.length; i++) { char = declaration.charAt(i); if (!XMLUtils.isXmlSpace(char)) { break; } } let parameterEntity = false; if (char === '%') { parameterEntity = true; // skip spaces before name i++; for (; i < declaration.length; i++) { char = declaration.charAt(i); if (!XMLUtils.isXmlSpace(char)) { break; } } } // get entity name for (; i < declaration.length; i++) { char = declaration.charAt(i); if (XMLUtils.isXmlSpace(char)) { break; } name += char; } // Validate entity name if (!XMLUtils.isValidXMLName(name)) { throw new Error(`Invalid entity name in DTD: "${name}" - XML names must be valid`); } if (XMLUtils.hasParameterEntity(name)) { name = this.resolveEntities(name); } // skip spaces before entity value or external id for (; i < declaration.length; i++) { char = declaration.charAt(i); if (!XMLUtils.isXmlSpace(char)) { break; } } if (parameterEntity) { // can have value or external id if (XMLUtils.lookingAt('PUBLIC', declaration, i)) { i += 'PUBLIC'.length; // skip spaces before public id for (; i < declaration.length; i++) { char = declaration.charAt(i); if (!XMLUtils.isXmlSpace(char)) { break; } } let separator = declaration.charAt(i); i++; // skip opening " // get public id let publicId = ''; for (; i < declaration.length; i++) { char = declaration.charAt(i); if (char === separator) { break; } publicId += char; } i++; // skip closing " if (XMLUtils.hasParameterEntity(publicId)) { publicId = this.resolveEntities(publicId); } // skip spaces before system id for (; i < declaration.length; i++) { char = declaration.charAt(i); if (!XMLUtils.isXmlSpace(char)) { break; } } separator = declaration.charAt(i); i++; // skip opening " // get system id let systemId = ''; for (; i < declaration.length; i++) { char = declaration.charAt(i); if (char === separator) { break; } systemId += char; } if (XMLUtils.hasParameterEntity(systemId)) { systemId = this.resolveEntities(systemId); } // Don't load external entity content during DTD parsing - load lazily when referenced return this.attachUnresolvedError(name, new EntityDecl(name, parameterEntity, '', systemId, publicId, '')); } else if (XMLUtils.lookingAt('SYSTEM', declaration, i)) { // skip spaces before system id i += 'SYSTEM'.length; for (; i < declaration.length; i++) { char = declaration.charAt(i); if (!XMLUtils.isXmlSpace(char)) { break; } } let separator = declaration.charAt(i); i++; // skip opening " // get system id let systemId = ''; for (; i < declaration.length; i++) { char = declaration.charAt(i); if (char === separator) { break; } systemId += char; } if (XMLUtils.hasParameterEntity(systemId)) { systemId = this.resolveEntities(systemId); } // Don't load external entity content during DTD parsing - load lazily when referenced return this.attachUnresolvedError(name, new EntityDecl(name, parameterEntity, '', systemId, '', '')); } else { // get entity value let separator = declaration.charAt(i); i++; // skip opening " let value = ''; for (; i < declaration.length; i++) { char = declaration.charAt(i); if (char === separator) { break; } value += char; } const location = this.currentFile || this.baseDirectory || 'DTD'; this.ensureParameterReferenceSyntax(value, 'parameter entity', `%${name}`, location); if (this.parsingInternalSubset && this.containsParameterEntityReference(value)) { const where = location ? ` in ${location}` : ''; throw new Error(`Invalid parameter entity "%${name}"${where}: parameter entity references are not allowed in replacement text within the internal subset`); } value = this.normalizeEntityLiteral(value); this.validateParameterEntityValue(value, name, location); this.validateParsedEntityValue(value, name, location, true); return this.attachUnresolvedError(name, new EntityDecl(name, parameterEntity, value, '', '', '')); } } else { // Not a parameterEntity. Similar, but may declare NDATA if (XMLUtils.lookingAt('PUBLIC', declaration, i)) { i += 'PUBLIC'.length; // skip spaces before public id for (; i < declaration.length; i++) { char = declaration.charAt(i); if (!XMLUtils.isXmlSpace(char)) { break; } } let separator = declaration.charAt(i); i++; // skip " // get public id let publicId = ''; for (; i < declaration.length; i++) { char = declaration.charAt(i); if (char === separator) { break; } publicId += char; } i++; // skip closing " if (XMLUtils.hasParameterEntity(publicId)) { publicId = this.resolveEntities(publicId); } // skip spaces before system id for (; i < declaration.length; i++) { char = declaration.charAt(i); if (!XMLUtils.isXmlSpace(char)) { break; } } separator = declaration.charAt(i); i++; // skip " // get system id let systemId = ''; for (; i < declaration.length; i++) { char = declaration.charAt(i); if (char === separator) { break; } systemId += char; } i++; // skip closing " if (XMLUtils.hasParameterEntity(systemId)) { systemId = this.resolveEntities(systemId); } // skip spaces before NDATA for (; i < declaration.length; i++) { char = declaration.charAt(i); if (!XMLUtils.isXmlSpace(char)) { break; } } if (XMLUtils.lookingAt('NDATA', declaration, i)) { i += 'NDATA'.length; // skip spaces before ndata name for (; i < declaration.length; i++) { char = declaration.charAt(i); if (!XMLUtils.isXmlSpace(char)) { break; } } // get ndata name let ndata = ''; for (; i < declaration.length; i++) { char = declaration.charAt(i); if (XMLUtils.isXmlSpace(char)) { break; } ndata += char; } if (XMLUtils.hasParameterEntity(ndata)) { ndata = this.resolveEntities(ndata); } return this.attachUnresolvedError(name, new EntityDecl(name, parameterEntity, '', systemId, publicId, ndata)); } const externalValue = this.loadExternalEntity(publicId, systemId, false, name, parameterEntity); return this.attachUnresolvedError(name, new EntityDecl(name, parameterEntity, externalValue, systemId, publicId, '')); } else if (XMLUtils.lookingAt('SYSTEM', declaration, i)) { i += 'SYSTEM'.length; // skip spaces before system id for (; i < declaration.length; i++) { char = declaration.charAt(i); if (!XMLUtils.isXmlSpace(char)) { break; } } let separator = declaration.charAt(i); i++; // skip " // get system id let systemId = ''; for (; i < declaration.length; i++) { char = declaration.charAt(i); if (char === separator) { break; } systemId += char; } if (XMLUtils.hasParameterEntity(systemId)) { systemId = this.resolveEntities(systemId); } // skip spaces before NDATA for (; i < declaration.length; i++) { char = declaration.charAt(i); if (!XMLUtils.isXmlSpace(char)) { break; } } if (XMLUtils.lookingAt('NDATA', declaration, i)) { i += 'NDATA'.length; // skip spaces before ndata name for (; i < declaration.length; i++) { char = declaration.charAt(i); if (!XMLUtils.isXmlSpace(char)) { break; } } // get ndata name let ndata = ''; for (; i < declaration.length; i++) { char = declaration.charAt(i); if (XMLUtils.isXmlSpace(char)) { break; } ndata += char; } if (XMLUtils.hasParameterEntity(ndata)) { ndata = this.resolveEntities(ndata); } // NDATA entities are unparsed and shouldn't have content loaded return this.attachUnresolvedError(name, new EntityDecl(name, parameterEntity, '', systemId, '', ndata)); } const externalValue = this.loadExternalEntity('', systemId, false, name, parameterEntity); return this.attachUnresolvedError(name, new EntityDecl(name, parameterEntity, externalValue, systemId, '', '')); } else { // get entity value let separator = declaration.charAt(i); i++; // skip " let value = ''; for (; i < declaration.length; i++) { char = declaration.charAt(i); if (char === separator) { break; } value += char; } const location = this.currentFile || this.baseDirectory || 'DTD'; this.ensureParameterReferenceSyntax(value, 'general entity', name, location); if (XMLUtils.hasParameterEntity(value)) { if (this.parsingInternalSubset) { const locationInfo = this.currentFile || this.baseDirectory || 'DTD'; const where = locationInfo ? ` in ${locationInfo}` : ''; throw new Error(`Invalid general entity "${name}"${where}: parameter entity references are not allowed in replacement text within the internal subset`); } value = this.resolveEntities(value); } value = this.normalizeEntityLiteral(value); this.validateParsedEntityValue(value, name, location, false); return this.attachUnresolvedError(name, new EntityDecl(name, parameterEntity, value, '', '', '')); } } } attachUnresolvedError(name, entityDecl) { const unresolvedError = this.unresolvedExternalEntities.get(name); if (unresolvedError) { entityDecl.markUnresolved(unresolvedError); this.unresolvedExternalEntities.delete(name); } return entityDecl; } normalizeEntityLiteral(value) { // XML 1.0 section 2.11: normalize CRLF and CR to LF within entity values. let normalized = value.replaceAll('\r\n', '\n'); normalized = normalized.replaceAll('\r', '\n'); return normalized; } validateParameterEntityValue(content, entityName, location) { if (!this.validating) { return; } if (content.length === 0) { return; } const where = location ? ` in ${location}` : ''; let inSingleQuote = false; let inDoubleQuote = false; let parenDepth = 0; let index = 0; while (index < content.length) { const char = content.charAt(index); if (!inDoubleQuote && char === "'") { inSingleQuote = !inSingleQuote; index++; continue; } if (!inSingleQuote && char === '"') { inDoubleQuote = !inDoubleQuote; index++; continue; } if (inSingleQuote || inDoubleQuote) { index++; continue; } if (char === '(') { parenDepth++; index++; continue; } if (char === ')') { if (parenDepth === 0) { throw new Error(`Invalid parameter entity "%${entityName}"${where}: unmatched ')' in replacement text`); } parenDepth--; index++; continue; } if (content.startsWith('<!--', index)) { const commentEnd = content.indexOf('-->', index + 4); if (commentEnd === -1) { throw new Error(`Invalid parameter entity "%${entityName}"${where}: comment opened but not closed`); } index = commentEnd + 3; continue; } if (content.startsWith('<![', index)) { const sectionEnd = content.indexOf(']]>', index + 3); if (sectionEnd === -1) { throw new Error(`Invalid parameter entity "%${entityName}"${where}: conditional section opened but not closed`); } index = sectionEnd + 3; continue; } if (content.startsWith('<!', index)) { const markupEnd = content.indexOf('>', index + 2); if (markupEnd === -1) { throw new Error(`Invalid parameter entity "%${entityName}"${where}: markup declaration started but not closed`); } index = markupEnd + 1; continue; } index++; } if (inSingleQuote || inDoubleQuote) { throw new Error(`Invalid parameter entity "%${entityName}"${where}: quote mismatch in replacement text`); } if (parenDepth !== 0) { throw new Error(`Invalid parameter entity "%${entityName}"${where}: parentheses are not balanced`); } } ensureParameterReferenceSyntax(text, entityType, entityLabel, location) { if (!this.validating || text.length === 0) { return; } const where = location ? ` in ${location}` : ''; let index = 0; while (index < text.length) { if (text.charAt(index) !== '%') { index++; continue; } const refEnd = this.readParameterEntityReference(text, index); if (refEnd === null) { throw new Error(`Invalid ${entityType} "${entityLabel}"${where}: malformed parameter entity reference in replacement text`); } index = refEnd + 1; } } validateParsedEntityValue(content, entityName, location, isParameterEntity) { if (!this.validating || content.length === 0) { return; } const where = location ? ` in ${location}` : ''; const entityLabel = isParameterEntity ? `%${entityName}` : entityName; const entityType = isParameterEntity ? 'parameter entity' : 'general entity'; if (!isParameterEntity) { this.ensureGeneralEntityDelimitersBalanced(content, entityLabel, where); } XMLUtils.ensureValidXmlCharacters(this.xmlVersion, content, `${entityType} "${entityLabel}" replacement text${where}`); let index = 0; while (index < content.length) { const char = content.charAt(index); if (char !== '&') { index++; continue; } if (index + 1 >= content.length) { throw new Error(`Invalid ${entityType} "${entityLabel}"${where}: unterminated entity reference in replacement text`); } const following = content.charAt(index + 1); if (following === '#') { let referenceIndex = index + 2; if (referenceIndex >= content.length) { throw new Error(`Invalid ${entityType} "${entityLabel}"${where}: malformed character reference in replacement text`); } const radixChar = content.charAt(referenceIndex); let validDigits; if (radixChar === 'x' || radixChar === 'X') { referenceIndex++; validDigits = /^[0-9a-fA-F]$/; } else { validDigits = /^[0-9]$/; } let digitCount = 0; while (referenceIndex < content.length) { const current = content.charAt(referenceIndex); if (current === ';') { break; } if (!validDigits.test(current)) { throw new Error(`Invalid ${entityType} "${entityLabel}"${where}: malformed character reference in replacement text`); } digitCount++; referenceIndex++; } if (digitCount === 0 || referenceIndex >= content.length || content.charAt(referenceIndex) !== ';') { throw new Error(`Invalid ${entityType} "${entityLabel}"${where}: unterminated character reference in replacement text`); } index = referenceIndex + 1; continue; } if (!XMLUtils.isNameStartChar(following)) { throw new Error(`Invalid ${entityType} "${entityLabel}"${where}: unescaped '&' in replacement text`); } let refIndex = index + 2; while (refIndex < content.length && XMLUtils.isNameChar(content.charAt(refIndex))) { refIndex++; } if (refIndex >= content.length || content.charAt(refIndex) !== ';') { throw new Error(`Invalid ${entityType} "${entityLabel}"${where}: unterminated entity reference in replacement text`); } index = refIndex + 1; } } ensureGeneralEntityDelimitersBalanced(content, entityLabel, where) { const preview = this.decodeCharacterReferencesForValidation(content); let index = 0; let inSingleQuote = false; let inDoubleQuote = false; while (index < preview.length) { const char = preview.charAt(index); if (!inDoubleQuote && char === "'") { inSingleQuote = !inSingleQuote; index++; continue; } if (!inSingleQuote && char === '"') { inDoubleQuote = !inDoubleQuote; index++; continue; } if (inSingleQuote || inDoubleQuote) { index++; continue; } if (preview.startsWith('<![CDATA[', index)) { const closing = preview.indexOf(']]>', index + '<![CDATA['.length); if (closing === -1) { throw new Error(`Invalid general entity "${entityLabel}"${where}: CDATA section start delimiter appears without matching end`); } index = closing + ']]>'.length; continue; } if (preview.startsWith('<!--', index)) { const closing = preview.indexOf('-->', index + '<!--'.length); if (closing === -1) { throw new Error(`Invalid general entity "${entityLabel}"${where}: comment opened but not closed in replacement text`); } index = closing + '-->'.length; continue; } if (preview.startsWith('<?', index)) { const closing = preview.indexOf('?>', index + '<?'.length); if (closing === -1) { throw new Error(`Invalid general entity "${entityLabel}"${where}: processing instruction opened but not closed in replacement text`); } index = closing + '?>'.length; continue; } index++; } } decodeCharacterReferencesForValidation(content) { let result = ''; let index = 0; while (index < content.length) { const char = content.charAt(index); if (char !== '&') { result += char; index++; continue; } if (content.startsWith('&#x', index) || content.startsWith('&#X', index)) { const semi = content.indexOf(';', index + 3); if (semi === -1) { result += '&'; index++; continue; } const hexDigits = content.substring(index + 3, semi); const value = Number.parseInt(hexDigits, 16); if (!Number.isNaN(value)) { result += String.fromCodePoint(value); } index = semi + 1; continue; } if (content.startsWith('&#', index)) { const semi = content.indexOf(';', index + 2); if (semi === -1) { result += '&'; index++; continue; } const digits = content.substring(index + 2, semi); const value = Number.parseInt(digits, 10); if (!Number.isNaN(value)) { result += String.fromCodePoint(value); } index = semi + 1; continue; } if (content.startsWith('&lt;', index)) { result += '<'; index += 4; continue; } if (content.startsWith('&gt;', index)) { result += '>'; index += 4; continue; } if (content.startsWith('&amp;', index)) { result += '&'; index += 5; continue; } if (content.startsWith('&apos;', index)) { result += "'"; index += 6; continue; } if (content.startsWith('&quot;', index)) { result += '"'; index += 6; continue; } result += char; index++; } return result; } parseNotationDeclaration(declaration) { this.requireWhitespaceAfterKeyword(declaration, '<!NOTATION', 'NOTATION declaration'); if (this.parsingInternalSubset && this.hasParameterEntityReferenceOutsideLiterals(declaration)) { const location = this.currentFile || this.baseDirectory || 'DTD'; const where = location ? ` in ${location}` : ''; throw new Error(`Invalid NOTATION declaration${where}: parameter entity references are not allowed inside markup declarations in the internal subset`); } let name = ''; let i = '<!NOTATION'.length; let char = declaration.charAt(i); // skip spaces before notation name for (; i < declaration.length; i++) { char = declaration.charAt(i); if (!XMLUtils.isXmlSpace(char)) { break; } } // get notation name for (; i < declaration.length; i++) { char = declaration.charAt(i); if (XMLUtils.isXmlSpace(char)) { break; } name += char; } // Validate notation name if (!XMLUtils.isValidXMLName(name)) { throw new Error(`Invalid notation name in DTD: "${name}" - XML names must be valid`); } // skip spaces before external id for (; i < declaration.length; i++) { char = declaration.charAt(i); if (!XMLUtils.isXmlSpace(char)) { break; } } let publicId = ''; let systemId = ''; if (XMLUtils.lookingAt('PUBLIC', declaration, i)) { i += 'PUBLIC'.length; // skip spaces before public id for (; i < declaration.length; i++) { char = declaration.charAt(i); if (!XMLUtils.isXmlSpace(char)) { break; } } let separator = declaration.charAt(i); i++; // skip opening " // get public id for (; i < declaration.length; i++) { char = declaration.charAt(i); if (char === separator) { break; } publicId += char; } i++; // skip closing " if (XMLUtils.hasParameterEntity(publicId)) { publicId = this.resolveEntities(publicId); } // skip spaces befo