UNPKG

typesxml

Version:

Open source XML library written in TypeScript

553 lines 20.8 kB
"use strict";
/*******************************************************************************
 * Copyright (c) 2023 - 2024 Maxprograms.
 *
 * This program and the accompanying materials
 * are made available under the terms of the Eclipse License 1.0
 * which accompanies this distribution, and is available at
 * https://www.eclipse.org/org/documents/epl-v10.html
 *
 * Contributors:
 *     Maxprograms - initial API and implementation
 *******************************************************************************/
Object.defineProperty(exports, "__esModule", { value: true });
exports.SAXParser = void 0;
const crypto_1 = require("crypto");
const fs_1 = require("fs");
const os_1 = require("os");
const FileReader_1 = require("./FileReader");
const XMLAttribute_1 = require("./XMLAttribute");
const XMLUtils_1 = require("./XMLUtils");

/**
 * Event-driven (SAX style) XML parser.
 *
 * Text is pulled from a FileReader in chunks and appended to `buffer`;
 * `pointer` is the index of the next unread character. After each markup
 * construct is handled, the consumed prefix of the buffer is dropped and the
 * pointer is reset to 0. Every recognised construct is reported to the
 * content handler registered with `setContentHandler()`.
 */
class SAXParser {
    contentHandler;
    reader;
    pointer;
    buffer;
    elementStack;
    characterRun;
    rootParsed;
    xmlVersion;
    static MIN_BUFFER_SIZE = 2048;
    static path = require('path');
    /** Replacement text for the five entities predefined by XML. */
    static PREDEFINED_ENTITIES = new Map([
        ['lt', '<'],
        ['gt', '>'],
        ['amp', '&'],
        ['apos', '\''],
        ['quot', '"']
    ]);

    constructor() {
        this.resetState();
    }

    /** Restores the per-document parsing state so an instance can be reused. */
    resetState() {
        this.characterRun = '';
        this.elementStack = 0;
        this.pointer = 0;
        this.rootParsed = false;
        this.xmlVersion = '1.0';
    }

    /**
     * Registers the handler that receives the parsing events.
     *
     * @param contentHandler object implementing the handler callbacks used by
     *        this class (startDocument, startElement, characters, ...).
     */
    setContentHandler(contentHandler) {
        this.contentHandler = contentHandler;
    }

    /**
     * Parses the XML document stored in a file.
     *
     * @param path location of the file to parse.
     * @param encoding character encoding; when omitted it is obtained from
     *        `FileReader.detectEncoding(path)`.
     * @throws Error if no content handler was set or the document is malformed.
     */
    parseFile(path, encoding) {
        if (!this.contentHandler) {
            throw new Error('ContentHandler not set');
        }
        if (!encoding) {
            encoding = FileReader_1.FileReader.detectEncoding(path);
        }
        // Start from a clean slate so the same parser can handle several documents.
        this.resetState();
        this.reader = new FileReader_1.FileReader(path, encoding);
        try {
            this.buffer = this.reader.read();
            this.contentHandler.initialize();
            this.readDocument();
        } finally {
            // Release the file even when the document turns out to be malformed.
            this.reader.closeFile();
        }
    }

    /**
     * Parses XML held in a string by writing it to a temporary file in the OS
     * temp directory and delegating to `parseFile()`.
     *
     * @param data XML text to parse.
     * @throws Error if no content handler was set or the document is malformed.
     */
    parseString(data) {
        if (!this.contentHandler) {
            throw new Error('ContentHandler not set');
        }
        const name = `${(0, crypto_1.randomUUID)()}.xml`;
        const tempFile = SAXParser.path.join((0, os_1.tmpdir)(), name);
        (0, fs_1.writeFileSync)(tempFile, data, { encoding: 'utf8' });
        try {
            this.parseFile(tempFile, 'utf8');
        } finally {
            // Never leave the temporary file behind, even if parsing failed.
            (0, fs_1.unlinkSync)(tempFile);
        }
    }

    /**
     * Main loop: dispatches on the markup found at the current position and
     * accumulates plain text in `characterRun`.
     *
     * The order of the checks matters: longer, more specific prefixes
     * ('<?xml ', '<!--', '</', '<![CDATA[') must be tested before '<?' and '<'.
     *
     * @throws Error on text outside the root element or unclosed elements.
     */
    readDocument() {
        this.contentHandler.startDocument();
        while (this.hasInput()) {
            if (this.lookingAt('<?xml ') || this.lookingAt('<?xml\t') || this.lookingAt('<?xml\r') || this.lookingAt('<?xml\n')) {
                this.parseXMLDeclaration();
                continue;
            }
            if (this.lookingAt('<!DOCTYPE')) {
                this.parseDoctype();
                continue;
            }
            if (this.lookingAt('<!--')) {
                this.parseComment();
                continue;
            }
            if (this.lookingAt('<?')) {
                this.parseProcessingInstruction();
                continue;
            }
            if (this.lookingAt('</')) {
                this.endElement();
                continue;
            }
            if (this.lookingAt('<![CDATA[')) {
                this.startCDATA();
                continue;
            }
            if (this.lookingAt(']]>')) {
                this.endCDATA();
                continue;
            }
            if (this.lookingAt('&')) {
                this.parseEntityReference();
                continue;
            }
            if (this.lookingAt('<')) {
                this.startElement();
                continue;
            }
            const char = this.buffer.charAt(this.pointer);
            if (!XMLUtils_1.XMLUtils.isXmlSpace(char)) {
                if (!this.rootParsed) {
                    throw new Error('Malformed XML document: text found in prolog');
                }
                if (this.elementStack === 0) {
                    throw new Error('Malformed XML document: text found after root element');
                }
            }
            this.characterRun += char;
            this.pointer++;
        }
        if (this.elementStack !== 0) {
            throw new Error('Malformed XML document: unclosed elements');
        }
        this.cleanCharacterRun();
        // Report the end of the document exactly once, after all other events.
        this.contentHandler.endDocument();
    }

    /**
     * Handles '&name;' in content: predefined entities and character
     * references are reported as characters, anything else as a skipped entity.
     *
     * @throws Error if the reference is unterminated or is an invalid
     *         character reference.
     */
    parseEntityReference() {
        this.cleanCharacterRun();
        this.pointer++; // skip '&'
        const name = this.readUntil(';', 'entity reference');
        if (SAXParser.PREDEFINED_ENTITIES.has(name)) {
            this.contentHandler.characters(SAXParser.PREDEFINED_ENTITIES.get(name));
        } else if (name.startsWith('#')) {
            this.contentHandler.characters(this.decodeCharacterReference(name));
        } else {
            this.contentHandler.skippedEntity(name);
        }
        this.pointer++; // skip ';'
        this.discardConsumed();
    }

    /**
     * Converts the body of a character reference ('#65' or '#x41') to text.
     *
     * @param name reference text between '&' and ';', starting with '#'.
     * @returns the referenced character, filtered through the XMLUtils
     *          validity helper for the current XML version.
     * @throws Error if the digits are invalid or exceed U+10FFFF.
     */
    decodeCharacterReference(name) {
        const isHex = name.startsWith('#x');
        const digits = name.substring(isHex ? 2 : 1);
        const wellFormed = isHex ? /^[0-9a-fA-F]+$/.test(digits) : /^[0-9]+$/.test(digits);
        const code = wellFormed ? Number.parseInt(digits, isHex ? 16 : 10) : Number.NaN;
        if (Number.isNaN(code) || code > 0x10FFFF) {
            throw new Error(`Malformed XML document: invalid character reference '&${name};'`);
        }
        if (code > 0xFFFF) {
            // Code points above the BMP need a surrogate pair; String.fromCharCode
            // would truncate them. #x10000-#x10FFFF is legal in XML 1.0 and 1.1.
            // NOTE(review): emitted without the XMLUtils filter on the assumption
            // that the helper may inspect individual UTF-16 units - confirm.
            return String.fromCodePoint(code);
        }
        const char = String.fromCharCode(code);
        return this.xmlVersion === '1.0'
            ? XMLUtils_1.XMLUtils.validXml10Chars(char)
            : XMLUtils_1.XMLUtils.validXml11Chars(char);
    }

    /**
     * Handles a start tag or an empty-element tag and reports it, with its
     * attributes, to the content handler.
     *
     * @throws Error if the tag is unterminated or its attributes are malformed.
     */
    startElement() {
        this.cleanCharacterRun();
        this.pointer++; // skip '<'
        let name = '';
        while (!this.lookingAt('>') && !this.lookingAt('/>')) {
            this.requireInput('start tag');
            const char = this.buffer.charAt(this.pointer);
            if (XMLUtils_1.XMLUtils.isXmlSpace(char)) {
                break;
            }
            name += char;
            this.pointer++;
        }
        // Collect the attribute text. '>' and '/>' only end the tag when they
        // appear outside a quoted attribute value.
        let rest = '';
        let quote = '';
        while (quote !== '' || (!this.lookingAt('>') && !this.lookingAt('/>'))) {
            this.requireInput('start tag');
            const char = this.buffer.charAt(this.pointer++);
            if (quote === '') {
                if (char === '"' || char === '\'') {
                    quote = char;
                }
            } else if (char === quote) {
                quote = '';
            }
            rest += char;
        }
        const attributesMap = this.parseAttributes(rest.trim());
        // TODO https://www.w3.org/TR/REC-xml/#AVNormalize
        const attributes = Array.from(attributesMap, ([key, value]) => new XMLAttribute_1.XMLAttribute(key, value));
        this.contentHandler.startElement(name, attributes);
        this.elementStack++;
        this.rootParsed = true;
        if (this.lookingAt('/>')) {
            this.cleanCharacterRun();
            this.contentHandler.endElement(name);
            this.elementStack--;
            this.pointer += 2; // skip '/>'
        } else {
            this.pointer++; // skip '>'
        }
        this.discardConsumed();
    }

    /**
     * Handles an end tag and reports it to the content handler.
     *
     * @throws Error if the tag is unterminated.
     */
    endElement() {
        this.cleanCharacterRun();
        this.pointer += 2; // skip '</'
        // Whitespace is allowed between the name and '>' and is not part of the name.
        const name = this.readUntil('>', 'end tag').trimEnd();
        this.contentHandler.endElement(name);
        this.elementStack--;
        this.pointer++; // skip '>'
        this.discardConsumed();
    }

    /**
     * Flushes accumulated text: as characters when inside an element, as
     * ignorable whitespace when in the prolog or after the root element.
     */
    cleanCharacterRun() {
        if (this.characterRun === '') {
            return;
        }
        if (this.rootParsed && this.elementStack !== 0) {
            this.contentHandler.characters(this.characterRun);
        } else {
            this.contentHandler.ignorableWhitespace(this.characterRun);
        }
        this.characterRun = '';
    }

    /**
     * Handles a comment and reports its text.
     *
     * @throws Error if the comment is unterminated.
     */
    parseComment() {
        this.cleanCharacterRun();
        this.pointer += 4; // skip '<!--'
        const comment = this.readUntil('-->', 'comment');
        this.pointer += 3; // skip '-->'
        this.discardConsumed();
        this.contentHandler.comment(comment);
    }

    /**
     * Handles a processing instruction, splitting it into target and data at
     * the first run of whitespace.
     *
     * @throws Error if the instruction is unterminated.
     */
    parseProcessingInstruction() {
        this.cleanCharacterRun();
        this.pointer += 2; // skip '<?'
        const instructionText = this.readUntil('?>', 'processing instruction').trim();
        let index = 0;
        while (index < instructionText.length && !XMLUtils_1.XMLUtils.isXmlSpace(instructionText[index])) {
            index++;
        }
        const target = instructionText.substring(0, index);
        while (index < instructionText.length && XMLUtils_1.XMLUtils.isXmlSpace(instructionText[index])) {
            index++;
        }
        const data = instructionText.substring(index);
        this.pointer += 2; // skip '?>'
        this.discardConsumed();
        this.contentHandler.processingInstruction(target, data);
    }

    /**
     * Handles a DOCTYPE declaration: root name, optional SYSTEM/PUBLIC
     * identifiers and optional internal subset.
     *
     * @throws Error if the declaration is unterminated or not closed by '>'.
     */
    parseDoctype() {
        this.cleanCharacterRun();
        this.pointer += 9; // skip '<!DOCTYPE'
        this.skipSpaces();
        // The root name ends at whitespace, at '[' (internal subset) or at '>'.
        let name = '';
        for (;;) {
            this.requireInput('DOCTYPE declaration');
            const char = this.buffer.charAt(this.pointer);
            if (XMLUtils_1.XMLUtils.isXmlSpace(char) || char === '[' || char === '>') {
                break;
            }
            name += char;
            this.pointer++;
        }
        this.skipSpaces();
        let publicId = '';
        let systemId = '';
        if (this.lookingAt('SYSTEM')) {
            systemId = this.parseSystemDeclaration();
        } else if (this.lookingAt('PUBLIC')) {
            [publicId, systemId] = this.parsePublicDeclaration();
        }
        this.contentHandler.startDTD(name, publicId, systemId);
        this.skipSpaces();
        let internalSubset = '';
        if (this.lookingAt('[')) {
            this.pointer++; // skip '['
            internalSubset = this.readInternalSubset();
            this.pointer++; // skip ']'
        }
        this.skipSpaces();
        if (!this.lookingAt('>')) {
            throw new Error('Malformed XML document: malformed DOCTYPE declaration');
        }
        this.pointer++; // skip '>'
        this.discardConsumed();
        if (internalSubset !== '') {
            this.contentHandler.internalSubset(internalSubset);
        }
        this.contentHandler.endDTD();
    }

    /**
     * Reads the internal DTD subset up to, but not including, its closing ']'.
     * Quoted literals, comments and processing instructions are copied whole
     * so that a ']' inside them does not end the subset.
     *
     * @returns the raw text of the internal subset.
     * @throws Error if the subset is unterminated.
     */
    readInternalSubset() {
        const context = 'DOCTYPE internal subset';
        let subset = '';
        while (!this.lookingAt(']')) {
            this.requireInput(context);
            if (this.lookingAt('<!--')) {
                subset += this.readDelimited('<!--', '-->', context);
                continue;
            }
            if (this.lookingAt('<?')) {
                subset += this.readDelimited('<?', '?>', context);
                continue;
            }
            const char = this.buffer.charAt(this.pointer);
            if (char === '"' || char === '\'') {
                subset += this.readDelimited(char, char, context);
                continue;
            }
            subset += char;
            this.pointer++;
        }
        return subset;
    }

    /**
     * Parses 'PUBLIC "publicId" "systemId"' starting at the PUBLIC keyword.
     * The system literal is treated as optional.
     *
     * @returns a two-element array: [publicId, systemId].
     * @throws Error if the public identifier is missing or unterminated.
     */
    parsePublicDeclaration() {
        this.pointer += 6; // skip 'PUBLIC'
        this.skipSpaces();
        const publicId = this.readQuotedLiteral('public identifier');
        this.skipSpaces();
        const next = this.buffer.charAt(this.pointer);
        const systemId = next === '"' || next === '\'' ? this.readQuotedLiteral('system identifier') : '';
        return [publicId, systemId];
    }

    /**
     * Parses 'SYSTEM "systemId"' starting at the SYSTEM keyword.
     *
     * @returns the system identifier.
     * @throws Error if the system identifier is missing or unterminated.
     */
    parseSystemDeclaration() {
        this.pointer += 6; // skip 'SYSTEM'
        this.skipSpaces();
        return this.readQuotedLiteral('system identifier');
    }

    /**
     * Handles the XML declaration, records the declared version and reports
     * version, encoding and standalone values to the content handler.
     *
     * @throws Error if the declaration is unterminated or malformed.
     */
    parseXMLDeclaration() {
        this.pointer += 6; // skip '<?xml' and the whitespace character after it
        const declarationText = this.readUntil('?>', 'XML declaration').trim();
        const attributes = this.parseAttributes(declarationText);
        this.pointer += 2; // skip '?>'
        this.discardConsumed();
        // Keep a usable version even when the declaration omits it.
        this.xmlVersion = attributes.get('version') ?? '1.0';
        this.contentHandler.xmlDeclaration(attributes.get('version'), attributes.get('encoding'), attributes.get('standalone'));
    }

    /**
     * Tells whether the unread input starts with the given text, refilling the
     * buffer first when it is running low.
     *
     * @param text literal to look for at the current position.
     * @returns true if the input at `pointer` starts with `text`.
     */
    lookingAt(text) {
        this.fillBuffer();
        return this.buffer.startsWith(text, this.pointer);
    }

    /**
     * Parses a list of `name="value"` pairs (quotes may be single or double,
     * whitespace is allowed around '=').
     *
     * Trailing text that contains no '=' is ignored.
     *
     * @param text attribute list, without the element name or tag delimiters.
     * @returns Map from attribute name to raw (unnormalized) value.
     * @throws Error if a name is missing, or a value is unquoted or unterminated.
     */
    parseAttributes(text) {
        const map = new Map();
        let remaining = text.trim();
        while (remaining.includes('=')) {
            const equalsIndex = remaining.indexOf('=');
            const name = remaining.substring(0, equalsIndex).trim();
            if (name === '' || /\s/.test(name)) {
                throw new Error('Malformed attributes list');
            }
            let valueStart = equalsIndex + 1;
            while (valueStart < remaining.length && XMLUtils_1.XMLUtils.isXmlSpace(remaining[valueStart])) {
                valueStart++;
            }
            const quote = remaining.charAt(valueStart);
            if (quote !== '"' && quote !== '\'') {
                throw new Error('Malformed attributes list');
            }
            const valueEnd = remaining.indexOf(quote, valueStart + 1);
            if (valueEnd === -1) {
                throw new Error('Malformed attributes list');
            }
            map.set(name, remaining.substring(valueStart + 1, valueEnd));
            remaining = remaining.substring(valueEnd + 1).trim();
        }
        return map;
    }

    /**
     * Handles the start of a CDATA section. The section content is consumed
     * literally, so '<' and '&' inside it are not treated as markup, and is
     * left in `characterRun`; the closing ']]>' stays in the buffer and is
     * handled by `endCDATA()`.
     *
     * @throws Error if the section is outside the root element or unterminated.
     */
    startCDATA() {
        if (this.elementStack === 0) {
            throw new Error('Malformed XML document: CDATA section outside root element');
        }
        this.cleanCharacterRun();
        this.pointer += 9; // skip '<![CDATA['
        this.contentHandler.startCDATA();
        this.characterRun = this.readUntil(']]>', 'CDATA section');
        this.discardConsumed();
    }

    /** Handles ']]>': flushes pending text and reports the end of the CDATA section. */
    endCDATA() {
        this.cleanCharacterRun();
        this.pointer += 3; // skip ']]>'
        this.discardConsumed();
        this.contentHandler.endCDATA();
    }

    /** Appends another chunk from the reader when fewer than MIN_BUFFER_SIZE characters remain. */
    fillBuffer() {
        if (this.buffer.length - this.pointer < SAXParser.MIN_BUFFER_SIZE && this.reader.dataAvailable()) {
            this.buffer += this.reader.read();
        }
    }

    /** Refills the buffer if needed and tells whether unread input remains. */
    hasInput() {
        this.fillBuffer();
        return this.pointer < this.buffer.length;
    }

    /**
     * Fails when the input ends in the middle of a construct.
     *
     * @param context name of the construct, used in the error message.
     * @throws Error if no unread input remains.
     */
    requireInput(context) {
        if (!this.hasInput()) {
            throw new Error(`Malformed XML document: unterminated ${context}`);
        }
    }

    /**
     * Consumes text up to, but not including, `terminator`.
     *
     * @param terminator literal that ends the text.
     * @param context name of the construct, used in the error message.
     * @returns the text found before the terminator.
     * @throws Error if the input ends before the terminator is found.
     */
    readUntil(terminator, context) {
        let text = '';
        while (!this.lookingAt(terminator)) {
            this.requireInput(context);
            text += this.buffer.charAt(this.pointer++);
        }
        return text;
    }

    /**
     * Consumes a construct whose opening delimiter is at the current position.
     *
     * @returns the construct including both delimiters.
     * @throws Error if the closing delimiter is not found.
     */
    readDelimited(open, close, context) {
        this.pointer += open.length;
        const inner = this.readUntil(close, context);
        this.pointer += close.length;
        return open + inner + close;
    }

    /**
     * Consumes a single- or double-quoted literal at the current position.
     *
     * @param context name of the literal, used in error messages.
     * @returns the text between the quotes.
     * @throws Error if no quote is found or the literal is unterminated.
     */
    readQuotedLiteral(context) {
        this.fillBuffer();
        const quote = this.buffer.charAt(this.pointer);
        if (quote !== '"' && quote !== '\'') {
            throw new Error(`Malformed XML document: expected quoted ${context}`);
        }
        this.pointer++; // skip opening quote
        const value = this.readUntil(quote, context);
        this.pointer++; // skip closing quote
        return value;
    }

    /** Advances the pointer past any XML whitespace at the current position. */
    skipSpaces() {
        while (this.hasInput() && XMLUtils_1.XMLUtils.isXmlSpace(this.buffer.charAt(this.pointer))) {
            this.pointer++;
        }
    }

    /** Drops the consumed prefix of the buffer and rewinds the pointer. */
    discardConsumed() {
        this.buffer = this.buffer.substring(this.pointer);
        this.pointer = 0;
    }
}
exports.SAXParser = SAXParser;
//# sourceMappingURL=SAXParser.js.map