UNPKG

fast-xml-parser

Version:

Validate XML, Parse XML, Build XML without C/C++ based libraries

370 lines (313 loc) 11.5 kB
import {isName} from '../util.js';

//TODO: handle comments

/**
 * Scan a DOCTYPE declaration and collect the internal entities it declares.
 *
 * Only the characters at `i + 3 .. i + 8` are compared against "OCTYPE";
 * presumably the caller has already matched the leading `<!D` (verify against
 * the caller).
 *
 * @param {string} xmlData - The XML text being parsed.
 * @param {number} i - Index from which the "OCTYPE" check is offset by 3.
 * @returns {{entities: Object, i: number}} `entities` maps each declared
 *   entity name to `{regx, val}`, where `regx` is a global RegExp matching
 *   the reference `&name;` and `val` is the replacement text. `i` is the
 *   index at which scanning stopped (the `>` that closes the DOCTYPE).
 * @throws {Error} "Invalid Tag instead of DOCTYPE", "Invalid DOCTYPE",
 *   "Unclosed DOCTYPE", or any error raised by the declaration readers.
 */
export default function readDocType(xmlData, i){
    const entities = {};

    const isDocType =
        xmlData[i + 3] === 'O' &&
        xmlData[i + 4] === 'C' &&
        xmlData[i + 5] === 'T' &&
        xmlData[i + 6] === 'Y' &&
        xmlData[i + 7] === 'P' &&
        xmlData[i + 8] === 'E';
    if (!isDocType) {
        throw new Error(`Invalid Tag instead of DOCTYPE`);
    }

    i = i + 9;
    // Starts at 1 for the '<' of the DOCTYPE itself; each nested '<' adds one
    // and each '>' removes one, so 0 means the DOCTYPE has been closed.
    let angleBracketsCount = 1;
    let hasBody = false; // becomes true once '[' (internal subset) is seen
    let comment = false; // true while inside <!-- ... -->

    for (; i < xmlData.length; i++) {
        if (xmlData[i] === '<' && !comment) { //Determine the tag type
            if (hasBody && hasSeq(xmlData, "!ENTITY", i)) {
                i += 7;
                let entityName, val;
                [entityName, val, i] = readEntityExp(xmlData, i + 1);
                if (!val.includes("&")) { //Parameter entities are not supported
                    // Entity names may legally contain '.', which is a wildcard
                    // in a regular expression; escape metacharacters so that
                    // only the literal reference `&name;` is matched.
                    const escapedName = entityName.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
                    entities[entityName] = {
                        regx: RegExp(`&${escapedName};`, "g"),
                        val: val
                    };
                }
            } else if (hasBody && hasSeq(xmlData, "!ELEMENT", i)) {
                i += 8; //Not supported
                const {index} = readElementExp(xmlData, i + 1);
                i = index;
            } else if (hasBody && hasSeq(xmlData, "!ATTLIST", i)) {
                // Not supported: the declaration body is not parsed here
                // (readAttlistExp is not invoked); the main loop simply keeps
                // scanning until the matching '>' is counted.
                i += 8;
            } else if (hasBody && hasSeq(xmlData, "!NOTATION", i)) {
                i += 9; //Not supported
                const {index} = readNotationExp(xmlData, i + 1);
                i = index;
            } else if (hasSeq(xmlData, "!--", i)) {
                comment = true;
            } else {
                throw new Error(`Invalid DOCTYPE`);
            }

            angleBracketsCount++;
        } else if (xmlData[i] === '>') {
            if (comment) {
                // Inside a comment only "-->" closes it; any other '>' is text.
                if (xmlData[i - 1] === "-" && xmlData[i - 2] === "-") {
                    comment = false;
                    angleBracketsCount--;
                }
            } else {
                angleBracketsCount--;
            }
            if (angleBracketsCount === 0) {
                break;
            }
        } else if (xmlData[i] === '[') {
            hasBody = true;
        }
    }

    if (angleBracketsCount !== 0) {
        throw new Error(`Unclosed DOCTYPE`);
    }

    return {entities, i};
}

/**
 * Advance past whitespace.
 *
 * @param {string} data - Text being scanned.
 * @param {number} index - Index to start from.
 * @returns {number} Index of the first non-whitespace character at or after
 *   `index`, or `data.length` if there is none.
 */
const skipWhitespace = (data, index) => {
    while (index < data.length && /\s/.test(data[index])) {
        index++;
    }
    return index;
};

/**
 * Read the name and quoted value of an `<!ENTITY ...>` declaration.
 *
 * External entities are not supported:
 *   <!ENTITY ext SYSTEM "http://normal-website.com" >
 * Parameter entities are not supported:
 *   <!ENTITY entityname "&anotherElement;">
 * Internal entities are supported:
 *   <!ENTITY entityname "replacement text">
 *
 * @param {string} xmlData - The XML text being parsed.
 * @param {number} i - Index just after the `!ENTITY` keyword.
 * @returns {[string, string, number]} `[entityName, entityValue, index]`,
 *   where `index` is the position of the closing quote of the value.
 * @throws {Error} If the name is invalid, the entity is external or a
 *   parameter entity, or the value is not a terminated quoted string.
 */
function readEntityExp(xmlData, i) {
    // Skip leading whitespace after <!ENTITY
    i = skipWhitespace(xmlData, i);

    // Read entity name
    let entityName = "";
    while (i < xmlData.length && !/\s/.test(xmlData[i]) && xmlData[i] !== '"' && xmlData[i] !== "'") {
        entityName += xmlData[i];
        i++;
    }
    validateEntityName(entityName);

    // Skip whitespace after entity name
    i = skipWhitespace(xmlData, i);

    // Check for unsupported constructs (external entities or parameter entities)
    if (xmlData.substring(i, i + 6).toUpperCase() === "SYSTEM") {
        throw new Error("External entities are not supported");
    } else if (xmlData[i] === "%") {
        throw new Error("Parameter entities are not supported");
    }

    // Read entity value (internal entity)
    let entityValue = "";
    [i, entityValue] = readIdentifierVal(xmlData, i, "entity");
    // readIdentifierVal returns the index after the closing quote; step back
    // so the caller's loop increment lands on the character that follows it.
    i--;
    return [entityName, entityValue, i];
}

/**
 * Read a `<!NOTATION ...>` declaration: its name and its SYSTEM or PUBLIC
 * identifier(s).
 *
 * @param {string} xmlData - The XML text being parsed.
 * @param {number} i - Index just after the `!NOTATION` keyword.
 * @returns {{notationName: string, publicIdentifier: (string|null),
 *   systemIdentifier: (string|null), index: number}} Parsed parts; `index`
 *   is one less than the position reached after the last identifier read.
 * @throws {Error} If the name is invalid, the identifier type is neither
 *   SYSTEM nor PUBLIC, or an identifier is missing or unterminated.
 */
function readNotationExp(xmlData, i) {
    // Skip leading whitespace after <!NOTATION
    i = skipWhitespace(xmlData, i);

    // Read notation name
    let notationName = "";
    while (i < xmlData.length && !/\s/.test(xmlData[i])) {
        notationName += xmlData[i];
        i++;
    }
    validateEntityName(notationName);

    // Skip whitespace after notation name
    i = skipWhitespace(xmlData, i);

    // Check identifier type (SYSTEM or PUBLIC)
    const identifierType = xmlData.substring(i, i + 6).toUpperCase();
    if (identifierType !== "SYSTEM" && identifierType !== "PUBLIC") {
        throw new Error(`Expected SYSTEM or PUBLIC, found "${identifierType}"`);
    }
    i += identifierType.length;

    // Skip whitespace after identifier type
    i = skipWhitespace(xmlData, i);

    let publicIdentifier = null;
    let systemIdentifier = null;

    if (identifierType === "PUBLIC") {
        // Read public identifier
        [i, publicIdentifier] = readIdentifierVal(xmlData, i, "publicIdentifier");

        // Skip whitespace after public identifier
        i = skipWhitespace(xmlData, i);

        // Optionally read system identifier
        if (xmlData[i] === '"' || xmlData[i] === "'") {
            [i, systemIdentifier] = readIdentifierVal(xmlData, i, "systemIdentifier");
        }
    } else if (identifierType === "SYSTEM") {
        // Read system identifier (mandatory for SYSTEM)
        [i, systemIdentifier] = readIdentifierVal(xmlData, i, "systemIdentifier");

        if (!systemIdentifier) {
            throw new Error("Missing mandatory system identifier for SYSTEM notation");
        }
    }

    return {notationName, publicIdentifier, systemIdentifier, index: --i};
}

/**
 * Read a single- or double-quoted string starting at `i`.
 *
 * @param {string} xmlData - The XML text being parsed.
 * @param {number} i - Index of the opening quote.
 * @param {string} type - Label used in the "Unterminated ... value" message.
 * @returns {[number, string]} `[index, value]`, where `index` is the position
 *   just after the closing quote and `value` is the text between the quotes.
 * @throws {Error} If `xmlData[i]` is not a quote or the string is not closed
 *   by the same quote character.
 */
function readIdentifierVal(xmlData, i, type) {
    let identifierVal = "";
    const startChar = xmlData[i];
    if (startChar !== '"' && startChar !== "'") {
        throw new Error(`Expected quoted string, found "${startChar}"`);
    }
    i++;

    while (i < xmlData.length && xmlData[i] !== startChar) {
        identifierVal += xmlData[i];
        i++;
    }

    if (xmlData[i] !== startChar) {
        throw new Error(`Unterminated ${type} value`);
    }
    i++;
    return [i, identifierVal];
}

/**
 * Read an `<!ELEMENT ...>` declaration.
 *
 *   <!ELEMENT br EMPTY>
 *   <!ELEMENT div ANY>
 *   <!ELEMENT title (#PCDATA)>
 *   <!ELEMENT book (title, author+)>
 *   <!ELEMENT name (content-model)>
 *
 * @param {string} xmlData - The XML text being parsed.
 * @param {number} i - Index just after the `!ELEMENT` keyword.
 * @returns {{elementName: string, contentModel: string, index: number}}
 *   `contentModel` is the trimmed text between `(` and the first `)` (empty
 *   for EMPTY / ANY). `index` is the position of that `)` or of the last
 *   character of the EMPTY / ANY keyword.
 * @throws {Error} If the name is invalid, the content model is unterminated,
 *   or the expression starts with an unexpected character.
 */
function readElementExp(xmlData, i) {
    // Skip leading whitespace after <!ELEMENT
    i = skipWhitespace(xmlData, i);

    // Read element name
    let elementName = "";
    while (i < xmlData.length && !/\s/.test(xmlData[i])) {
        elementName += xmlData[i];
        i++;
    }

    // Validate element name
    if (!validateEntityName(elementName)) {
        throw new Error(`Invalid element name: "${elementName}"`);
    }

    // Skip whitespace after element name
    i = skipWhitespace(xmlData, i);

    let contentModel = "";
    // hasSeq compares from i + 1, so the first letter is checked separately.
    if (xmlData[i] === "E" && hasSeq(xmlData, "MPTY", i)) {
        i += 4;
    } else if (xmlData[i] === "A" && hasSeq(xmlData, "NY", i)) {
        i += 2;
    } else if (xmlData[i] === "(") {
        i++; // Move past '('

        // Read content model
        while (i < xmlData.length && xmlData[i] !== ")") {
            contentModel += xmlData[i];
            i++;
        }
        if (xmlData[i] !== ")") {
            throw new Error("Unterminated content model");
        }
    } else {
        throw new Error(`Invalid Element Expression, found "${xmlData[i]}"`);
    }

    return {
        elementName,
        contentModel: contentModel.trim(),
        index: i
    };
}

/**
 * Read one attribute definition of an `<!ATTLIST ...>` declaration: element
 * name, attribute name, attribute type and default declaration.
 *
 * @param {string} xmlData - The XML text being parsed.
 * @param {number} i - Index just after the `!ATTLIST` keyword.
 * @returns {{elementName: string, attributeName: string,
 *   attributeType: string, defaultValue: string, index: number}} Parsed
 *   parts; `index` is the position just after the default declaration.
 * @throws {Error} If a name or type is invalid, a notation list is malformed,
 *   or the default is neither #REQUIRED, #IMPLIED nor a quoted string.
 */
function readAttlistExp(xmlData, i) {
    // Skip leading whitespace after <!ATTLIST
    i = skipWhitespace(xmlData, i);

    // Read element name
    let elementName = "";
    while (i < xmlData.length && !/\s/.test(xmlData[i])) {
        elementName += xmlData[i];
        i++;
    }

    // Validate element name
    validateEntityName(elementName);

    // Skip whitespace after element name
    i = skipWhitespace(xmlData, i);

    // Read attribute name
    let attributeName = "";
    while (i < xmlData.length && !/\s/.test(xmlData[i])) {
        attributeName += xmlData[i];
        i++;
    }

    // Validate attribute name
    if (!validateEntityName(attributeName)) {
        throw new Error(`Invalid attribute name: "${attributeName}"`);
    }

    // Skip whitespace after attribute name
    i = skipWhitespace(xmlData, i);

    // Read attribute type
    let attributeType = "";
    if (xmlData.substring(i, i + 8).toUpperCase() === "NOTATION") {
        attributeType = "NOTATION";
        i += 8; // Move past "NOTATION"

        // Skip whitespace after "NOTATION"
        i = skipWhitespace(xmlData, i);

        // Expect '(' to start the list of notations
        if (xmlData[i] !== "(") {
            throw new Error(`Expected '(', found "${xmlData[i]}"`);
        }
        i++; // Move past '('

        // Read the list of allowed notations
        let allowedNotations = [];
        while (i < xmlData.length && xmlData[i] !== ")") {
            let notation = "";
            while (i < xmlData.length && xmlData[i] !== "|" && xmlData[i] !== ")") {
                notation += xmlData[i];
                i++;
            }

            // Validate notation name
            notation = notation.trim();
            if (!validateEntityName(notation)) {
                throw new Error(`Invalid notation name: "${notation}"`);
            }

            allowedNotations.push(notation);

            // Skip '|' separator or exit loop
            if (xmlData[i] === "|") {
                i++; // Move past '|'
                i = skipWhitespace(xmlData, i); // Skip optional whitespace after '|'
            }
        }

        if (xmlData[i] !== ")") {
            throw new Error("Unterminated list of notations");
        }
        i++; // Move past ')'

        // Store the allowed notations as part of the attribute type
        attributeType += " (" + allowedNotations.join("|") + ")";
    } else {
        // Handle simple types (e.g., CDATA, ID, IDREF, etc.)
        while (i < xmlData.length && !/\s/.test(xmlData[i])) {
            attributeType += xmlData[i];
            i++;
        }

        // Validate simple attribute type
        const validTypes = ["CDATA", "ID", "IDREF", "IDREFS", "ENTITY", "ENTITIES", "NMTOKEN", "NMTOKENS"];
        if (!validTypes.includes(attributeType.toUpperCase())) {
            throw new Error(`Invalid attribute type: "${attributeType}"`);
        }
    }

    // Skip whitespace after attribute type
    i = skipWhitespace(xmlData, i);

    // Read default value. The slice length is taken from the keyword itself so
    // the comparison window always matches the keyword's real length.
    const requiredKeyword = "#REQUIRED";
    const impliedKeyword = "#IMPLIED";
    let defaultValue = "";
    if (xmlData.substring(i, i + requiredKeyword.length).toUpperCase() === requiredKeyword) {
        defaultValue = requiredKeyword;
        i += requiredKeyword.length;
    } else if (xmlData.substring(i, i + impliedKeyword.length).toUpperCase() === impliedKeyword) {
        defaultValue = impliedKeyword;
        i += impliedKeyword.length;
    } else {
        [i, defaultValue] = readIdentifierVal(xmlData, i, "ATTLIST");
    }

    return {
        elementName,
        attributeName,
        attributeType,
        defaultValue,
        index: i
    };
}

/**
 * Check whether `seq` occurs in `data` starting at index `i + 1`.
 * The character at `i` itself (e.g. the '<' of a declaration) is not compared.
 *
 * @param {string} data - Text being scanned.
 * @param {string} seq - Sequence expected immediately after position `i`.
 * @param {number} i - Index of the character preceding the sequence.
 * @returns {boolean} True when every character of `seq` matches.
 */
function hasSeq(data, seq, i){
    for (let j = 0; j < seq.length; j++) {
        if (seq[j] !== data[i + j + 1]) return false;
    }
    return true;
}

/**
 * Validate a name with `isName`.
 *
 * @param {string} name - Candidate name.
 * @returns {string} The same `name` when `isName(name)` is truthy.
 * @throws {Error} `Invalid entity name ...` otherwise.
 */
function validateEntityName(name){
    if (isName(name))
        return name;
    else
        throw new Error(`Invalid entity name ${name}`);
}