UNPKG

xml-disassembler

Version:

Disassemble XML files into smaller, more manageable files and reassemble the XML when needed.

660 lines (631 loc) 29 kB
'use strict';

var log4js = require('log4js');
var promises = require('node:fs/promises');
var posix = require('node:path/posix');
var yaml = require('yaml');
var json5 = require('json5');
var smolToml = require('smol-toml');
var ini = require('ini');
var node_fs = require('node:fs');
var node_path = require('node:path');
var ignore = require('ignore');
var node_crypto = require('node:crypto');
var fastXmlParser = require('fast-xml-parser');

/******************************************************************************
Copyright (c) Microsoft Corporation.

Permission to use, copy, modify, and/or distribute this software for any
purpose with or without fee is hereby granted.

THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH
REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT,
INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR
OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
***************************************************************************** */
/* global Reflect, Promise, SuppressedError, Symbol, Iterator */

// Inlined async/await down-level helper: drives a generator, resolving each
// yielded value as a promise and feeding the result back into the generator.
function __awaiter(thisArg, _arguments, P, generator) {
    function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
    return new (P || (P = Promise))(function (resolve, reject) {
        function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
        function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
        function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }
        step((generator = generator.apply(thisArg, _arguments || [])).next());
    });
}

// Leftover helper expression from the bundler; its value is unused.
typeof SuppressedError === "function" ? SuppressedError : function (error, suppressed, message) {
    var e = new Error(message);
    return e.name = "SuppressedError", e.error = error, e.suppressed = suppressed, e;
};

/** Declaration emitted when a parsed document carries no `?xml` entry. */
const XML_DEFAULT_DECLARATION = '<?xml version="1.0" encoding="UTF-8"?>';
/** Indentation unit used when writing XML. */
const INDENT = " ";
/** Options handed to the fast-xml-parser `XMLParser`. */
const XML_PARSER_OPTION = {
    commentPropName: "!---",
    ignoreAttributes: false,
    ignoreNameSpace: false,
    parseTagValue: false,
    parseNodeValue: false,
    parseAttributeValue: false,
    trimValues: true,
    processEntities: false,
    cdataPropName: "![CDATA[",
};
/** Options handed to the fast-xml-parser `XMLBuilder` (parser options plus formatting). */
const JSON_PARSER_OPTION = Object.assign(Object.assign({}, XML_PARSER_OPTION), {
    format: true,
    indentBy: INDENT,
    suppressBooleanAttributes: false,
    suppressEmptyNode: false,
});

/**
 * Escape every regular-expression metacharacter in `text` so it can be
 * embedded in a `RegExp` source and matched literally.
 *
 * @param {string} text - Literal text.
 * @returns {string} The escaped text.
 */
function escapeRegExp(text) {
    return text.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
}

/**
 * Return the key of the root element in a parsed document.
 *
 * Keys starting with "?" (the XML declaration and other processing
 * instructions) and the comment key are skipped, so the lookup does not
 * depend on the declaration being present or on key order.
 *
 * @param {object} parsedXml - Parsed document object.
 * @returns {string|undefined} The root element key, or undefined when none exists.
 */
function findRootElementName(parsedXml) {
    return Object.keys(parsedXml).find((key) => !key.startsWith("?") && key !== XML_PARSER_OPTION.commentPropName);
}

/**
 * Merge the XML strings of all disassembled parts into one file.
 *
 * The per-part XML declaration and root open/close tags are stripped, CDATA
 * sections are re-indented, runs of blank lines are collapsed, and the result
 * is wrapped in a single declaration + root element and written to disk.
 *
 * @param {string[]} combinedXmlContents - XML string of each part.
 * @param {string} reassembledPath - Output file path.
 * @param {string} xmlElement - Root element name.
 * @param {string} xmlRootElementHeader - Root opening tag including attributes.
 * @param {string} xmlDeclarationStr - XML declaration line.
 * @returns {Promise<void>}
 */
function buildReassembledFile(combinedXmlContents, reassembledPath, xmlElement, xmlRootElementHeader, xmlDeclarationStr) {
    return __awaiter(this, void 0, void 0, function* () {
        let finalXmlContent = combinedXmlContents.join("\n");
        const xmlDeclarationLineRegex = new RegExp(`^\\s*${escapeRegExp(xmlDeclarationStr)}\\s*$`, "gm");
        finalXmlContent = finalXmlContent.replace(xmlDeclarationLineRegex, "");
        // The name is escaped and must be followed by whitespace, "/" or ">",
        // so a child tag that merely starts with the root name (root "Profile",
        // child "ProfileActionOverrides") keeps its opening tag.
        const escapedRootName = escapeRegExp(xmlElement);
        finalXmlContent = finalXmlContent.replace(new RegExp(`<${escapedRootName}(?=[\\s/>])[^>]*>`, "g"), "");
        finalXmlContent = finalXmlContent.replace(new RegExp(`</${escapedRootName}>`, "g"), "");
        finalXmlContent = finalXmlContent.replace(/<!\[CDATA\[\s*([\s\S]*?)\s*]]>/g, function (_, cdataContent) {
            const trimmedContent = cdataContent.trim();
            const lines = trimmedContent.split("\n");
            const indentedLines = lines.map(function (line) {
                return line.replace(/^\s*/, "");
            });
            return ("<![CDATA[\n" + INDENT + indentedLines.join("\n" + INDENT) + "\n]]>");
        });
        finalXmlContent = finalXmlContent.replace(/(\n\s*){2,}/g, `\n${INDENT}`);
        const closeTag = `</${xmlElement}>`;
        yield promises.writeFile(reassembledPath, `${xmlDeclarationStr}\n${xmlRootElementHeader}${finalXmlContent}${closeTag}`);
        logger.debug(`Created reassembled file: ${reassembledPath}`);
    });
}

/**
 * Serialize a parsed element to an XML string, prefixing every line with
 * `indentLevel * INDENT.length` spaces and trimming trailing whitespace.
 *
 * @param {object} element - Parsed element to serialize.
 * @param {number} [indentLevel=0] - Number of indentation units to prepend.
 * @returns {string} The formatted XML.
 */
function buildXMLString(element, indentLevel = 0) {
    const xmlBuilder = new fastXmlParser.XMLBuilder(JSON_PARSER_OPTION);
    const xmlString = xmlBuilder.build(element);
    const formattedXml = xmlString
        .split("\n")
        .map((line) => `${" ".repeat(indentLevel * INDENT.length)}${line}`)
        .join("\n")
        .trimEnd();
    return formattedXml;
}

/**
 * Read and parse an XML file.
 *
 * @param {string} filePath - Path of the XML file.
 * @returns {Promise<object|undefined>} The parsed object, or undefined when
 *   parsing throws (the failure is logged).
 */
function parseXML(filePath) {
    return __awaiter(this, void 0, void 0, function* () {
        const xmlParser = new fastXmlParser.XMLParser(XML_PARSER_OPTION);
        const xmlContent = yield promises.readFile(filePath, "utf-8");
        let xmlParsed;
        try {
            xmlParsed = xmlParser.parse(xmlContent, true);
            return xmlParsed;
        }
        catch (err) {
            logger.error(`${filePath} was unabled to be parsed and will not be processed. Confirm formatting and try again.`);
            return undefined;
        }
    });
}

/**
 * Build the opening tag of an element from its "@"-prefixed attribute keys.
 *
 * @param {object} rootElement - Parsed element whose attributes are read.
 * @param {string} rootElementName - Tag name.
 * @returns {string} Opening tag, e.g. `<Name attr="value">`.
 */
function buildRootElementHeader(rootElement, rootElementName) {
    let rootElementHeader = `<${rootElementName}`;
    for (const [attrKey, attrValue] of Object.entries(rootElement)) {
        if (attrKey.startsWith("@")) {
            // Drop the two-character attribute prefix.
            const cleanAttrKey = attrKey.slice(2);
            rootElementHeader += ` ${cleanAttrKey}="${String(attrValue)}"`;
        }
    }
    rootElementHeader += ">";
    return rootElementHeader;
}

/**
 * Rebuild the XML declaration from the parsed `?xml` entry, falling back to
 * the default declaration when the document has none.
 *
 * @param {object} parsedXml - Parsed document object.
 * @returns {string} The declaration line.
 */
function buildXMLDeclaration(parsedXml) {
    let xmlDeclarationStr = XML_DEFAULT_DECLARATION;
    if (parsedXml["?xml"]) {
        const xmlDeclaration = parsedXml["?xml"];
        const attributes = Object.entries(xmlDeclaration)
            .map(([key, value]) => `${key.replace("@_", "")}="${value}"`)
            .join(" ");
        xmlDeclarationStr = `<?xml ${attributes}?>`;
    }
    return xmlDeclarationStr;
}

/**
 * Extract the root element name, its opening tag and the XML declaration
 * from a parsed document.
 *
 * @param {object} xmlParsed - Parsed document object.
 * @returns {Promise<[string, string, string]>} `[rootName, rootHeader, declaration]`.
 */
function parseRootElement(xmlParsed) {
    return __awaiter(this, void 0, void 0, function* () {
        const xmlDeclarationStr = buildXMLDeclaration(xmlParsed);
        const rootElementName = findRootElementName(xmlParsed);
        const rootElement = xmlParsed[rootElementName];
        const rootElementHeader = buildRootElementHeader(rootElement, rootElementName);
        return [rootElementName, rootElementHeader, xmlDeclarationStr];
    });
}

/** Reassembles a directory of disassembled files into a single XML file. */
class ReassembleXMLFileHandler {
    /**
     * Recursively collect the XML string of every supported file under
     * `dirPath`, visiting entries sorted by the lower-cased text before the
     * first "." in their name.
     *
     * @param {string} dirPath - Directory to walk.
     * @returns {Promise<[string[], (Array|undefined)]>} The XML strings and
     *   the root info of the last successfully parsed file (undefined when
     *   nothing was parsed).
     */
    processFilesInDirectory(dirPath) {
        return __awaiter(this, void 0, void 0, function* () {
            const combinedXmlContents = [];
            let rootResult = undefined;
            const files = yield promises.readdir(dirPath);
            files.sort((fileA, fileB) => {
                const fullNameA = fileA.split(".")[0].toLowerCase();
                const fullNameB = fileB.split(".")[0].toLowerCase();
                return fullNameA.localeCompare(fullNameB);
            });
            for (const file of files) {
                const filePath = posix.join(dirPath, file);
                const fileStat = yield promises.stat(filePath);
                if (fileStat.isFile()) {
                    if (/\.(xml|json|json5|ya?ml|toml|ini)$/.test(file)) {
                        const parsedObject = yield this.parseToXmlObject(filePath);
                        if (parsedObject === undefined)
                            continue;
                        const rootResultFromFile = yield parseRootElement(parsedObject);
                        rootResult = rootResultFromFile;
                        const combinedXmlString = buildXMLString(parsedObject);
                        combinedXmlContents.push(combinedXmlString);
                    }
                }
                else if (fileStat.isDirectory()) {
                    const [subCombinedXmlContents, subRootResult] = yield this.processFilesInDirectory(filePath);
                    combinedXmlContents.push(...subCombinedXmlContents);
                    // A subdirectory without parseable files must not erase
                    // root info already gathered from earlier entries.
                    if (subRootResult !== undefined) {
                        rootResult = subRootResult;
                    }
                }
            }
            return [combinedXmlContents, rootResult];
        });
    }
    /**
     * Reassemble the directory at `filePath` into `<dirname>.<fileExtension>`
     * (default extension "xml") next to that directory.
     *
     * @param {{filePath: string, fileExtension?: string, postPurge?: boolean}} xmlAttributes
     *   `postPurge` removes the source directory after a successful write.
     * @returns {Promise<void>}
     */
    reassemble(xmlAttributes) {
        return __awaiter(this, void 0, void 0, function* () {
            const { filePath, fileExtension, postPurge = false } = xmlAttributes;
            let combinedXmlContents = [];
            const fileStat = yield promises.stat(filePath);
            if (!fileStat.isDirectory()) {
                logger.error(`The provided path to reassemble is not a directory: ${filePath}`);
                return;
            }
            logger.debug(`Parsing directory to reassemble: ${filePath}`);
            const [subCombinedXmlContents, rootResult] = yield this.processFilesInDirectory(filePath);
            combinedXmlContents = subCombinedXmlContents;
            const parentDirectory = posix.dirname(filePath);
            const subdirectoryBasename = posix.basename(filePath);
            const fileName = fileExtension
                ? `${subdirectoryBasename}.${fileExtension}`
                : `${subdirectoryBasename}.xml`;
            const outputPath = posix.join(parentDirectory, fileName);
            if (rootResult !== undefined) {
                const [rootElementName, rootElementHeader, xmlDeclarationStr] = rootResult;
                yield buildReassembledFile(combinedXmlContents, outputPath, rootElementName, rootElementHeader, xmlDeclarationStr);
                if (postPurge)
                    yield promises.rm(filePath, { recursive: true });
            }
            else {
                logger.error(`No files under ${filePath} were parsed successfully. A reassembled XML file was not created.`);
            }
        });
    }
    /**
     * Parse a disassembled file into the object shape used for XML building,
     * choosing the parser from the file extension.
     *
     * @param {string} filePath - Path of an .xml/.yaml/.yml/.json5/.json/.toml/.ini file.
     * @returns {Promise<object|undefined>} Parsed object; undefined for an
     *   unparseable XML file or an unrecognized extension.
     */
    parseToXmlObject(filePath) {
        return __awaiter(this, void 0, void 0, function* () {
            if (filePath.endsWith(".xml")) {
                return yield parseXML(filePath);
            }
            const fileContent = yield promises.readFile(filePath, "utf-8");
            let parsed;
            if (filePath.endsWith(".yaml") || filePath.endsWith(".yml")) {
                parsed = yaml.parse(fileContent);
            }
            else if (filePath.endsWith(".json5")) {
                parsed = json5.parse(fileContent);
            }
            else if (filePath.endsWith(".json")) {
                parsed = JSON.parse(fileContent);
            }
            else if (filePath.endsWith(".toml")) {
                parsed = smolToml.parse(fileContent);
            }
            else if (filePath.endsWith(".ini")) {
                parsed = ini.parse(fileContent);
            }
            return parsed;
        });
    }
}

/**
 * Depth-first search for the first string value stored under one of
 * `fieldNames`: the element's own fields are checked first, then each nested
 * object in turn.
 *
 * @param {object} element - Parsed element to search.
 * @param {string[]} fieldNames - Candidate field names, in priority order.
 * @returns {string|undefined} The value found, or undefined when none matches.
 */
function findUniqueIdValue(element, fieldNames) {
    for (const fieldName of fieldNames) {
        if (typeof element[fieldName] === "string") {
            return element[fieldName];
        }
    }
    for (const child of Object.values(element)) {
        if (typeof child === "object" && child !== null) {
            const nestedValue = findUniqueIdValue(child, fieldNames);
            if (nestedValue !== undefined) {
                return nestedValue;
            }
        }
    }
    return undefined;
}

/**
 * Derive the file-name stem for a nested element.
 *
 * @param {object} element - Parsed nested element.
 * @param {string} [uniqueIdElements] - Comma-separated field names to look
 *   for; surrounding whitespace around each name is ignored.
 * @returns {string} The first matching string value found in the element or
 *   its descendants, otherwise a short hash of the whole element.
 */
function parseUniqueIdElement(element, uniqueIdElements) {
    if (uniqueIdElements === undefined) {
        return createShortHash(element);
    }
    const fieldNames = uniqueIdElements
        .split(",")
        .map((fieldName) => fieldName.trim())
        .filter((fieldName) => fieldName.length > 0);
    const uniqueId = findUniqueIdValue(element, fieldNames);
    // Hash the element itself (not a descendant) so distinct elements that
    // share a nested child still get distinct file names.
    return uniqueId !== undefined ? uniqueId : createShortHash(element);
}

/**
 * Hash an element's JSON serialization with SHA-256.
 *
 * @param {object} element - Value to hash.
 * @returns {string} The first 8 hex characters of the digest.
 */
function createShortHash(element) {
    const hash = node_crypto.createHash("sha256");
    hash.update(JSON.stringify(element));
    const fullHash = hash.digest("hex");
    return fullHash.slice(0, 8);
}

/**
 * Parse `xmlPath`, serialize it with `stringify`, and write the result next
 * to it with the `.xml` suffix replaced by `.${extension}`.
 *
 * @param {string} xmlPath - Source XML file.
 * @param {string} extension - Target extension without the dot.
 * @param {(parsed: object) => string} stringify - Serializer for the parsed object.
 * @returns {Promise<void>}
 */
function writeTransformedFile(xmlPath, extension, stringify) {
    return __awaiter(this, void 0, void 0, function* () {
        const parsedXml = yield parseXML(xmlPath);
        const serialized = stringify(parsedXml);
        const targetPath = xmlPath.replace(/\.xml$/, `.${extension}`);
        yield promises.writeFile(targetPath, serialized);
        logger.debug(`${xmlPath} has been transformed into ${targetPath}`);
    });
}

/** Write a YAML rendering of `xmlPath` alongside it. */
function transformToYaml(xmlPath) {
    return writeTransformedFile(xmlPath, "yaml", (parsed) => yaml.stringify(parsed));
}

/** Write a JSON5 rendering of `xmlPath` alongside it. */
function transformToJson5(xmlPath) {
    return writeTransformedFile(xmlPath, "json5", (parsed) => json5.stringify(parsed, null, 2));
}

/** Write a JSON rendering of `xmlPath` alongside it. */
function transformToJson(xmlPath) {
    return writeTransformedFile(xmlPath, "json", (parsed) => JSON.stringify(parsed, null, 2));
}

/** Write a TOML rendering of `xmlPath` alongside it. */
function transformToToml(xmlPath) {
    return writeTransformedFile(xmlPath, "toml", (parsed) => smolToml.stringify(parsed));
}

/** Write an INI rendering of `xmlPath` alongside it. */
function transformToIni(xmlPath) {
    return writeTransformedFile(xmlPath, "ini", (parsed) => ini.stringify(parsed));
}

/**
 * Look up the transformer for an output format.
 *
 * @param {string} format - "yaml", "json5", "json", "toml" or "ini".
 * @returns {((xmlPath: string) => Promise<void>)|undefined} The transformer,
 *   or undefined for any other value (including "xml").
 */
function getTransformer(format) {
    switch (format) {
        case "yaml":
            return transformToYaml;
        case "json5":
            return transformToJson5;
        case "json":
            return transformToJson;
        case "toml":
            return transformToToml;
        case "ini":
            return transformToIni;
        default:
            return undefined;
    }
}

/**
 * When `format` has a transformer, convert the XML file just written and
 * remove the XML original; otherwise leave the file untouched.
 *
 * @param {string} xmlPath - XML file that was just written.
 * @param {string} format - Requested output format.
 * @returns {Promise<void>}
 */
function convertToRequestedFormat(xmlPath, format) {
    return __awaiter(this, void 0, void 0, function* () {
        const transformer = getTransformer(format);
        if (transformer) {
            yield transformer(xmlPath);
            yield promises.rm(xmlPath);
        }
    });
}

/**
 * Write one nested element to `<disassembledPath>/<parentKey>/<id>.<parentKey>-meta.xml`,
 * wrapped in the declaration and root element, then convert it to `format`.
 *
 * @returns {Promise<void>}
 */
function buildNestedFile(element, disassembledPath, uniqueIdElements, rootElementName, rootElementHeader, parentKey, indent, xmlDeclarationStr, format) {
    return __awaiter(this, void 0, void 0, function* () {
        const fieldName = parseUniqueIdElement(element, uniqueIdElements);
        const outputDirectory = posix.join(disassembledPath, parentKey);
        const outputFileName = `${fieldName}.${parentKey}-meta.xml`;
        const outputPath = posix.join(outputDirectory, outputFileName);
        yield promises.mkdir(outputDirectory, { recursive: true });
        const parentKeyHeader = buildRootElementHeader(element, parentKey);
        const elementContent = buildXMLString(element, 2);
        let nestedFileContents = `${xmlDeclarationStr}\n`;
        nestedFileContents += `${rootElementHeader}\n`;
        nestedFileContents += `${indent}${parentKeyHeader}\n`;
        nestedFileContents += `${elementContent}\n`;
        nestedFileContents += `${indent}</${parentKey}>\n`;
        nestedFileContents += `</${rootElementName}>`;
        yield promises.writeFile(outputPath, nestedFileContents);
        logger.debug(`Created disassembled file: ${outputPath}`);
        yield convertToRequestedFormat(outputPath, format);
    });
}

/**
 * Unique-id strategy: handle one child of the root. Objects are written to
 * their own file; any other value is appended to the leaf content.
 *
 * @param {object} params - Element, its key, and the running leaf state.
 * @returns {Promise<[string, number, boolean]>} Updated
 *   `[leafContent, leafCount, hasNestedElements]`.
 */
function parseElement$1(params) {
    return __awaiter(this, void 0, void 0, function* () {
        const { element, disassembledPath, uniqueIdElements, rootElementName, rootElementHeader, key, indent, leafContent, leafCount, hasNestedElements, xmlDeclarationStr, format, } = params;
        if (typeof element === "object") {
            yield buildNestedFile(element, disassembledPath, uniqueIdElements, rootElementName, rootElementHeader, key, indent, xmlDeclarationStr, format);
            return [leafContent, leafCount, true];
        }
        else {
            const updatedLeafContent = `${leafContent}${indent}<${key}>${String(element)}</${key}>\n`;
            return [updatedLeafContent, leafCount + 1, hasNestedElements];
        }
    });
}

/**
 * Write the collected leaf elements to `<disassembledPath>/<baseName>.xml`,
 * wrapped in the declaration and root element, then convert it to `format`.
 *
 * @returns {Promise<void>}
 */
function buildLeafFile(leafContent, disassembledPath, baseName, rootElementName, rootElementHeader, xmlDeclarationStr, format) {
    return __awaiter(this, void 0, void 0, function* () {
        let leafFile = `${xmlDeclarationStr}\n`;
        leafFile += `${rootElementHeader}\n`;
        leafFile += leafContent;
        leafFile += `</${rootElementName}>`;
        const leafOutputPath = posix.join(disassembledPath, `${baseName}.xml`);
        yield promises.writeFile(leafOutputPath, leafFile);
        logger.debug(`Created disassembled file: ${leafOutputPath}`);
        yield convertToRequestedFormat(leafOutputPath, format);
    });
}

/**
 * Unique-id strategy: disassemble `filePath` into one file per nested
 * element plus one file holding all leaf elements.
 *
 * Files that fail to parse are skipped; files with only leaf elements are
 * reported and left alone.
 *
 * @returns {Promise<void>}
 */
function buildDisassembledFiles$1(filePath, disassembledPath, uniqueIdElements, baseName, indent, postPurge, format) {
    return __awaiter(this, void 0, void 0, function* () {
        const parsedXml = yield parseXML(filePath);
        if (parsedXml === undefined)
            return;
        const rootElementName = findRootElementName(parsedXml);
        const xmlDeclarationStr = buildXMLDeclaration(parsedXml);
        const rootElement = parsedXml[rootElementName];
        const rootElementHeader = buildRootElementHeader(rootElement, rootElementName);
        let leafContent = "";
        let leafCount = 0;
        let hasNestedElements = false;
        for (const key of Object.keys(rootElement).filter((key) => !key.startsWith("@"))) {
            // Treat a single child like a one-item list so both shapes share a path.
            const elements = Array.isArray(rootElement[key]) ? rootElement[key] : [rootElement[key]];
            for (const element of elements) {
                const [updatedLeafContent, updatedLeafCount, updatedHasNestedElements] = yield parseElement$1({
                    element,
                    disassembledPath,
                    uniqueIdElements,
                    rootElementName,
                    rootElementHeader,
                    key,
                    indent,
                    leafContent,
                    leafCount,
                    hasNestedElements,
                    xmlDeclarationStr,
                    format,
                });
                leafContent = updatedLeafContent;
                leafCount = updatedLeafCount;
                hasNestedElements = updatedHasNestedElements;
            }
        }
        if (!hasNestedElements) {
            logger.error(`The XML file ${filePath} only has leaf elements. This file will not be disassembled.`);
            return;
        }
        if (leafCount > 0) {
            yield buildLeafFile(leafContent, disassembledPath, baseName, rootElementName, rootElementHeader, xmlDeclarationStr, format);
        }
        if (postPurge) {
            // Awaited so a failed delete rejects this call instead of
            // surfacing later as an unhandled rejection.
            yield promises.unlink(filePath);
        }
    });
}

/**
 * Grouped-by-tag strategy: classify one child of the root. Objects are
 * returned in `nestedGroups` under their key; any other value is appended to
 * the leaf content.
 *
 * @param {object} params - Element, its key, and the running leaf state.
 * @returns {Promise<{leafContent: string, leafCount: number, hasNestedElements: boolean, nestedGroups: object}>}
 */
function parseElement(params) {
    return __awaiter(this, void 0, void 0, function* () {
        const { element, key, indent, leafContent, leafCount, hasNestedElements } = params;
        const nestedGroups = {};
        if (typeof element === "object") {
            if (!nestedGroups[key])
                nestedGroups[key] = [];
            nestedGroups[key].push(element);
            return {
                leafContent,
                leafCount,
                hasNestedElements: true,
                nestedGroups,
            };
        }
        else {
            const updatedLeafContent = `${leafContent}${indent}<${key}>${String(element)}</${key}>\n`;
            return {
                leafContent: updatedLeafContent,
                leafCount: leafCount + 1,
                hasNestedElements,
                nestedGroups,
            };
        }
    });
}

/**
 * Write every element that shares `tag` into `<disassembledPath>/<tag>.xml`,
 * wrapped in the declaration and root element, then convert it to `format`.
 *
 * @returns {Promise<void>}
 */
function buildGroupedNestedFile(tag, elements, disassembledPath, rootElementName, rootElementHeader, xmlDeclarationStr, indent, format) {
    return __awaiter(this, void 0, void 0, function* () {
        const outputPath = posix.join(disassembledPath, `${tag}.xml`);
        yield promises.mkdir(disassembledPath, { recursive: true });
        let content = `${xmlDeclarationStr}\n${rootElementHeader}\n`;
        for (const el of elements) {
            const attributes = Object.entries(el)
                .filter(([key, value]) => isPrefixedAttribute(key, value))
                .map(([key, value]) => ` ${key.replace(/^@_/, "")}="${value}"`)
                .join("");
            const elementWithoutAttrs = Object.fromEntries(Object.entries(el).filter(([key]) => !key.startsWith("@_")));
            content += `${indent}<${tag}${attributes}>\n`;
            content += buildXMLString(elementWithoutAttrs, 2);
            content += `\n${indent}</${tag}>\n`;
        }
        content += `</${rootElementName}>`;
        yield promises.writeFile(outputPath, content);
        logger.debug(`Created grouped nested file: ${outputPath}`);
        yield convertToRequestedFormat(outputPath, format);
    });
}

/**
 * Tell whether an entry is an "@_"-prefixed attribute holding a string or
 * number value.
 *
 * @param {string} key - Entry key.
 * @param {*} value - Entry value.
 * @returns {boolean}
 */
function isPrefixedAttribute(key, value) {
    const isAttr = key.startsWith("@_");
    const isLiteral = typeof value === "string" || typeof value === "number";
    return isAttr && isLiteral;
}

/**
 * Grouped-by-tag strategy: disassemble `filePath` into one file per nested
 * tag plus one file holding all leaf elements.
 *
 * Files that fail to parse are skipped; files with only leaf elements are
 * reported and left alone.
 *
 * @returns {Promise<void>}
 */
function buildDisassembledFiles(filePath, disassembledPath, baseName, indent, postPurge, format) {
    return __awaiter(this, void 0, void 0, function* () {
        const parsedXml = yield parseXML(filePath);
        if (parsedXml === undefined)
            return;
        const rootElementName = findRootElementName(parsedXml);
        const xmlDeclarationStr = buildXMLDeclaration(parsedXml);
        const rootElement = parsedXml[rootElementName];
        const rootElementHeader = buildRootElementHeader(rootElement, rootElementName);
        let leafContent = "";
        let leafCount = 0;
        let hasNestedElements = false;
        const nestedGroups = {};
        for (const key of Object.keys(rootElement).filter((key) => !key.startsWith("@"))) {
            const elements = Array.isArray(rootElement[key])
                ? rootElement[key]
                : [rootElement[key]];
            for (const element of elements) {
                const result = yield parseElement({
                    element,
                    key,
                    indent,
                    leafContent,
                    leafCount,
                    hasNestedElements,
                });
                leafContent = result.leafContent;
                leafCount = result.leafCount;
                hasNestedElements = result.hasNestedElements;
                for (const tag in result.nestedGroups) {
                    if (!nestedGroups[tag])
                        nestedGroups[tag] = [];
                    nestedGroups[tag].push(...result.nestedGroups[tag]);
                }
            }
        }
        if (!hasNestedElements) {
            logger.error(`The XML file ${filePath} only has leaf elements. This file will not be disassembled.`);
            return;
        }
        for (const tag in nestedGroups) {
            yield buildGroupedNestedFile(tag, nestedGroups[tag], disassembledPath, rootElementName, rootElementHeader, xmlDeclarationStr, indent, format);
        }
        if (leafCount > 0) {
            yield buildLeafFile(leafContent, disassembledPath, baseName, rootElementName, rootElementHeader, xmlDeclarationStr, format);
        }
        if (postPurge) {
            yield promises.unlink(filePath);
        }
    });
}

/** Disassembles XML files into a directory of smaller files. */
class DisassembleXMLFileHandler {
    constructor() {
        this.ign = ignore();
    }
    /**
     * Disassemble a single XML file, or every `.xml` file directly inside a
     * directory, honoring the ignore file when it exists.
     *
     * @param {object} xmlAttributes
     * @param {string} xmlAttributes.filePath - File or directory to process.
     * @param {string} [xmlAttributes.uniqueIdElements] - Comma-separated id field names.
     * @param {string} [xmlAttributes.strategy="unique-id"] - "unique-id" or "grouped-by-tag".
     * @param {boolean} [xmlAttributes.prePurge=false] - Remove an existing output directory first.
     * @param {boolean} [xmlAttributes.postPurge=false] - Remove the source file afterwards.
     * @param {string} [xmlAttributes.ignorePath=".xmldisassemblerignore"] - Ignore file path.
     * @param {string} [xmlAttributes.format="xml"] - Output format.
     * @returns {Promise<void>}
     */
    disassemble(xmlAttributes) {
        return __awaiter(this, void 0, void 0, function* () {
            const { filePath, uniqueIdElements, strategy = "unique-id", prePurge = false, postPurge = false, ignorePath = ".xmldisassemblerignore", format = "xml", } = xmlAttributes;
            const resolvedIgnorePath = node_path.resolve(ignorePath);
            if (node_fs.existsSync(resolvedIgnorePath)) {
                const content = yield promises.readFile(resolvedIgnorePath);
                this.ign.add(content.toString());
            }
            const fileStat = yield promises.stat(filePath);
            const relativePath = this.posixPath(node_path.relative(process.cwd(), filePath));
            if (fileStat.isFile()) {
                const resolvedPath = node_path.resolve(filePath);
                if (!resolvedPath.endsWith(".xml")) {
                    logger.error(`The file path provided is not an XML file: ${resolvedPath}`);
                    return;
                }
                if (this.ign.ignores(relativePath)) {
                    logger.warn(`File ignored by ${ignorePath}: ${resolvedPath}`);
                    return;
                }
                const dirPath = node_path.dirname(resolvedPath);
                yield this.processFile({
                    dirPath,
                    strategy,
                    filePath: resolvedPath,
                    uniqueIdElements,
                    prePurge,
                    postPurge,
                    format,
                });
            }
            else if (fileStat.isDirectory()) {
                const subFiles = yield promises.readdir(filePath);
                for (const subFile of subFiles) {
                    const subFilePath = node_path.join(filePath, subFile);
                    const relativeSubFilePath = this.posixPath(node_path.relative(process.cwd(), subFilePath));
                    if (subFilePath.endsWith(".xml") &&
                        !this.ign.ignores(relativeSubFilePath)) {
                        yield this.processFile({
                            dirPath: filePath,
                            strategy,
                            filePath: subFilePath,
                            uniqueIdElements,
                            prePurge,
                            postPurge,
                            format,
                        });
                    }
                    else if (this.ign.ignores(relativeSubFilePath)) {
                        logger.warn(`File ignored by ${ignorePath}: ${subFilePath}`);
                    }
                }
            }
        });
    }
    /**
     * Disassemble one XML file into `<dirPath>/<baseName>`, where `baseName`
     * is the file name up to its first ".".
     *
     * @param {object} xmlAttributes - dirPath, strategy, filePath,
     *   uniqueIdElements, prePurge, postPurge and format.
     * @returns {Promise<void>}
     */
    processFile(xmlAttributes) {
        return __awaiter(this, void 0, void 0, function* () {
            const { dirPath, strategy, filePath, uniqueIdElements, prePurge, postPurge, format, } = xmlAttributes;
            logger.debug(`Parsing file to disassemble: ${filePath}`);
            const fullName = node_path.basename(filePath, node_path.extname(filePath));
            const baseName = fullName.split(".")[0];
            const outputPath = node_path.join(dirPath, baseName);
            if (prePurge && node_fs.existsSync(outputPath))
                yield promises.rm(outputPath, { recursive: true });
            if (strategy === "grouped-by-tag") {
                yield buildDisassembledFiles(filePath, outputPath, fullName, INDENT, postPurge, format);
            }
            else {
                yield buildDisassembledFiles$1(filePath, outputPath, uniqueIdElements, fullName, INDENT, postPurge, format);
            }
        });
    }
    /**
     * Replace each run of backslashes with a single forward slash.
     *
     * @param {string} path - Path possibly using backslash separators.
     * @returns {string}
     */
    posixPath(path) {
        return path.replace(/\\+/g, "/");
    }
}

/**
 * Set the level of the default log4js logger.
 *
 * @param {string} level - log4js level name.
 */
function setLogLevel(level) {
    log4js.getLogger().level = level;
}

// Default logger: writes to "disassemble.log" at level "error".
const logger = log4js.getLogger();
log4js.configure({
    appenders: { disassemble: { type: "file", filename: "disassemble.log" } },
    categories: { default: { appenders: ["disassemble"], level: "error" } },
});

exports.DisassembleXMLFileHandler = DisassembleXMLFileHandler;
exports.ReassembleXMLFileHandler = ReassembleXMLFileHandler;
exports.buildXMLString = buildXMLString;
exports.logger = logger;
exports.parseXML = parseXML;
exports.setLogLevel = setLogLevel;
//# sourceMappingURL=index.cjs.map