/*
 * xml-disassembler
 * Version: (not captured in this listing)
 * Disassemble XML files into smaller, more manageable files and reassemble the XML when needed.
 * 765 lines (740 loc) • 28.8 kB
 * JavaScript
 */
import { getLogger, configure } from 'log4js';
import { readFile, writeFile, rm, readdir, stat, mkdir, unlink } from 'node:fs/promises';
import { dirname, basename, join } from 'node:path/posix';
import { existsSync } from 'node:fs';
import { relative, resolve, dirname as dirname$1, join as join$1, basename as basename$1, extname } from 'node:path';
import ignore from 'ignore';
import { XMLParser, XMLBuilder } from 'fast-xml-parser';
import { parse as parse$2, stringify as stringify$1 } from 'json5';
import { parse as parse$1, stringify as stringify$2 } from 'smol-toml';
import { parse, stringify as stringify$3 } from 'ini';
import { parse as parse$3, stringify } from 'yaml';
import { createHash } from 'node:crypto';
import { readFile as readFile$1 } from 'fs/promises';
/******************************************************************************
Copyright (c) Microsoft Corporation.
Permission to use, copy, modify, and/or distribute this software for any
purpose with or without fee is hereby granted.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH
REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT,
INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR
OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
***************************************************************************** */
/* global Reflect, Promise, SuppressedError, Symbol, Iterator */
/**
 * Drives a generator function as an asynchronous routine and returns a promise
 * for its final value. Each value the generator yields is awaited before the
 * generator is resumed with the settled result.
 * NOTE(review): presumably the down-level async helper emitted by the
 * TypeScript compiler (see the license header above) — confirm before editing.
 *
 * @param thisArg - `this` value the generator function is applied with.
 * @param _arguments - Argument list applied to the generator function; `[]` when falsy.
 * @param P - Promise constructor to use; falls back to the global `Promise` when falsy.
 * @param generator - Generator function holding the routine's body.
 * @returns A `P` instance that resolves with the generator's return value, or
 * rejects with an error thrown out of the generator.
 */
function __awaiter(thisArg, _arguments, P, generator) {
// Values that are already instances of P pass through; anything else is wrapped in a P resolved with it.
function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
// `P || (P = Promise)` also assigns the default, so `adopt` above sees the same constructor.
return new (P || (P = Promise))(function (resolve, reject) {
// Resume the generator with a fulfilled value; an exception escaping the generator rejects the outer promise.
function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
// Throw a rejection reason into the generator at the paused `yield`.
function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
// Finish when the generator is done; otherwise wait for the yielded value and continue.
function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }
// Instantiate the generator (reusing the `generator` binding) and run it to its first yield.
step((generator = generator.apply(thisArg, _arguments || [])).next());
});
}
// Evaluates to the native SuppressedError constructor when the runtime defines
// one, otherwise to a fallback factory that builds an Error named
// "SuppressedError" carrying `error` and `suppressed` properties.
// NOTE(review): the result of this expression is not assigned or used in the
// visible code, so the statement appears to have no effect — presumably a
// leftover from bundling; confirm before removing.
typeof SuppressedError === "function" ? SuppressedError : function (error, suppressed, message) {
var e = new Error(message);
return e.name = "SuppressedError", e.error = error, e.suppressed = suppressed, e;
};
/**
 * Combine several parsed XML documents into a single document.
 *
 * The root key (first key that is not "?xml") and the XML declaration are
 * taken from the first document; the content found under that root key in
 * every document is merged into one object.
 *
 * @param {object[]} elements - Parsed documents to merge.
 * @returns {object|undefined} The merged document, or `undefined` (after
 * logging an error) when `elements` is empty.
 */
function mergeXmlElements(elements) {
  if (elements.length === 0) {
    logger.error("No elements to merge.");
    return;
  }
  const [head] = elements;
  const rootKey = Object.keys(head).find((name) => name !== "?xml");
  const mergedContent = {};
  elements.forEach((doc) => {
    mergeElementContent(mergedContent, doc[rootKey]);
  });
  return buildFinalXmlElement(head["?xml"], rootKey, mergedContent);
}
/**
 * Merge every own enumerable entry of `source` into `target`, choosing the
 * merge routine by the kind of value: arrays, other non-null objects, or
 * everything else.
 *
 * @param {object} target - Accumulator that receives the merged values.
 * @param {object} source - Object whose entries are merged in.
 */
function mergeElementContent(target, source) {
  Object.entries(source).forEach(([name, item]) => {
    if (Array.isArray(item)) {
      mergeArrayValue(target, name, item);
      return;
    }
    if (isMergeableObject(item)) {
      mergeObjectValue(target, name, item);
      return;
    }
    mergePrimitiveValue(target, name, item);
  });
}
/**
 * Merge an array value into `target[key]`.
 *
 * - No (truthy) value yet: a copy of `value` is stored.
 * - Existing array: the items of `value` are appended to it.
 * - Existing non-array value: it becomes the first item of a new array.
 *
 * The incoming array is never stored by reference, so later appends to the
 * accumulator cannot modify the caller's data.
 *
 * @param {object} target - Accumulator being built.
 * @param {string} key - Property to merge into.
 * @param {Array} value - Items to merge.
 */
function mergeArrayValue(target, key, value) {
  const existing = target[key];
  if (!existing) {
    // Copy: storing `value` itself would let later merges push into the source array.
    target[key] = [...value];
    return;
  }
  if (Array.isArray(existing)) {
    // Item-by-item append avoids the argument-count limit that `push(...value)`
    // can hit on very large arrays.
    for (const item of value) {
      existing.push(item);
    }
    return;
  }
  target[key] = [existing, ...value];
}
/**
 * Merge a single object value into `target[key]`: append to an existing
 * array, pair up with an existing truthy value in a new two-item array, or
 * store the value directly when nothing truthy is there yet.
 *
 * @param {object} target - Accumulator being built.
 * @param {string} key - Property to merge into.
 * @param {object} value - Object to merge.
 */
function mergeObjectValue(target, key, value) {
  const current = target[key];
  if (Array.isArray(current)) {
    current.push(value);
    return;
  }
  target[key] = current ? [current, value] : value;
}
/**
 * Store `value` under `key` only when `target` has no own property of that
 * name yet; the first value seen wins.
 *
 * @param {object} target - Accumulator being built.
 * @param {string} key - Property to set.
 * @param {*} value - Value to store.
 */
function mergePrimitiveValue(target, key, value) {
  const alreadySet = Object.prototype.hasOwnProperty.call(target, key);
  if (alreadySet) {
    return;
  }
  target[key] = value;
}
/**
 * Report whether `value` is a non-null value of type "object" (arrays
 * included).
 *
 * @param {*} value - Value to test.
 * @returns {boolean}
 */
function isMergeableObject(value) {
  if (value === null) {
    return false;
  }
  return typeof value === "object";
}
/**
 * Assemble the final document object: the content under `rootKey`, preceded
 * by a "?xml" entry when a (truthy) declaration is supplied.
 *
 * @param {*} declaration - XML declaration to include, if any.
 * @param {string} rootKey - Name of the root element.
 * @param {object} content - Content of the root element.
 * @returns {object}
 */
function buildFinalXmlElement(declaration, rootKey, content) {
  const body = { [rootKey]: content };
  if (!declaration) {
    return body;
  }
  // The declaration is assigned first so it stays the first key.
  return Object.assign({ "?xml": declaration }, body);
}
// Indentation unit handed to the XML builder through `indentBy`.
const INDENT = " ";

// Options passed to the XMLParser constructor (see getParser).
const XML_PARSER_OPTION = {
  commentPropName: "!---",
  ignoreAttributes: false,
  ignoreNameSpace: false,
  parseTagValue: false,
  parseNodeValue: false,
  parseAttributeValue: false,
  trimValues: false,
  processEntities: false,
  cdataPropName: "![CDATA[",
};

// Options passed to the XMLBuilder constructor (see getBuilder): the parser
// options plus output-formatting settings, in a separate object.
const JSON_PARSER_OPTION = Object.assign({}, XML_PARSER_OPTION, {
  format: true,
  indentBy: INDENT,
  suppressBooleanAttributes: false,
  suppressEmptyNode: false,
});
/**
 * Report whether an entry is a "#text" property whose string value is empty
 * or whitespace only.
 *
 * @param {string} key - Property name.
 * @param {*} value - Property value.
 * @returns {boolean}
 */
function isEmptyTextNode(key, value) {
  return key === "#text" && typeof value === "string" && value.trim() === "";
}

/**
 * Clean every entry of an array and drop entries that ended up as empty
 * objects or empty arrays. `null` entries are kept as-is.
 *
 * @param {Array} arr - Array to clean.
 * @returns {Array} A new array.
 */
function cleanArray(arr) {
  return arr.map(stripWhitespaceTextNodes).filter((entry) => {
    // `typeof null` is "object", so null must be ruled out before Object.keys.
    if (entry === null || typeof entry !== "object") {
      return true;
    }
    return Object.keys(entry).length > 0;
  });
}

/**
 * Copy an object, skipping whitespace-only "#text" entries and cleaning every
 * other value recursively. Values that clean to `undefined` are omitted.
 *
 * @param {object} obj - Object to clean.
 * @returns {object} A new object.
 */
function cleanObject(obj) {
  const result = {};
  for (const key in obj) {
    const value = obj[key];
    if (isEmptyTextNode(key, value)) {
      continue;
    }
    const cleaned = stripWhitespaceTextNodes(value);
    if (cleaned !== undefined) {
      result[key] = cleaned;
    }
  }
  return result;
}

/**
 * Recursively remove whitespace-only "#text" entries from a parsed structure.
 * Arrays and non-null objects are rebuilt; any other value (including `null`)
 * is returned unchanged.
 *
 * @param {*} node - Parsed value to clean.
 * @returns {*} The cleaned value.
 */
function stripWhitespaceTextNodes(node) {
  if (Array.isArray(node)) {
    return cleanArray(node);
  }
  if (typeof node === "object" && node !== null) {
    return cleanObject(node);
  }
  return node;
}
// Lazily created XMLParser shared by every parseXML call.
let cachedParser = null;

/**
 * Return the shared XMLParser, constructing it with XML_PARSER_OPTION on
 * first use.
 *
 * @returns {XMLParser}
 */
function getParser() {
  cachedParser = cachedParser || new XMLParser(XML_PARSER_OPTION);
  return cachedParser;
}
/**
 * Read an XML file as UTF-8, parse it with the shared parser, and strip
 * whitespace-only text nodes from the result.
 *
 * Any error raised while reading, parsing or cleaning is logged and turned
 * into an `undefined` result.
 *
 * @param {string} filePath - Path of the XML file.
 * @returns {Promise<object|undefined>} The cleaned parse result, or
 * `undefined` when the file could not be processed.
 */
function parseXML(filePath) {
  return __awaiter(this, void 0, void 0, function* () {
    // Obtained outside the try block, as before: a parser construction failure is not swallowed.
    const xmlParser = getParser();
    try {
      const xmlContent = yield readFile(filePath, "utf-8");
      // The second argument to parse() is passed as `true`, as in the original call.
      return stripWhitespaceTextNodes(xmlParser.parse(xmlContent, true));
    }
    catch (err) {
      logger.error(`${filePath} was unabled to be parsed and will not be processed. Confirm formatting and try again.`);
      return undefined;
    }
  });
}
// Lookup table from file extension (leading dot included) to the function
// that parses that file's text: `parse$3` is the yaml parser, `parse$2` the
// json5 parser, `parse$1` the smol-toml parser and `parse` the ini parser
// (see the imports at the top of the file).
const parsers = {
".yaml": parse$3,
".yml": parse$3,
".json": JSON.parse,
".json5": parse$2,
".toml": parse$1,
".ini": parse,
};
/**
 * Parse a disassembled file into an object, picking the parser from the file
 * extension. `.xml` files go through parseXML; other supported extensions are
 * read as UTF-8 and handed to the matching entry of `parsers`.
 *
 * Extension matching is case-insensitive so that it agrees with the
 * case-insensitive filter applied by ReassembleXMLFileHandler._isParsableFile.
 *
 * @param {string} filePath - Path of the file to parse.
 * @returns {Promise<object|undefined>} The parsed content, or `undefined`
 * when the extension is not supported (an error is logged) or parseXML
 * could not process the file.
 */
function parseToXmlObject(filePath) {
  return __awaiter(this, void 0, void 0, function* () {
    const loweredPath = filePath.toLowerCase();
    if (loweredPath.endsWith(".xml")) {
      return yield parseXML(filePath);
    }
    const ext = Object.keys(parsers).find((candidate) => loweredPath.endsWith(candidate));
    if (ext === undefined) {
      // Previously this fell through to `parsers[undefined](...)` and threw a TypeError.
      logger.error(`${filePath} has an unsupported file extension and will not be processed.`);
      return undefined;
    }
    const fileContent = yield readFile$1(filePath, "utf-8");
    return parsers[ext](fileContent);
  });
}
/**
 * Rebuilds a single XML file from a directory of disassembled files.
 */
class ReassembleXMLFileHandler {
  /**
   * Parse every supported file under `filePath`, merge the results and write
   * them as one XML document next to the directory.
   *
   * @param {object} xmlAttributes
   * @param {string} xmlAttributes.filePath - Directory to reassemble.
   * @param {string} [xmlAttributes.fileExtension] - Extension of the output file; "xml" when null/undefined.
   * @param {boolean} [xmlAttributes.postPurge=false] - Remove the directory after writing.
   * @returns {Promise<void>}
   */
  reassemble(xmlAttributes) {
    return __awaiter(this, void 0, void 0, function* () {
      const { filePath, fileExtension, postPurge = false } = xmlAttributes;
      const isDirectory = yield this._validateDirectory(filePath);
      if (!isDirectory) {
        return;
      }
      logger.debug(`Parsing directory to reassemble: ${filePath}`);
      const documents = yield this.processFilesInDirectory(filePath);
      if (documents.length === 0) {
        this._logEmptyParseError(filePath);
        return;
      }
      const xmlText = buildXMLString(mergeXmlElements(documents));
      const destination = this._getOutputPath(filePath, fileExtension);
      yield writeFile(destination, xmlText, "utf-8");
      if (postPurge) {
        yield rm(filePath, { recursive: true });
      }
    });
  }

  /**
   * Recursively collect the parsed content of every supported file below
   * `dirPath`. Entries are visited in base-name order; all entries are
   * stat-ed up front, then parsed one after another.
   *
   * @param {string} dirPath - Directory to walk.
   * @returns {Promise<object[]>} Parsed documents (falsy parse results are skipped).
   */
  processFilesInDirectory(dirPath) {
    return __awaiter(this, void 0, void 0, function* () {
      const collected = [];
      const names = yield readdir(dirPath);
      const ordered = this._sortFilesByBaseName(names);
      const entries = yield Promise.all(ordered.map((name) => stat(join(dirPath, name)).then((info) => ({ name, info }))));
      for (const { name, info } of entries) {
        const entryPath = join(dirPath, name);
        if (info.isFile() && this._isParsableFile(name)) {
          const document = yield parseToXmlObject(entryPath);
          if (document) {
            collected.push(document);
          }
          continue;
        }
        if (info.isDirectory()) {
          const nested = yield this.processFilesInDirectory(entryPath);
          collected.push(...nested);
        }
      }
      return collected;
    });
  }

  /**
   * Sort names in place by the text before their first "." and return the
   * same array.
   *
   * @param {string[]} files
   * @returns {string[]}
   */
  _sortFilesByBaseName(files) {
    const stem = (name) => name.split(".")[0];
    return files.sort((left, right) => stem(left).localeCompare(stem(right)));
  }

  /**
   * Report whether the file name ends in a supported extension
   * (case-insensitive).
   *
   * @param {string} fileName
   * @returns {boolean}
   */
  _isParsableFile(fileName) {
    const supportedExtension = /\.(xml|json|json5|ya?ml|toml|ini)$/i;
    return supportedExtension.test(fileName);
  }

  /**
   * Check that `path` is a directory, logging an error when it is not.
   *
   * @param {string} path
   * @returns {Promise<boolean>}
   */
  _validateDirectory(path) {
    return __awaiter(this, void 0, void 0, function* () {
      const info = yield stat(path);
      if (info.isDirectory()) {
        return true;
      }
      logger.error(`The provided path to reassemble is not a directory: ${path}`);
      return false;
    });
  }

  /**
   * Log that nothing under `path` could be parsed.
   *
   * @param {string} path
   */
  _logEmptyParseError(path) {
    logger.error(`No files under ${path} were parsed successfully. A reassembled XML file was not created.`);
  }

  /**
   * Build the output file path: the directory's own name plus an extension,
   * placed in the directory's parent.
   *
   * @param {string} dirPath - Directory being reassembled.
   * @param {string} [extension] - Output extension; "xml" when null/undefined.
   * @returns {string}
   */
  _getOutputPath(dirPath, extension) {
    const suffix = extension == null ? "xml" : extension;
    return join(dirname(dirPath), `${basename(dirPath)}.${suffix}`);
  }
}
// Lazily created XMLBuilder shared by every buildXMLString call.
let cachedBuilder = null;

/**
 * Return the shared XMLBuilder, constructing it with JSON_PARSER_OPTION on
 * first use.
 *
 * @returns {XMLBuilder}
 */
function getBuilder() {
  cachedBuilder = cachedBuilder || new XMLBuilder(JSON_PARSER_OPTION);
  return cachedBuilder;
}
/**
 * Serialize a parsed XML object with the shared builder and trim trailing
 * whitespace from the result.
 *
 * @param {object} element - Parsed XML object.
 * @returns {string}
 */
function buildXMLString(element) {
  const built = getBuilder().build(element);
  return built.trimEnd();
}
/**
 * Serialize a parsed XML object with the yaml package's `stringify`.
 *
 * @param {object} parsedXml
 * @returns {Promise<string>}
 */
function transformToYaml(parsedXml) {
  return __awaiter(this, void 0, void 0, function* () {
    return stringify(parsedXml);
  });
}
/**
 * Serialize a parsed XML object with the json5 package's `stringify`, using
 * a two-space indent.
 *
 * @param {object} parsedXml
 * @returns {Promise<string>}
 */
function transformToJson5(parsedXml) {
  return __awaiter(this, void 0, void 0, function* () {
    return stringify$1(parsedXml, null, 2);
  });
}
/**
 * Serialize a parsed XML object as JSON with a two-space indent.
 *
 * @param {object} parsedXml
 * @returns {Promise<string>}
 */
function transformToJson(parsedXml) {
  return __awaiter(this, void 0, void 0, function* () {
    return JSON.stringify(parsedXml, null, 2);
  });
}
/**
 * Serialize a parsed XML object with the smol-toml package's `stringify`.
 *
 * @param {object} parsedXml
 * @returns {Promise<string>}
 */
function transformToToml(parsedXml) {
  return __awaiter(this, void 0, void 0, function* () {
    return stringify$2(parsedXml);
  });
}
/**
 * Serialize a parsed XML object with the ini package's `stringify`.
 *
 * @param {object} parsedXml
 * @returns {Promise<string>}
 */
function transformToIni(parsedXml) {
  return __awaiter(this, void 0, void 0, function* () {
    return stringify$3(parsedXml);
  });
}
// Lookup table from output format name to the async function that serializes
// a parsed XML object in that format. There is no "xml" entry; a format with
// no entry here is written with buildXMLString by buildDisassembledFile.
const transformers = {
yaml: transformToYaml,
json5: transformToJson5,
json: transformToJson,
toml: transformToToml,
ini: transformToIni,
};
/**
 * Look up the serializer registered for an output format.
 *
 * Only own entries of `transformers` are considered, so names inherited from
 * Object.prototype (for example "toString" or "constructor") are not mistaken
 * for registered formats.
 *
 * @param {string} format - Output format name.
 * @returns {Function|undefined} The serializer, or `undefined` when the
 * format has no entry.
 */
function getTransformer(format) {
  const isRegistered = Object.prototype.hasOwnProperty.call(transformers, format);
  return isRegistered ? transformers[format] : undefined;
}
// Remembers the JSON text of elements that have already been hashed.
const stringifyCache = new WeakMap();

/**
 * Derive the identifier used to name an element's disassembled file.
 *
 * Lookup order: a matching string field on the element itself, then a
 * matching field found anywhere in its nested objects, and finally a short
 * hash of the whole element.
 *
 * @param {object} element - Parsed element.
 * @param {string} [uniqueIdElements] - Comma-separated candidate field names.
 * @returns {string} The matched field value or an 8-character hash.
 */
function parseUniqueIdElement(element, uniqueIdElements) {
  if (!uniqueIdElements) {
    return createShortHash(element);
  }
  const directMatch = findDirectFieldMatch(element, uniqueIdElements.split(","));
  if (directMatch !== undefined) {
    return directMatch;
  }
  const nestedMatch = findNestedFieldMatch(element, uniqueIdElements);
  if (nestedMatch !== undefined) {
    return nestedMatch;
  }
  // Hash the element itself, never one of its children, so that elements
  // which merely share a child do not end up with the same identifier.
  return createShortHash(element);
}

/**
 * Return the first string value found on `element` under one of the given
 * field names, in the order the names are listed.
 *
 * @param {object} element
 * @param {string[]} fieldNames
 * @returns {string|undefined}
 */
function findDirectFieldMatch(element, fieldNames) {
  for (const name of fieldNames) {
    const value = element[name];
    if (typeof value === "string") {
      return value;
    }
  }
  return undefined;
}

/**
 * Search the nested (non-array) objects of `element`, depth first, for a
 * non-empty string value under one of the candidate field names. Every child
 * is examined; a child without a match does not end the search.
 *
 * @param {object} element
 * @param {string} uniqueIdElements - Comma-separated candidate field names.
 * @returns {string|undefined} The first match, or `undefined` when none exists.
 */
function findNestedFieldMatch(element, uniqueIdElements) {
  return searchChildrenForField(element, uniqueIdElements.split(","));
}

// Depth-first helper for findNestedFieldMatch that never falls back to a hash.
function searchChildrenForField(element, fieldNames) {
  for (const key in element) {
    const child = element[key];
    if (!isObject(child)) {
      continue;
    }
    const direct = findDirectFieldMatch(child, fieldNames);
    const match = direct !== undefined ? direct : searchChildrenForField(child, fieldNames);
    if (match) {
      return match;
    }
  }
  return undefined;
}

/**
 * Report whether `value` is a non-null, non-array object.
 *
 * @param {*} value
 * @returns {boolean}
 */
function isObject(value) {
  return typeof value === "object" && value !== null && !Array.isArray(value);
}

/**
 * Return the first 8 hex characters of the SHA-256 digest of the element's
 * JSON text. The JSON text of object elements is cached.
 *
 * @param {*} element
 * @returns {string}
 */
function createShortHash(element) {
  // WeakMap keys must be objects; other values are stringified without caching.
  const cacheable = typeof element === "object" && element !== null;
  let stringified = cacheable ? stringifyCache.get(element) : undefined;
  if (!stringified) {
    stringified = JSON.stringify(element);
    if (cacheable) {
      stringifyCache.set(element, stringified);
    }
  }
  const hash = createHash("sha256").update(stringified).digest("hex");
  return hash.slice(0, 8);
}
/**
 * Write one disassembled file.
 *
 * The content is placed under the root element (optionally wrapped in
 * `wrapKey`, together with the root attributes and XML declaration),
 * serialized with the transformer registered for `format` or as XML when
 * there is none, and written into `disassembledPath` (or its `subdirectory`).
 * When no `outputFileName` is given and the content is a single wrapped
 * object, the file name is derived from the element's unique ID.
 *
 * @param {object} _a - Options; see the destructured parameter list.
 * @returns {Promise<void>}
 */
function buildDisassembledFile(_a) {
  return __awaiter(this, arguments, void 0, function* ({ content, disassembledPath, outputFileName, subdirectory, wrapKey, isGroupedArray = false, rootElementName, rootAttributes, xmlDeclaration, format, uniqueIdElements, }) {
    const targetDirectory = subdirectory
      ? join(disassembledPath, subdirectory)
      : disassembledPath;
    const needsGeneratedName = !outputFileName && wrapKey && !isGroupedArray && typeof content === "object";
    const fileName = needsGeneratedName
      ? `${parseUniqueIdElement(content, uniqueIdElements)}.${wrapKey}-meta.${format}`
      : outputFileName;
    const outputPath = join(targetDirectory, fileName);
    yield mkdir(targetDirectory, { recursive: true });
    // Root attributes first, then either the wrapped content or the content's own entries.
    const rootBody = Object.assign({}, rootAttributes, wrapKey ? { [wrapKey]: content } : content);
    let wrappedXml = { [rootElementName]: rootBody };
    if (typeof xmlDeclaration === "object" && xmlDeclaration !== null) {
      wrappedXml = Object.assign({ "?xml": xmlDeclaration }, wrappedXml);
    }
    const transformer = getTransformer(format);
    let outputString;
    if (transformer) {
      outputString = yield transformer(wrappedXml);
    }
    else {
      outputString = buildXMLString(wrappedXml);
    }
    yield writeFile(outputPath, outputString);
    logger.debug(`Created disassembled file: ${outputPath}`);
  });
}
/**
 * Collect the entries of `element` whose key starts with "@" and whose value
 * is a string.
 *
 * @param {object} element - Parsed root element.
 * @returns {object} A new object holding only those entries.
 */
function extractRootAttributes(element) {
  return Object.entries(element).reduce((attributes, [name, item]) => {
    if (typeof item === "string" && name.startsWith("@")) {
      attributes[name] = item;
    }
    return attributes;
  }, {});
}
/**
 * Classify one element of the root as leaf or nested and act on it.
 *
 * An element is nested when it is an array, or an object with at least one
 * key that does not start with "#". Nested elements are returned as a group
 * (strategy "grouped-by-tag") or written straight to their own file (any
 * other strategy). Leaf elements are returned as leaf content and bump the
 * leaf count by one.
 *
 * @param {object} params - Element plus the disassembly context.
 * @returns {Promise<object>} `{ leafContent, leafCount, hasNestedElements }`,
 * with `nestedGroups` added for grouped nested elements.
 */
function parseElementUnified(params) {
  return __awaiter(this, void 0, void 0, function* () {
    const { element, disassembledPath, uniqueIdElements, rootElementName, rootAttributes, key, leafCount, hasNestedElements, format, xmlDeclaration, strategy, } = params;
    const hasChildKeys = typeof element === "object" &&
      element !== null &&
      Object.keys(element).some((name) => !name.startsWith("#"));
    if (!Array.isArray(element) && !hasChildKeys) {
      return {
        leafContent: {
          [key]: [element],
        },
        leafCount: leafCount + 1,
        hasNestedElements,
      };
    }
    if (strategy === "grouped-by-tag") {
      return {
        leafContent: {},
        leafCount,
        hasNestedElements: true,
        nestedGroups: { [key]: [element] },
      };
    }
    yield buildDisassembledFile({
      content: element,
      disassembledPath,
      subdirectory: key,
      wrapKey: key,
      rootElementName,
      rootAttributes,
      xmlDeclaration,
      format,
      uniqueIdElements,
    });
    return {
      leafContent: {},
      leafCount,
      hasNestedElements: true,
    };
  });
}
/**
 * Disassemble one XML file into the files under `disassembledPath`.
 *
 * Steps: parse the file, split the root's children into leaf content and
 * nested elements, stop if the file holds only leaves, write grouped nested
 * elements (strategy "grouped-by-tag") and the leaf file, then optionally
 * delete the source file.
 *
 * The source file is only deleted when something was actually disassembled;
 * a file whose root has no children produces no output and is kept.
 *
 * @param {object} _a - Options; see the destructured parameter list.
 * @returns {Promise<void>}
 */
function buildDisassembledFilesUnified(_a) {
  return __awaiter(this, arguments, void 0, function* ({ filePath, disassembledPath, baseName, postPurge, format, uniqueIdElements, strategy, }) {
    const parsedXml = yield parseXML(filePath);
    if (!parsedXml)
      return;
    const { rootElementName, rootElement, xmlDeclaration } = getRootInfo(parsedXml);
    const rootAttributes = extractRootAttributes(rootElement);
    // Attribute keys are carried separately in rootAttributes.
    const keyOrder = Object.keys(rootElement).filter((k) => !k.startsWith("@"));
    const { leafContent, nestedGroups, leafCount, hasNestedElements } = yield disassembleElementKeys({
      rootElement,
      keyOrder,
      disassembledPath,
      rootElementName,
      rootAttributes,
      xmlDeclaration,
      uniqueIdElements,
      strategy,
      format,
    });
    if (shouldAbortForLeafOnly(leafCount, hasNestedElements, filePath))
      return;
    yield writeNestedGroups(nestedGroups, strategy, {
      disassembledPath,
      rootElementName,
      rootAttributes,
      xmlDeclaration,
      format,
    });
    yield writeLeafContentIfAny({
      leafCount,
      leafContent,
      strategy,
      keyOrder,
      options: {
        disassembledPath,
        outputFileName: `${baseName}.${format}`,
        rootElementName,
        rootAttributes,
        xmlDeclaration,
        format,
      },
    });
    if (!postPurge) {
      return;
    }
    if (!hasNestedElements && leafCount === 0) {
      // Nothing was written above; deleting the source now would lose its content.
      logger.warn(`The XML file ${filePath} produced no disassembled files and was not deleted.`);
      return;
    }
    yield unlink(filePath);
  });
}
/**
 * Decide whether disassembly should stop because the file contains leaf
 * elements but no nested elements; logs an error when it does.
 *
 * @param {number} leafCount - Number of leaf elements seen.
 * @param {boolean} hasNestedElements - Whether any nested element was seen.
 * @param {string} filePath - File being disassembled (for the log message).
 * @returns {boolean} `true` when the file should be skipped.
 */
function shouldAbortForLeafOnly(leafCount, hasNestedElements, filePath) {
  const leafOnly = leafCount > 0 && !hasNestedElements;
  if (leafOnly) {
    logger.error(`The XML file ${filePath} only has leaf elements. This file will not be disassembled.`);
  }
  return leafOnly;
}
/**
 * Write the leaf-content file unless there are no leaves. With strategy
 * "grouped-by-tag" the leaf keys are first put back into `keyOrder`.
 *
 * @param {object} _a - `{ leafCount, leafContent, strategy, keyOrder, options }`,
 * where `options` is forwarded to buildDisassembledFile.
 * @returns {Promise<void>}
 */
function writeLeafContentIfAny(_a) {
  return __awaiter(this, arguments, void 0, function* ({ leafCount, leafContent, strategy, keyOrder, options, }) {
    if (leafCount === 0) {
      return;
    }
    let content = leafContent;
    if (strategy === "grouped-by-tag") {
      content = orderXmlElementKeys(leafContent, keyOrder);
    }
    yield buildDisassembledFile(Object.assign({ content }, options));
  });
}
/**
 * Pull the root element and XML declaration out of a parsed document.
 *
 * The root is the first key other than "?xml"; the declaration is returned
 * only when the "?xml" entry is a non-null object.
 *
 * @param {object} parsedXml - Parsed document.
 * @returns {{rootElementName: (string|undefined), rootElement: *, xmlDeclaration: (object|undefined)}}
 */
function getRootInfo(parsedXml) {
  const declaration = parsedXml["?xml"];
  const hasDeclaration = declaration !== null && typeof declaration === "object";
  const rootElementName = Object.keys(parsedXml).find((name) => name !== "?xml");
  return {
    rootElementName,
    rootElement: parsedXml[rootElementName],
    xmlDeclaration: hasDeclaration ? declaration : undefined,
  };
}
/**
 * Build a copy of `content` whose keys follow `keyOrder`. Keys missing from
 * `content` (or holding `undefined`) are skipped, and keys of `content` that
 * are not listed in `keyOrder` are left out.
 *
 * @param {object} content - Source object.
 * @param {string[]} keyOrder - Desired key order.
 * @returns {object}
 */
function orderXmlElementKeys(content, keyOrder) {
  return keyOrder.reduce((ordered, name) => {
    const item = content[name];
    if (item !== undefined) {
      ordered[name] = item;
    }
    return ordered;
  }, {});
}
/**
 * Walk the root's child keys in order and process every element under each
 * key, in concurrent batches of 20.
 *
 * Totals are accumulated across results: `leafCount` grows by the number of
 * leaves each result carries and `hasNestedElements` stays true once any
 * result reports a nested element. (Every call in a batch receives the same
 * snapshot of the running totals, so the per-result totals cannot simply be
 * copied back.)
 *
 * @param {object} _a - Options; see the destructured parameter list.
 * @returns {Promise<{leafContent: object, nestedGroups: object, leafCount: number, hasNestedElements: boolean}>}
 */
function disassembleElementKeys(_a) {
  return __awaiter(this, arguments, void 0, function* ({ rootElement, keyOrder, disassembledPath, rootElementName, rootAttributes, xmlDeclaration, uniqueIdElements, strategy, format, }) {
    const leafContent = {};
    const nestedGroups = {};
    let leafCount = 0;
    let hasNestedElements = false;
    const BATCH_SIZE = 20;
    for (const key of keyOrder) {
      // A key holding a single element is treated as a one-item list.
      const elements = Array.isArray(rootElement[key])
        ? rootElement[key]
        : [rootElement[key]];
      for (let start = 0; start < elements.length; start += BATCH_SIZE) {
        const batch = elements.slice(start, start + BATCH_SIZE);
        const batchResults = yield Promise.all(batch.map((element) => parseElementUnified({
          element,
          disassembledPath,
          uniqueIdElements,
          rootElementName,
          rootAttributes,
          key,
          leafCount,
          hasNestedElements,
          format,
          xmlDeclaration,
          strategy,
        })));
        for (const result of batchResults) {
          const leaves = result.leafContent[key];
          if (leaves) {
            if (!leafContent[key]) {
              leafContent[key] = [];
            }
            leafContent[key].push(...leaves);
            leafCount += leaves.length;
          }
          if (strategy === "grouped-by-tag" && result.nestedGroups) {
            for (const tag in result.nestedGroups) {
              if (!nestedGroups[tag]) {
                nestedGroups[tag] = [];
              }
              nestedGroups[tag].push(...result.nestedGroups[tag]);
            }
          }
          hasNestedElements = hasNestedElements || Boolean(result.hasNestedElements);
        }
      }
    }
    return { leafContent, nestedGroups, leafCount, hasNestedElements };
  });
}
/**
 * For strategy "grouped-by-tag", write one file per tag holding all nested
 * elements collected for that tag. Does nothing for any other strategy.
 *
 * @param {object} nestedGroups - Map from tag name to the elements collected for it.
 * @param {string} strategy - Active disassembly strategy.
 * @param {object} options - `{ disassembledPath, rootElementName, rootAttributes, xmlDeclaration, format }`.
 * @returns {Promise<void>}
 */
function writeNestedGroups(nestedGroups, strategy, options) {
  return __awaiter(this, void 0, void 0, function* () {
    if (strategy === "grouped-by-tag") {
      // Files are written one tag at a time, in key enumeration order.
      for (const tag in nestedGroups) {
        const request = {
          content: nestedGroups[tag],
          disassembledPath: options.disassembledPath,
          outputFileName: `${tag}.${options.format}`,
          wrapKey: tag,
          isGroupedArray: true,
          rootElementName: options.rootElementName,
          rootAttributes: options.rootAttributes,
          xmlDeclaration: options.xmlDeclaration,
          format: options.format,
        };
        yield buildDisassembledFile(request);
      }
    }
  });
}
/**
 * Runs queued async tasks with an upper bound on how many run at once.
 */
class AsyncTaskQueue {
  /**
   * @param {number} [concurrency=10] - Maximum number of tasks running at the same time.
   */
  constructor(concurrency = 10) {
    this.queue = [];
    this.running = 0;
    this.concurrency = concurrency;
  }

  /**
   * Queue a task and return a promise that settles with the task's outcome.
   *
   * @param {Function} task - Function returning a value or a promise.
   * @returns {Promise<*>}
   */
  add(task) {
    return __awaiter(this, void 0, void 0, function* () {
      return new Promise((resolve, reject) => {
        // The queued wrapper never throws: it forwards the outcome to the caller's promise.
        const runTask = () => __awaiter(this, void 0, void 0, function* () {
          try {
            resolve(yield task());
          }
          catch (error) {
            reject(error);
          }
        });
        this.queue.push(runTask);
        this.process();
      });
    });
  }

  /**
   * Start the next queued task if a slot is free; when it finishes, release
   * the slot and try again.
   *
   * @returns {Promise<void>}
   */
  process() {
    return __awaiter(this, void 0, void 0, function* () {
      const atCapacity = this.running >= this.concurrency;
      if (atCapacity || this.queue.length === 0) {
        return;
      }
      this.running++;
      const next = this.queue.shift();
      try {
        yield next();
      }
      finally {
        this.running--;
        this.process();
      }
    });
  }

  /**
   * Resolve once nothing is running or queued, checking every 10 ms.
   *
   * @returns {Promise<void>}
   */
  waitForCompletion() {
    return __awaiter(this, void 0, void 0, function* () {
      const pause = () => new Promise((resolve) => setTimeout(resolve, 10));
      while (this.running > 0 || this.queue.length > 0) {
        yield pause();
      }
    });
  }
}
/**
 * Disassembles one XML file, or every XML file directly inside a directory,
 * honouring an ignore file.
 */
class DisassembleXMLFileHandler {
  constructor() {
    this.ign = ignore();
    this.taskQueue = new AsyncTaskQueue(10);
  }

  /**
   * Entry point: load the ignore rules, then disassemble the file or the
   * directory found at `filePath`.
   *
   * @param {object} xmlAttributes
   * @param {string} xmlAttributes.filePath - File or directory to disassemble.
   * @param {string} [xmlAttributes.uniqueIdElements] - Comma-separated ID field names.
   * @param {string} [xmlAttributes.strategy="unique-id"] - "unique-id" or "grouped-by-tag"; anything else falls back to "unique-id" with a warning.
   * @param {boolean} [xmlAttributes.prePurge=false] - Remove an existing output directory first.
   * @param {boolean} [xmlAttributes.postPurge=false] - Delete the source file afterwards.
   * @param {string} [xmlAttributes.ignorePath=".xmldisassemblerignore"] - Ignore file to load.
   * @param {string} [xmlAttributes.format="xml"] - Output format.
   * @returns {Promise<void>}
   */
  disassemble(xmlAttributes) {
    return __awaiter(this, void 0, void 0, function* () {
      const { filePath, uniqueIdElements, prePurge = false, postPurge = false, ignorePath = ".xmldisassemblerignore", format = "xml", } = xmlAttributes;
      let { strategy = "unique-id" } = xmlAttributes;
      const supportedStrategies = ["unique-id", "grouped-by-tag"];
      if (!supportedStrategies.includes(strategy)) {
        logger.warn(`Unsupported strategy "${strategy}", defaulting to "unique-id".`);
        strategy = "unique-id";
      }
      yield this._loadIgnoreRules(ignorePath);
      const fileStat = yield stat(filePath);
      const relativePath = this.posixPath(relative(process.cwd(), filePath));
      const sharedOptions = { uniqueIdElements, strategy, prePurge, postPurge, format };
      if (fileStat.isFile()) {
        yield this._handleFile(filePath, relativePath, sharedOptions);
        return;
      }
      if (fileStat.isDirectory()) {
        yield this._handleDirectory(filePath, Object.assign({}, sharedOptions, { ignorePath }));
      }
    });
  }

  /**
   * Add the rules from the ignore file to the matcher, when that file exists.
   *
   * @param {string} ignorePath
   * @returns {Promise<void>}
   */
  _loadIgnoreRules(ignorePath) {
    return __awaiter(this, void 0, void 0, function* () {
      const resolvedIgnorePath = resolve(ignorePath);
      if (!existsSync(resolvedIgnorePath)) {
        return;
      }
      const rules = yield readFile(resolvedIgnorePath);
      this.ign.add(rules.toString());
    });
  }

  /**
   * Disassemble a single file unless it is not an XML file or is ignored.
   *
   * @param {string} filePath - File to disassemble.
   * @param {string} relativePath - POSIX-style path relative to the working directory, used for ignore matching.
   * @param {object} options - Options forwarded to processFile.
   * @returns {Promise<void>}
   */
  _handleFile(filePath, relativePath, options) {
    return __awaiter(this, void 0, void 0, function* () {
      const absolutePath = resolve(filePath);
      if (!this._isXmlFile(absolutePath)) {
        logger.error(`The file path provided is not an XML file: ${absolutePath}`);
        return;
      }
      if (this.ign.ignores(relativePath)) {
        logger.warn(`File ignored by ignore rules: ${absolutePath}`);
        return;
      }
      const request = Object.assign({}, options, {
        dirPath: dirname$1(absolutePath),
        filePath: absolutePath,
      });
      yield this.processFile(request);
    });
  }

  /**
   * Queue every non-ignored XML file directly inside `dirPath` for
   * disassembly and wait for all of them. Ignored entries are logged.
   *
   * @param {string} dirPath - Directory to scan (not recursive).
   * @param {object} options - Options forwarded to processFile.
   * @returns {Promise<void>}
   */
  _handleDirectory(dirPath, options) {
    return __awaiter(this, void 0, void 0, function* () {
      const entryNames = yield readdir(dirPath);
      const pending = entryNames.map((entryName) => __awaiter(this, void 0, void 0, function* () {
        const entryPath = join$1(dirPath, entryName);
        const relativeEntryPath = this.posixPath(relative(process.cwd(), entryPath));
        if (this.ign.ignores(relativeEntryPath)) {
          logger.warn(`File ignored by ignore rules: ${entryPath}`);
          return undefined;
        }
        if (!this._isXmlFile(entryPath)) {
          return undefined;
        }
        return this.taskQueue.add(() => this.processFile(Object.assign({}, options, { dirPath, filePath: entryPath })));
      }));
      yield Promise.all(pending);
    });
  }

  /**
   * Report whether the path ends in ".xml" (case-sensitive).
   *
   * @param {string} filePath
   * @returns {boolean}
   */
  _isXmlFile(filePath) {
    const xmlSuffix = ".xml";
    return filePath.endsWith(xmlSuffix);
  }

  /**
   * Disassemble one XML file into a directory named after the part of the
   * file name before its first ".", created next to the file.
   *
   * @param {object} xmlAttributes - `{ dirPath, strategy, filePath, uniqueIdElements, prePurge, postPurge, format }`.
   * @returns {Promise<void>}
   */
  processFile(xmlAttributes) {
    return __awaiter(this, void 0, void 0, function* () {
      const { dirPath, strategy, filePath, uniqueIdElements, prePurge, postPurge, format, } = xmlAttributes;
      logger.debug(`Parsing file to disassemble: ${filePath}`);
      // File name without its final extension; it may still contain dots.
      const fullName = basename$1(filePath, extname(filePath));
      const [directoryName] = fullName.split(".");
      const outputPath = join$1(dirPath, directoryName);
      if (prePurge && existsSync(outputPath)) {
        yield rm(outputPath, { recursive: true });
      }
      yield buildDisassembledFilesUnified({
        filePath,
        disassembledPath: outputPath,
        uniqueIdElements,
        baseName: fullName,
        postPurge,
        format,
        strategy,
      });
    });
  }

  /**
   * Replace each run of backslashes in `path` with a single "/".
   *
   * @param {string} path
   * @returns {string}
   */
  posixPath(path) {
    const backslashRuns = /\\+/g;
    return path.replace(backslashRuns, "/");
  }
}
/**
 * Set the level on the logger returned by log4js `getLogger()`.
 *
 * @param {string} level - Level to assign.
 */
function setLogLevel(level) {
  const defaultLogger = getLogger();
  defaultLogger.level = level;
}
// Logger used throughout this module, obtained from log4js without a category name.
const logger = getLogger();
// Module-load side effect: configure log4js with a single file appender that
// writes to "disassemble.log", and set the default category's level to "error".
configure({
appenders: { disassemble: { type: "file", filename: "disassemble.log" } },
categories: { default: { appenders: ["disassemble"], level: "error" } },
});
export { DisassembleXMLFileHandler, ReassembleXMLFileHandler, buildXMLString, logger, parseXML, setLogLevel, transformToIni, transformToJson, transformToJson5, transformToToml, transformToYaml };
//# sourceMappingURL=index.mjs.map