node-unrtf
Version:
Asynchronous Node.js wrapper for the UnRTF RTF conversion program
326 lines (290 loc) • 10 kB
JavaScript
;
const { spawn, spawnSync } = require("node:child_process");
const { open } = require("node:fs/promises");
const { normalize, resolve: pathResolve } = require("node:path");
const { platform } = require("node:process");
const { gt, lt } = require("semver");
// Human-readable messages for child process exit codes, keyed by exit code
const ERROR_MSGS = { 3221225477: "Segmentation fault" };

// RTF specific magic number, expected at the very start of a file
const RTF_MAGIC_NUMBER = "{\\rtf1";
const RTF_MAGIC_BUFFER = Buffer.from(RTF_MAGIC_NUMBER);
const { length: RTF_MAGIC_NUMBER_LENGTH } = RTF_MAGIC_NUMBER;
// Zero-filled buffer sized to hold exactly the magic number
const RTF_MAGIC_NUMBER_LENGTH_BUFFER = Buffer.alloc(RTF_MAGIC_NUMBER_LENGTH);

// Regular expressions are created once at module load and reused.
// Captures everything that precedes "unrtf" in the output of `which`/`where`
const UNRTF_PATH_REG = /(.+)unrtf/u;
// UnRTF version output is inconsistent between versions,
// but always starts with the semantic version number
const UNRTF_VERSION_REG = /^(\d{1,2}\.\d{1,2}\.\d{1,2})/u;
/**
* @typedef {object} OptionDetails
* @property {string} arg The argument to pass to the binary.
* @property {('boolean'|'number'|'string')} type The type of the option.
* @property {string} minVersion The minimum version of the binary that supports this option.
* @property {string} [maxVersion] The maximum version of the binary that supports this option (optional).
*/
/**
* @typedef {Record<string, OptionDetails>} UnRTFAcceptedOptions
*/
/**
* @typedef UnRTFOptions
* @property {boolean} [noPictures] Disable the automatic storing of embedded
* pictures to the current working directory.
* @property {boolean} [noRemap] Disable charset conversion (only works for 8-bit charsets)
* (UnRTF v0.20.5 or later only).
* @property {boolean} [outputHtml] Generate HTML output.
* @property {boolean} [outputLatex] Generate LaTeX output.
* @property {boolean} [outputPs] Generate PostScript (PS) output (UnRTF v0.19.4 or earlier only).
* @property {boolean} [outputRtf] Generate RTF output. (UnRTF v0.21.3 or later only).
* @property {boolean} [outputText] Generate ASCII text output.
* @property {boolean} [outputVt] Generate text output with VT100 escape codes.
* @property {boolean} [outputWpml] Generate WPML output (UnRTF v0.19.4 or earlier only).
* @property {boolean} [printVersionInfo] Print copyright and version info.
* @property {boolean} [quiet] Do not print any leading comments in output (UnRTF v0.21.3 or later only).
*/
/**
 * @author Frazer Smith
 * @description Checks each option provided is valid, of the correct type, and can be used by specified
 * version of binary, then builds the list of CLI arguments to pass to it.
 *
 * Boolean options contribute their flag only when `true`.
 * Number and string options contribute their flag followed by the value (as a string).
 * @ignore
 * @param {UnRTFAcceptedOptions} acceptedOptions - Object containing accepted options.
 * @param {Record<string, any>} options - Object containing options to pass to binary.
 * @param {string} version - Semantic version of binary.
 * @returns {string[]} Array of CLI arguments.
 * @throws {Error} If invalid arguments provided. Every problem found is reported
 * in a single message, with the individual problems separated by "; ".
 */
function parseOptions(acceptedOptions, options, version) {
	/** @type {string[]} */
	const args = [];
	/** @type {string[]} */
	const invalidArgs = [];

	// Imperative loops are faster than functional loops, see https://romgrk.com/posts/optimizing-javascript
	const entries = Object.entries(options);
	const entriesLength = entries.length;
	for (let i = 0; i < entriesLength; i += 1) {
		// Destructuring adds overhead, so use index access
		const key = entries[i][0];

		// Unknown options are reported; nothing else can be checked for them
		if (!Object.hasOwn(acceptedOptions, key)) {
			invalidArgs.push(`Invalid option provided '${key}'`);
			continue;
		}

		const option = entries[i][1];
		const acceptedOption = acceptedOptions[key];

		if (acceptedOption.type === typeof option) {
			if (acceptedOption.type === "boolean") {
				// Skip boolean options if false
				if (option) {
					args.push(acceptedOption.arg);
				}
			} else {
				// Non-boolean options need their value passed after the flag,
				// otherwise the binary receives the flag without its operand
				args.push(acceptedOption.arg, String(option));
			}
		} else {
			invalidArgs.push(
				`Invalid value type provided for option '${key}', expected ${
					acceptedOption.type
				} but received ${typeof option}`
			);
		}

		// Version checks run regardless of the type check so that all
		// problems with an option are reported together
		/* istanbul ignore next: unable to test due to https://github.com/jestjs/jest/pull/14297 */
		if (lt(version, acceptedOption.minVersion)) {
			invalidArgs.push(
				`Invalid option provided for the current version of the binary used. '${key}' was introduced in v${acceptedOption.minVersion}, but received v${version}`
			);
		}

		// Options without a `maxVersion` are compared against the current
		// version itself, which can never be greater than itself
		/* istanbul ignore next: unable to test due to https://github.com/jestjs/jest/pull/14297 */
		if (gt(version, acceptedOption.maxVersion || version)) {
			invalidArgs.push(
				`Invalid option provided for the current version of the binary used. '${key}' is only present up to v${acceptedOption.maxVersion}, but received v${version}`
			);
		}
	}

	if (invalidArgs.length === 0) {
		return args;
	}
	throw new Error(invalidArgs.join("; "));
}
/**
 * Asynchronous wrapper around the UnRTF binary.
 * The binary is located and its version determined once, at construction time;
 * the version is then used to validate options passed to `convert`.
 */
class UnRTF {
	// Absolute path of the `unrtf` executable
	#unrtfBin;
	// Directory that contains the `unrtf` executable
	#unrtfPath;
	// Semantic version reported by the executable
	#unrtfVersion;

	/**
	 * @param {string} [binPath] - Path of the directory containing the UnRTF binary.
	 * If not provided, the constructor will attempt to find the binary
	 * in the PATH environment variable.
	 *
	 * For `win32`, a binary is bundled with the package and will be used
	 * if a local installation is not found.
	 * @throws {Error} If the binary directory cannot be found, or if the
	 * version of the binary cannot be determined.
	 */
	constructor(binPath) {
		this.#unrtfPath = "";

		/* istanbul ignore else: requires specific OS */
		if (binPath) {
			this.#unrtfPath = binPath;
		} else {
			// Ask the OS where the binary lives and keep the directory part
			const which = spawnSync(platform === "win32" ? "where" : "which", [
				"unrtf",
			]).stdout.toString();
			const unrtfPath = UNRTF_PATH_REG.exec(which)?.[1];

			if (unrtfPath) {
				this.#unrtfPath = unrtfPath;
			}
			// Fall back to the binary bundled with the package on Windows
			if (platform === "win32" && !unrtfPath) {
				this.#unrtfPath = pathResolve(
					__dirname,
					"lib",
					"win32",
					"unrtf-0.19.3",
					"bin"
				);
			}
		}

		/* istanbul ignore next: unable to test due to https://github.com/jestjs/jest/pull/14297 */
		if (!this.#unrtfPath) {
			throw new Error(
				`Unable to find ${platform} UnRTF binaries, please pass the installation directory as a parameter to the UnRTF instance.`
			);
		}

		this.#unrtfPath = normalize(this.#unrtfPath);
		this.#unrtfBin = pathResolve(this.#unrtfPath, "unrtf");

		// Version needed for option validation; which is output to stderr
		const version = spawnSync(this.#unrtfBin, [
			"--version",
		]).stderr.toString();
		this.#unrtfVersion = UNRTF_VERSION_REG.exec(version)?.[1] || "";

		/* istanbul ignore next: unable to test due to https://github.com/jestjs/jest/pull/14297 */
		if (!this.#unrtfVersion) {
			throw new Error("Unable to determine UnRTF version.");
		}

		/** @type {UnRTFAcceptedOptions} */
		this.unrtfAcceptedOptions = {
			noPictures: {
				arg: "--nopict",
				type: "boolean",
				minVersion: "0.0.1",
			},
			noRemap: {
				arg: "--noremap",
				type: "boolean",
				minVersion: "0.20.5",
			},
			outputHtml: {
				arg: "--html",
				type: "boolean",
				minVersion: "0.0.1",
			},
			outputLatex: {
				arg: "--latex",
				type: "boolean",
				minVersion: "0.0.1",
			},
			outputPs: {
				arg: "--ps",
				type: "boolean",
				minVersion: "0.0.1",
				maxVersion: "0.19.4",
			},
			outputRtf: { arg: "--rtf", type: "boolean", minVersion: "0.21.3" },
			outputText: { arg: "--text", type: "boolean", minVersion: "0.0.1" },
			outputVt: { arg: "--vt", type: "boolean", minVersion: "0.0.1" },
			outputWpml: {
				arg: "--wpml",
				type: "boolean",
				minVersion: "0.0.1",
				maxVersion: "0.19.4",
			},
			printVersionInfo: {
				arg: "--version",
				type: "boolean",
				minVersion: "0.0.1",
			},
			quiet: { arg: "--quiet", type: "boolean", minVersion: "0.21.3" },
		};
	}

	/**
	 * @description Returns the path of the directory containing the UnRTF binary.
	 * @returns {string} Path of the directory containing the UnRTF binary.
	 */
	get path() {
		return this.#unrtfPath;
	}

	/**
	 * @description Returns the version of the UnRTF binary.
	 * @returns {string} Version of UnRTF binary.
	 */
	get version() {
		return this.#unrtfVersion;
	}

	/**
	 * @author Frazer Smith
	 * @description Converts an RTF file to HTML/LaTeX/RTF/TXT.
	 * Defaults to HTML output if no `output*` options are provided.
	 * UnRTF will use the directory of the original file to store embedded pictures.
	 *
	 * Before the binary is run, the file is opened and its first bytes are
	 * compared against the RTF magic number, so that missing and non-RTF
	 * files are rejected up front.
	 * @param {string} file - Filepath of the RTF file to read.
	 * @param {UnRTFOptions} [options] - Options to pass to UnRTF binary.
	 * @returns {Promise<string>} A promise that resolves with a stdout string, or rejects with an `Error` object.
	 */
	async convert(file, options = {}) {
		let normalizedFile;
		// Catch empty strings, missing files, and non-RTF files, as UnRTF will attempt to convert them
		let fileHandle;
		try {
			normalizedFile = normalize(file);
			// eslint-disable-next-line security/detect-non-literal-fs-filename -- File open is wanted
			fileHandle = await open(normalizedFile, "r");

			// A fresh buffer per call: a shared buffer would keep the bytes of a
			// previously checked file when this file is shorter than the magic
			// number, and would be raced on by concurrent `convert` calls
			const { buffer, bytesRead } = await fileHandle.read(
				Buffer.alloc(RTF_MAGIC_NUMBER_LENGTH),
				0,
				RTF_MAGIC_NUMBER_LENGTH,
				0
			);

			// Check for RTF specific magic number
			if (
				bytesRead !== RTF_MAGIC_NUMBER_LENGTH ||
				!buffer.equals(RTF_MAGIC_BUFFER)
			) {
				throw new Error(
					"File is not the correct media type, expected 'application/rtf'"
				);
			}
		} catch (err) {
			// Only a missing file is translated; every other error is passed on untouched
			// @ts-ignore: code property found in fs errors
			if (err instanceof Error && err.code !== "ENOENT") {
				throw err;
			}
			throw new Error("File missing");
		} finally {
			await fileHandle?.close();
		}

		const args = parseOptions(
			this.unrtfAcceptedOptions,
			options,
			this.#unrtfVersion
		);
		args.push(normalizedFile);

		return new Promise((resolve, reject) => {
			const child = spawn(this.#unrtfBin, args);

			let stdOut = "";
			let stdErr = "";

			// Decode as a stream so that a multi-byte character split across
			// two chunks is not corrupted
			child.stdout.setEncoding("utf8");
			child.stderr.setEncoding("utf8");

			child.stdout.on("data", (data) => {
				stdOut += data;
			});

			child.stderr.on("data", (data) => {
				stdErr += data;
			});

			// Without a listener a spawn failure would surface as an unhandled
			// 'error' event rather than a rejected promise
			child.on("error", reject);

			child.on("close", (code) => {
				/* istanbul ignore else */
				if (stdOut !== "") {
					resolve(stdOut.trim());
				} else if (stdErr === "") {
					// No output at all: report a known exit code message, or the exit code itself
					reject(
						new Error(
							// @ts-ignore: Second operand used if code is not in ERROR_MSGS
							ERROR_MSGS[code] ||
								`unrtf ${args.join(
									" "
								)} exited with code ${code}`
						)
					);
				} else {
					reject(new Error(stdErr.trim()));
				}
			});
		});
	}
}
// Expose the class under the `default` key for consumers using a default import
module.exports.default = UnRTF; // ESM default export
// Expose the class under its own name for consumers using a named import
module.exports.UnRTF = UnRTF; // TypeScript and named export