deepl-node
Version:
deepl-node is the official DeepL Node.js client library
350 lines (349 loc) • 16.5 kB
JavaScript
/**
 * TypeScript interop helper: re-exposes property `key` of `source` on `target`
 * (optionally under a different name).
 *
 * When `Object.create` is available the property is defined through a descriptor:
 *  - if `source` has no own descriptor for `key`, or the descriptor is a writable/configurable
 *    data property, or it is an accessor on a non-ES-module object, a live getter that reads
 *    `source[key]` on every access is installed;
 *  - otherwise the source's own descriptor is reused as-is.
 * Without `Object.create` the current value is simply copied.
 *
 * A helper already present on `this` takes precedence over this definition.
 */
var __createBinding = (this && this.__createBinding) || (Object.create
    ? function (target, source, key, alias) {
        const exposedName = alias === undefined ? key : alias;
        let descriptor = Object.getOwnPropertyDescriptor(source, key);
        let needsLiveGetter;
        if (!descriptor) {
            needsLiveGetter = true;
        }
        else if ("get" in descriptor) {
            // Accessors are only forwarded untouched when they come from an ES module namespace.
            needsLiveGetter = !source.__esModule;
        }
        else {
            needsLiveGetter = descriptor.writable || descriptor.configurable;
        }
        if (needsLiveGetter) {
            descriptor = {
                enumerable: true,
                get: function () {
                    return source[key];
                },
            };
        }
        Object.defineProperty(target, exposedName, descriptor);
    }
    : function (target, source, key, alias) {
        const exposedName = alias === undefined ? key : alias;
        target[exposedName] = source[key];
    });
/**
 * TypeScript interop helper: attaches `defaultValue` to `namespace` as its `default` export.
 *
 * With `Object.create` available the property is defined as an enumerable data property
 * (not writable, not configurable); otherwise it is set by plain assignment.
 * A helper already present on `this` takes precedence over this definition.
 */
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create
    ? function (namespace, defaultValue) {
        const descriptor = { enumerable: true, value: defaultValue };
        Object.defineProperty(namespace, "default", descriptor);
    }
    : function (namespace, defaultValue) {
        namespace["default"] = defaultValue;
    });
/**
 * TypeScript interop helper implementing `import * as ns from "mod"` for CommonJS modules.
 *
 * ES modules (objects flagged with `__esModule`) are returned unchanged. For anything else a
 * fresh namespace object is built: every own enumerable key except `default` is bound via
 * `__createBinding`, and the module itself becomes the namespace's `default`.
 * A helper already present on `this` takes precedence over this definition.
 */
var __importStar = (this && this.__importStar) || function (mod) {
    if (mod && mod.__esModule) {
        return mod;
    }
    const namespace = {};
    if (mod != null) {
        for (const key in mod) {
            const isOwnNamedExport = key !== "default" && Object.prototype.hasOwnProperty.call(mod, key);
            if (!isOwnNamedExport) {
                continue;
            }
            __createBinding(namespace, mod, key);
        }
    }
    __setModuleDefault(namespace, mod);
    return namespace;
};
/**
 * TypeScript interop helper implementing `import x from "mod"` for CommonJS modules.
 *
 * ES modules (objects flagged with `__esModule`) are returned unchanged; any other value is
 * wrapped so that it is reachable as the `default` property of the returned object.
 * A helper already present on `this` takes precedence over this definition.
 */
var __importDefault = (this && this.__importDefault) || function (mod) {
    if (mod && mod.__esModule) {
        return mod;
    }
    return { "default": mod };
};
Object.defineProperty(exports, "__esModule", { value: true });
exports.DocumentMinifier = void 0;
const path = __importStar(require("path"));
const fs = __importStar(require("fs"));
const os = __importStar(require("os"));
const uuid_1 = require("uuid");
const errors_1 = require("./errors");
const adm_zip_1 = __importDefault(require("adm-zip"));
const fsHelper_1 = require("./fsHelper");
/**
 * Class that implements document minification: Stripping supported files like pptx and docx
 * of their media (images, videos, etc) before uploading them to the DeepL API to be translated.
 * This allows users to translate files that would usually hit the size limit for files.
 *
 * @note Please note the following:
 * 1. To use this class, you first need to check by calling {@link DocumentMinifier.canMinifyFile}
 *    if the file type is supported. This class performs no further checks.
 * 2. The {@link DocumentMinifier} is stateful, so you cannot use it to minify multiple documents at once.
 *    You need to create a new {@link DocumentMinifier} object per document.
 * 3. Be very careful when providing a custom `tempDir` when instantiating the class. For example,
 *    {@link DocumentMinifier.deminifyDocument} will delete the entire `tempDir` with
 *    `cleanup` set to `true` (disabled by default). In order not to lose any data, ideally always
 *    call `new DocumentMinifier()` in order to get a fresh temporary directory.
 * 4. If an error occurs during minification, either a {@link DocumentMinificationError} or a
 *    {@link DocumentDeminificationError} will be thrown, depending on which phase the error
 *    occurred in.
 *
 * The document minification process works in 2 phases:
 * 1. Minification: The document is extracted into a temporary directory, the media files are backed up,
 *    the media in the document is replaced with placeholders and a minified document is created.
 * 2. Deminification: The minified document is extracted into a temporary directory, the media backups are
 *    reinserted into the extracted document, and the document is deminified into the output path.
 *
 * If `cleanup` is enabled, the minification phase will delete the folder with the extracted document
 * and the deminification phase will delete the entire temporary directory.
 * Note that by default, the input file will be kept on disk, and as such no further backups of media etc.
 * are made (as they are all available from the input file).
 *
 * @example
 * const inputFile = "/home/exampleUser/document.pptx";
 * const outputFile = "/home/exampleUser/document_ES.pptx";
 * const minifier = new DocumentMinifier();
 * if (DocumentMinifier.canMinifyFile(inputFile)) {
 *   try {
 *     const minifiedFile = minifier.minifyDocument(inputFile, true);
 *     // process file minifiedFile, e.g. translate it with DeepL
 *     minifier.deminifyDocument(minifiedFile, outputFile, true);
 *     // process file outputFile
 *   } catch (e) {
 *     if (e instanceof DocumentMinificationError) {
 *       // handle error during minification, e.g. print list of media, clean up temporary directory, etc
 *     } else if (e instanceof DocumentDeminificationError) {
 *       // handle error during deminification, e.g. save minified document, clean up temporary directory, etc
 *     } else if (e instanceof DocumentTranslationError) {
 *       // handle general DocTrans error (mostly useful if document is translated between minification
 *       // and deminification)
 *     }
 *   }
 * }
 */
class DocumentMinifier {
    /**
     * @param tempDir Optional working directory. When `null`/`undefined`, a fresh temporary
     * directory is created via {@link DocumentMinifier.createTemporaryDirectory}.
     */
    constructor(tempDir) {
        this._tempDir = tempDir !== null && tempDir !== void 0 ? tempDir : DocumentMinifier.createTemporaryDirectory();
    }
    /**
     * Checks if a given file can be minified or not
     * @param inputFilePath The path to the file
     * @returns true if the file can be minified, otherwise false (including for any non-string
     * or blank input)
     */
    static canMinifyFile(inputFilePath) {
        // A typeof check covers null/undefined and also keeps non-string values from
        // blowing up on `.trim()`.
        return (typeof inputFilePath === 'string' &&
            inputFilePath.trim() !== '' &&
            DocumentMinifier.SUPPORTED_DOCUMENT_TYPES.includes(path.extname(inputFilePath).toLowerCase()));
    }
    /**
     * Gets the path for where the minified version of the input file will live
     * @param inputFilePath The path to the file
     * @returns The path to the minified version of the file
     */
    getMinifiedDocFile(inputFilePath) {
        const minifiedDocFileName = DocumentMinifier.MINIFIED_DOC_FILE_BASE_NAME + path.extname(inputFilePath);
        return path.join(this._tempDir, minifiedDocFileName);
    }
    /**
     * Gets the path to the directory where the input file will be extracted to
     * @returns The path to the directory where the input file will be extracted to
     */
    getExtractedDocDirectory() {
        return path.join(this._tempDir, DocumentMinifier.EXTRACTED_DOC_DIR_NAME);
    }
    /**
     * Gets the path to the directory where the original media was extracted to
     * @returns The path to the media directory containing the original media
     */
    getOriginalMediaDirectory() {
        return path.join(this._tempDir, DocumentMinifier.ORIGINAL_MEDIA_DIR_NAME);
    }
    /**
     * Minification phase: extracts the input document, moves its supported media into the
     * original-media directory (leaving placeholders behind) and zips the result.
     * Logs a warning via `console.error` if the minified file is still larger than
     * `MINIFIED_DOC_SIZE_LIMIT_WARNING` bytes.
     *
     * @param inputFilePath The path to the document to minify
     * @param cleanup If true, the directory with the extracted document is deleted afterwards
     * @returns The path to the minified document
     * @throws {DocumentMinificationError} If extraction, media export, zipping or cleanup fails
     */
    minifyDocument(inputFilePath, cleanup = false) {
        const extractedDocDirectory = this.getExtractedDocDirectory();
        const mediaDir = this.getOriginalMediaDirectory();
        const minifiedDocFilePath = this.getMinifiedDocFile(inputFilePath);
        try {
            this.extractZipToDirectory(inputFilePath, extractedDocDirectory);
        }
        catch (error) {
            throw new errors_1.DocumentMinificationError(`Error when extracting document: Failed to extract ${inputFilePath} to ${extractedDocDirectory}. Error: ${error}`);
        }
        this.exportMediaToMediaDirAndReplace(extractedDocDirectory, mediaDir);
        try {
            this.createZipFromDirectory(extractedDocDirectory, minifiedDocFilePath);
        }
        catch (error) {
            throw new errors_1.DocumentMinificationError(`Failed creating a zip file at ${minifiedDocFilePath}. Error: ${error}`);
        }
        if (cleanup) {
            try {
                fsHelper_1.FsHelper.removeSyncRecursive(extractedDocDirectory);
            }
            catch (error) {
                throw new errors_1.DocumentMinificationError(`Failed to delete directory ${extractedDocDirectory}. Error: ${error}`);
            }
        }
        const fileSize = fs.statSync(minifiedDocFilePath).size;
        if (fileSize > DocumentMinifier.MINIFIED_DOC_SIZE_LIMIT_WARNING) {
            console.error('The input file could not be minified below 5 MB, likely a media type is missing. ' +
                'This might cause the translation to fail.');
        }
        return minifiedDocFilePath;
    }
    /**
     * Deminification phase: extracts the (translated) minified document, moves the original
     * media back in place of the placeholders and zips the result to `outputFilePath`.
     * An existing file at `outputFilePath` is replaced.
     *
     * @param inputFilePath The path to the minified document
     * @param outputFilePath The path the deminified document is written to
     * @param cleanup If true, the entire temporary directory of this minifier is deleted afterwards
     * @throws {DocumentDeminificationError} If extraction, media reinsertion, zipping or cleanup fails
     */
    deminifyDocument(inputFilePath, outputFilePath, cleanup = false) {
        const extractedDocDirectory = this.getExtractedDocDirectory();
        const mediaDir = this.getOriginalMediaDirectory();
        if (!fs.existsSync(extractedDocDirectory)) {
            try {
                // recursive: also works when a custom temp dir has not been created yet
                fs.mkdirSync(extractedDocDirectory, { recursive: true });
            }
            catch (error) {
                throw new errors_1.DocumentDeminificationError(`Error when deminifying, could not create directory at ${extractedDocDirectory}. Error: ${error}`);
            }
        }
        try {
            this.extractZipToDirectory(inputFilePath, extractedDocDirectory);
        }
        catch (error) {
            throw new errors_1.DocumentDeminificationError(`Error when extracting document: Failed to extract ${inputFilePath} to ${extractedDocDirectory}. Error: ${error}`);
        }
        this.replaceMediaInDir(extractedDocDirectory, mediaDir);
        try {
            if (fs.existsSync(outputFilePath)) {
                fs.unlinkSync(outputFilePath);
            }
            this.createZipFromDirectory(extractedDocDirectory, outputFilePath);
        }
        catch (error) {
            // Deminification-phase failure, so report it with the deminification error type.
            throw new errors_1.DocumentDeminificationError(`Failed creating a zip file at ${outputFilePath}. Error: ${error}`);
        }
        if (cleanup) {
            try {
                fsHelper_1.FsHelper.removeSyncRecursive(this._tempDir);
            }
            catch (error) {
                // The directory being removed here is the whole temp dir, not just the extracted doc.
                throw new errors_1.DocumentDeminificationError(`Failed to delete directory ${this._tempDir}. Error: ${error}`);
            }
        }
    }
    /**
     * Creates a temporary directory for use in the {@link DocumentMinifier}.
     * Uses the system's temporary directory.
     *
     * @returns The path of the created temporary directory
     * @throws {DocumentMinificationError} If the temporary directory could not be created
     */
    static createTemporaryDirectory() {
        const tempDir = path.join(os.tmpdir(), 'document_minification_' + (0, uuid_1.v4)());
        if (fs.existsSync(tempDir)) {
            throw new errors_1.DocumentMinificationError(`Temporary directory already exists at ${tempDir}. Please try again.`);
        }
        try {
            fs.mkdirSync(tempDir);
        }
        catch (error) {
            throw new errors_1.DocumentMinificationError(`Failed creating temporary directory at ${tempDir}. Error: ${error}`);
        }
        return tempDir;
    }
    /**
     * Extracts a zip file to a given directory, creating the directory (and any missing
     * parents) first. Existing files in the directory are overwritten.
     * @param zippedDocumentPath The path to the zip file
     * @param extractionDir The path to the directory where the contents of the zip file will be extracted to
     */
    extractZipToDirectory(zippedDocumentPath, extractionDir) {
        if (!fs.existsSync(extractionDir)) {
            fs.mkdirSync(extractionDir, { recursive: true });
        }
        const zip = new adm_zip_1.default(zippedDocumentPath);
        zip.extractAllTo(extractionDir, true);
    }
    /**
     * Creates a zip file from a given directory.
     * @param sourceDir The path to the directory that needs to be zipped
     * @param outputPath The path to the output zip file
     */
    createZipFromDirectory(sourceDir, outputPath) {
        const zip = new adm_zip_1.default();
        zip.addLocalFolder(sourceDir);
        zip.writeZip(outputPath);
    }
    /**
     * Iterates through the inputDirectory and if it contains a supported media file, will export that media
     * to the mediaDirectory and replace the media in the inputDirectory with a placeholder. The
     * relative path will be preserved when moving the file to the mediaDirectory (e.g. a file located at
     * "/inputDirectory/foo/bar.png" will be exported to "/mediaDirectory/foo/bar.png")
     *
     * @param inputDirectory The path to the input directory
     * @param mediaDirectory The path to the directory where the supported media from inputDirectory will be exported to
     * @throws {DocumentMinificationError} If a problem occurred when exporting the original media from inputDirectory to mediaDirectory
     */
    exportMediaToMediaDirAndReplace(inputDirectory, mediaDirectory) {
        const files = fsHelper_1.FsHelper.readdirSyncRecursive(inputDirectory);
        for (const file of files) {
            const ext = path.extname(file).toLowerCase();
            if (!DocumentMinifier.SUPPORTED_MEDIA_FORMATS.includes(ext)) {
                continue;
            }
            const filePath = path.join(inputDirectory, file);
            const mediaPath = path.join(mediaDirectory, file);
            try {
                const mediaPathParentDir = path.dirname(mediaPath);
                if (!fs.existsSync(mediaPathParentDir)) {
                    fs.mkdirSync(mediaPathParentDir, { recursive: true });
                }
                // Move the original out, then leave a tiny placeholder under the same name so the
                // document's internal references still resolve.
                fs.renameSync(filePath, mediaPath);
                fs.writeFileSync(filePath, DocumentMinifier.MEDIA_PLACEHOLDER_TEXT);
            }
            catch (error) {
                throw new errors_1.DocumentMinificationError('Error when exporting and replacing media files', error);
            }
        }
    }
    /**
     * Iterates through `mediaDirectory` and moves all supported media files into the `inputDirectory`
     * while preserving the relative paths (e.g. /mediaDirectory/foo/bar.png will be moved to the path
     * /inputDirectory/foo/bar.png) and replaces any file if it exists at that path. Any subdirectories
     * needed for those files will also be created in `inputDirectory`.
     *
     * @param inputDirectory The path to the input directory
     * @param mediaDirectory The path to the directory where the original media lives. This media will be reinserted back and replace any
     * placeholder media.
     * @throws {DocumentDeminificationError} If a problem occurred when trying to reinsert the media
     */
    replaceMediaInDir(inputDirectory, mediaDirectory) {
        const filesAndDirs = fsHelper_1.FsHelper.readdirSyncRecursive(mediaDirectory);
        const files = filesAndDirs.filter((file) => DocumentMinifier.SUPPORTED_MEDIA_FORMATS.includes(path.extname(file).toLowerCase()));
        for (const file of files) {
            const mediaPath = path.join(mediaDirectory, file);
            const inputPath = path.join(inputDirectory, file);
            const inputPathParentDir = path.dirname(inputPath);
            if (!fs.existsSync(inputPathParentDir)) {
                try {
                    fs.mkdirSync(inputPathParentDir, { recursive: true });
                }
                catch (error) {
                    throw new errors_1.DocumentDeminificationError(`Error when reinserting media. Failed to create directory at ${inputPathParentDir}.`, error);
                }
            }
            try {
                // Remove the placeholder first so the rename cannot fail on an existing target.
                if (fs.existsSync(inputPath)) {
                    fs.unlinkSync(inputPath);
                }
                fs.renameSync(mediaPath, inputPath);
            }
            catch (error) {
                throw new errors_1.DocumentDeminificationError(`Error when reinserting media. Failed to move media back to ${inputPath}`, error);
            }
        }
    }
}
exports.DocumentMinifier = DocumentMinifier;
// Class-level configuration, attached as static properties after the class definition.
Object.assign(DocumentMinifier, {
    /** Which input document types are supported for minification. */
    SUPPORTED_DOCUMENT_TYPES: ['.pptx', '.docx'],
    /** Which media formats in the documents are supported for minification. */
    SUPPORTED_MEDIA_FORMATS: [
        // Image formats
        '.png', '.jpg', '.jpeg', '.emf', '.bmp', '.tiff', '.wdp', '.svg', '.gif',
        // Video formats
        // Taken from https://support.microsoft.com/en-gb/office/video-and-audio-file-formats-supported-in-powerpoint-d8b12450-26db-4c7b-a5c1-593d3418fb59
        '.mp4', '.asf', '.avi', '.m4v', '.mpg', '.mpeg', '.wmv', '.mov',
        // Audio formats, taken from the same URL as video
        '.aiff', '.au', '.mid', '.midi', '.mp3', '.m4a', '.wav', '.wma',
    ],
    /** Name of the sub-directory (inside the temp dir) the document is extracted into. */
    EXTRACTED_DOC_DIR_NAME: 'extracted_doc',
    /** Name of the sub-directory (inside the temp dir) the original media is moved into. */
    ORIGINAL_MEDIA_DIR_NAME: 'original_media',
    /** Base file name (without extension) of the minified document. */
    MINIFIED_DOC_FILE_BASE_NAME: 'minifiedDoc',
    /** Size in bytes above which a warning is logged after minification. */
    MINIFIED_DOC_SIZE_LIMIT_WARNING: 5000000,
    /** Content written in place of every exported media file. */
    MEDIA_PLACEHOLDER_TEXT: 'DeepL Media Placeholder',
});