UNPKG

@bader-nasser/pdftools

Version:

PDF tools to manipulate and process PDF files

198 lines (197 loc) 9.16 kB
import path from 'node:path';
import process from 'node:process';
import fs from 'fs-extra';
import { Args, Flags } from '@oclif/core';
import JSON5 from 'json5';
import YAML from 'yaml';
import TOML from '@ltd/j-toml';
// Removing the extension will make the built cli crash
import {
  addExtension,
  parseDataFile,
  removeExtension,
  updateMetadata,
} from '../../utils.js';
import { BaseCommandWithCompression } from '../../base-command-with-compression.js';

/** Number of letters available for each character of a file handle (A-Z). */
const HANDLE_LETTER_COUNT = 26;

/** Code point of 'A', the first letter used when building file handles. */
const FIRST_HANDLE_CODE_POINT = 65;

/**
 * Split a comma-separated page specification into individual page ranges.
 *
 * Every piece is trimmed and any run of whitespace and/or hyphens inside it is
 * collapsed into a single `-` (so `"1 - 3"` becomes `"1-3"`). Empty pieces,
 * such as the one left behind by a trailing comma, are dropped: an empty range
 * would end up as a bare file handle in the `cat` arguments, which pdftk
 * treats as "every page of that file".
 *
 * @param {string} pageRanges - Page specification, e.g. `"1-3, 5, 7 - end"`.
 * @returns {string[]} The normalized ranges, in input order.
 */
function parsePageRanges(pageRanges) {
  return pageRanges
    .split(/,+/)
    .map((range) => range.trim().replaceAll(/[\s-]+/g, '-'))
    .filter((range) => range !== '');
}

/**
 * Build the two-letter handle (AA, AB, ..., AZ, BA, ...) for the input file at
 * the given position.
 *
 * @param {number} index - Zero-based position of the file in the `files` list.
 * @returns {string} Two upper-case letters.
 * @throws {Error} When the index cannot be expressed with two letters A-Z.
 */
function createFileHandle(index) {
  const outerIndex = Math.floor(index / HANDLE_LETTER_COUNT);
  const innerIndex = index % HANDLE_LETTER_COUNT;
  if (outerIndex >= HANDLE_LETTER_COUNT) {
    throw new Error(
      `Too many input files: at most ${HANDLE_LETTER_COUNT ** 2} files are supported.`,
    );
  }

  const outerLetter = String.fromCodePoint(FIRST_HANDLE_CODE_POINT + outerIndex);
  const innerLetter = String.fromCodePoint(FIRST_HANDLE_CODE_POINT + innerIndex);
  return `${outerLetter}${innerLetter}`;
}

/**
 * Normalize the `pages` attribute of a file entry into a flat list of ranges.
 *
 * @param {Array<number | string> | number | string} pages - One page
 *   specification or a list of them; each is stringified before parsing.
 * @returns {string[]} All parsed ranges, in input order.
 */
function collectPageRanges(pages) {
  const pageList = Array.isArray(pages) ? pages : [pages];
  return pageList.flatMap((page) => parsePageRanges(`${page}`));
}

/**
 * Build the argument list that follows pdftk's `cat` keyword.
 *
 * Each range is prefixed with the handle of its file. A file without ranges
 * contributes its bare handle. The join/split round trip means any space
 * inside a range also separates arguments.
 *
 * @param {Array<{fileHandle: string, pageRanges: {all: string[], shared: string[]}}>} entries
 *   Processed file entries.
 * @param {(pageRanges: {all: string[], shared: string[]}) => string[]} selectRanges
 *   Picks which ranges of an entry to use.
 * @returns {string[]} The handle-prefixed ranges.
 */
function buildCatRanges(entries, selectRanges) {
  return entries
    .map(
      ({ fileHandle, pageRanges }) =>
        `${fileHandle}${selectRanges(pageRanges).join(` ${fileHandle}`)}`,
    )
    .join(' ')
    .split(' ');
}

/**
 * `process` command: reads a JSON5/YAML/TOML data file that lists input PDFs
 * (optionally with page ranges), concatenates them with pdftk into the
 * requested output and passes the remaining keys of the data file to
 * `updateMetadata`. When any referenced per-file data file provides `shared`
 * ranges, a second "-share" output is produced as well.
 */
export default class Process extends BaseCommandWithCompression {
  static aliases = ['p'];

  static description =
    'Merge PDFs, extract pages and update metadata using simple file';

  static examples = ['<%= config.bin %> <%= command.id %> data.json'];

  static args = {
    file: Args.string({
      description: `Data file to process (JSON5 or YAML or TOML) See: https://github.com/bader-nasser/pdftools/blob/main/test/docs/data.json (or .yaml or .toml) Use / in the paths. \nOn Windows, \\ can be changed to either / or \\\\`,
      required: true,
    }),
  };

  static flags = {
    keep: Flags.boolean({
      char: 'k',
      description: `Keep output's name`,
    }),
  };

  /**
   * Execute the command.
   *
   * The `compress`, `dry-run` and `silent` flags can each be turned on from
   * the command line or from the data file (`compress`, `dryRun`, `silent`
   * keys); either source enables the behavior.
   *
   * Failures are printed with `console.error` and reported through a non-zero
   * process exit code; "Done." is only logged after a successful run.
   *
   * @returns {Promise<void>}
   */
  async run() {
    const { args, flags } = await this.parse(Process);
    const { compress, 'dry-run': dryRun, silent, keep } = flags;
    const { file } = args;
    // Declared outside the try block because the final log still needs it.
    let isSilencing = silent;

    try {
      // path.resolve (unlike path.join) leaves an absolute data-file path intact.
      const filePath = path.resolve(process.cwd(), file);
      const fileDirname = path.dirname(filePath);

      // credit:
      // https://github.com/vercel/next.js/blob/962ce0dcee7993cedeb949c7f31ef34afc829578/packages/next/src/lib/find-config.ts#L53-L56
      const fileContents = await fs.readFile(filePath, 'utf8');
      let fileObject;
      if (filePath.endsWith('.json') || filePath.endsWith('.json5')) {
        fileObject = JSON5.parse(fileContents);
      } else if (filePath.endsWith('.yaml') || filePath.endsWith('.yml')) {
        fileObject = YAML.parse(fileContents);
      } else if (filePath.endsWith('.toml')) {
        fileObject = TOML.parse(fileContents);
      } else {
        this.log('Supported formats are: JSON, YAML and TOML.');
        this.log('Allowed extensions are: .json, .json5, .yaml, .yml and .toml.');
        this.exit(1);
      }

      // Every key that is not consumed here is forwarded as metadata.
      const {
        output,
        files,
        compress: compressJson = false,
        dryRun: dryRunJson = false,
        silent: silentJson = false,
        ...meta
      } = fileObject;
      const isCompressing = compress || compressJson;
      const isDryRunning = dryRun || dryRunJson;
      isSilencing = silent || silentJson;

      // Paths inside the data file are relative to the data file itself.
      const outputFromCwd = path.relative(
        process.cwd(),
        path.resolve(fileDirname, output),
      );

      const data = [];
      let useShare = false;
      let fileIndex = 0;
      for (const entry of files) {
        const fileHandle = createFileHandle(fileIndex);
        fileIndex += 1;

        let fileName = entry;
        const pageRanges = { all: [], shared: [] };
        if (typeof entry === 'object') {
          // @ts-expect-error Silence TS error!
          const { name, pages, data: rangesFile } = entry;
          if (pages && rangesFile) {
            console.log(entry);
            this.log('File object can NOT contain pages & data at the same time!');
            this.exit(1);
          }

          if (!(pages ?? rangesFile)) {
            console.log(entry);
            throw new Error('File object should have either pages or data attriute!');
          }

          fileName = name;
          if (pages) {
            // number, string or an array of them
            pageRanges.all.push(...collectPageRanges(pages));
          } else {
            const dataPath = path.resolve(fileDirname, `${rangesFile}`);
            const parsedData = await parseDataFile(dataPath);
            if (parsedData.error) {
              throw new Error(`Error while parsing data file: ${dataPath}`);
            }

            if (parsedData.all) {
              pageRanges.all.push(...parsedData.all);
            }

            if (parsedData.shared) {
              useShare = true;
              pageRanges.shared.push(...parsedData.shared);
            }
          }
        }

        fileName = path.relative(
          process.cwd(),
          path.resolve(fileDirname, fileName),
        );
        data.push({ fileHandle, fileName, pageRanges });
      }

      const handles = data.map(
        ({ fileHandle, fileName }) => `${fileHandle}=${fileName}`,
      );

      // The "-compressed" suffix is skipped when the user asked to keep the name.
      const outputBase = removeExtension(outputFromCwd);
      const compressedSuffix = isCompressing && !keep ? '-compressed' : '';
      const relativeOutput = addExtension(`${outputBase}${compressedSuffix}`);
      const relativeShareOutput = addExtension(
        `${outputBase}-share${compressedSuffix}`,
      );

      await this.ensureDirExists(relativeOutput);
      if (useShare) {
        this.logger(
          `Output files: ${relativeOutput}, ${relativeShareOutput}`,
          isSilencing,
        );
      } else {
        this.logger(`Output files: ${relativeOutput}`, isSilencing);
      }

      // Shared by both outputs so they always get the same pdftk options.
      const createPdf = async (target, catRanges) => {
        this.logger(`Creating ${target} using file: ${file}...`, isSilencing);
        const pdftkArgs = [...handles, 'cat', ...catRanges, 'output', target];
        // Use the combined setting: the output name above already depends on
        // it, so a data file with `compress: true` must really compress.
        if (isCompressing) {
          pdftkArgs.push('compress');
        }

        await this.execute('pdftk', pdftkArgs, isDryRunning);
        await updateMetadata({
          filePath: target,
          meta,
          dryRun: isDryRunning,
        });
      };

      await createPdf(
        relativeOutput,
        buildCatRanges(data, (ranges) => ranges.all),
      );

      if (useShare) {
        // Files without shared ranges fall back to their full selection.
        await createPdf(
          relativeShareOutput,
          buildCatRanges(data, (ranges) =>
            ranges.shared && ranges.shared.length > 0 ? ranges.shared : ranges.all,
          ),
        );
      }
    } catch (error) {
      console.error(error);
      // Report the failure to the caller instead of finishing with "Done."
      // and a success status.
      process.exitCode = 1;
      return;
    }

    this.logger('Done.', isSilencing);
  }
}