UNPKG

jats-xml

Version:

Types and utilities for working with JATS in Typescript

278 lines (277 loc) 9.81 kB
import fs from 'fs'; import path from 'path'; import fetch from 'node-fetch'; import AdmZip from 'adm-zip'; import which from 'which'; import { makeExecutable, writeFileToFolder } from 'myst-cli-utils'; import chalk from 'chalk'; const JATS_VERSIONS = [ '1.1', '1.1d1', '1.1d2', '1.1d3', '1.2', '1.2d1', '1.2d2', '1.3', '1.3d1', '1.3d2', ]; const DEFAULT_JATS_VERSION = '1.3'; const MATHML_VERSIONS = ['2', '3']; const DEFAULT_MATHML_VERSION = '3'; const JATS_LIBRARIES = ['authoring', 'publishing', 'archiving']; const DEFAULT_JATS_LIBRARY = 'archiving'; /** * Return static/ directory adjacent to the code * * This provides a standard location to cache DTD files, minimizing re-downloading. */ function defaultDirectory() { return path.join(__dirname, 'static'); } function warnOnOptionsMismatch(session, opts, inferredOpts) { if (opts.jats && inferredOpts.jats && opts.jats !== inferredOpts.jats) { session.log.warn(`Using JATS version ${opts.jats}; does not match version inferred from file ${inferredOpts.jats}`); } if (opts.library && inferredOpts.library && opts.library !== inferredOpts.library) { session.log.warn(`Using JATS library ${opts.library}; does not match library inferred from file ${inferredOpts.library}`); } if (opts.mathml && inferredOpts.mathml && opts.mathml !== inferredOpts.mathml) { session.log.warn(`Using MathML version ${opts.mathml}; does not match version inferred from file ${inferredOpts.mathml}`); } if (opts.oasis && !inferredOpts.oasis) { session.log.warn('Using OASIS table model; does not match non-OASIS inferred from file'); } } /** * Validate input value as JATS options and fill in defaults */ function validateOptions(session, opts, inferredOpts) { var _a, _b, _c, _d, _e; warnOnOptionsMismatch(session, opts, inferredOpts); let jats; if (!opts.jats) { jats = (_a = inferredOpts.jats) !== null && _a !== void 0 ? _a : DEFAULT_JATS_VERSION; } else if (!JATS_VERSIONS.includes(opts.jats)) { throw new Error(`Invalid JATS version "${opts.jats}" - must be one of [${JATS_VERSIONS.join(', ')}]`); } else { jats = opts.jats; } let mathml; if (!opts.mathml) { mathml = (_b = inferredOpts.mathml) !== null && _b !== void 0 ? _b : DEFAULT_MATHML_VERSION; } else if (!MATHML_VERSIONS.includes(opts.mathml)) { throw new Error(`Invalid MathML version "${opts.mathml}" - must be one of [${MATHML_VERSIONS.join(', ')}]`); } else { mathml = opts.mathml; } let library; if (!opts.library) { library = (_c = inferredOpts.library) !== null && _c !== void 0 ? _c : DEFAULT_JATS_LIBRARY; } else if (typeof opts.library !== 'string' || !JATS_LIBRARIES.includes(opts.library.toLowerCase())) { throw new Error(`Invalid JATS library "${opts.library}" - must be one of [${JATS_LIBRARIES.join(', ')}]`); } else { library = opts.library.toLowerCase(); } const oasis = (_d = inferredOpts.oasis) !== null && _d !== void 0 ? _d : !!opts.oasis; if (library === 'authoring' && oasis) { throw new Error('JATS article authoring library cannot use OASIS table model'); } const out = { library, jats, mathml, oasis, directory: (_e = opts.directory) !== null && _e !== void 0 ? _e : defaultDirectory(), }; return out; } /** * DTD folder name */ function dtdFolder(opts) { const version = opts.jats.replace('.', '-'); const oasis = opts.oasis ? '-OASIS' : ''; const mathml = `MathML${opts.mathml}`; const library = opts.library.charAt(0).toUpperCase() + opts.library.slice(1); return `JATS-${library}-${version}${oasis}-${mathml}-DTD`; } /** * DTD zip file name on FTP server */ function dtdZipFile(opts) { return `${dtdFolder(opts)}.zip`; } /** * Local location of DTD zip file */ function localDtdZipFile(opts) { return path.join(opts.directory, dtdZipFile(opts)); } /** * Extracted DTD file name */ function dtdFile(opts) { const version = opts.jats.startsWith('1.3') ? opts.jats.replace('.', '-') : '1'; let article; if (opts.library === 'archiving') { article = opts.oasis ? 'archive-oasis-article' : 'archivearticle'; } else if (opts.library === 'publishing') { article = opts.oasis ? 'journalpublishing-oasis-article' : 'journalpublishing'; } else { article = 'articleauthoring'; } const mathml = opts.mathml === '3' ? '-mathml3' : ''; return `JATS-${article}${version}${mathml}.dtd`; } /** * Local location of extracted DTD file */ function localDtdFile(opts) { return path.join(opts.directory, dtdFolder(opts), dtdFile(opts)); } /** * NIH FTP server and path for downloading JATS DTD files * * This is accessed by node-fetch over https. */ function ftpUrl(opts) { const library = opts.library === 'authoring' ? 'articleauthoring' : opts.library; return `https://ftp.ncbi.nih.gov/pub/jats/${library}/${opts.jats}/${dtdZipFile(opts)}`; } /** * Create a DTS-filename-options lookup for implicitly setting options based on JATS header content */ function buildDtdFileLookup() { const lookup = {}; JATS_VERSIONS.filter((jats) => jats === '1.2' || jats.startsWith('1.3')).forEach((jats) => { MATHML_VERSIONS.forEach((mathml) => { JATS_LIBRARIES.forEach((library) => { (library === 'authoring' ? [false] : [true, false]).forEach((oasis) => { const opts = { jats, mathml, library, oasis }; lookup[dtdFile(opts)] = opts; }); }); }); }); return lookup; } /** * Infer DTD options from file content * * This looks at DTD file name in DOCTYPE as well as dtd-version in article element */ export function inferOptions(file) { var _a, _b; const data = fs.readFileSync(file).toString(); const doctype = (_a = data.match(/<!DOCTYPE [\s\S]+?">/g)) === null || _a === void 0 ? void 0 : _a[0]; const lookup = buildDtdFileLookup(); let opts = {}; Object.entries(lookup).forEach(([key, value]) => { if (doctype === null || doctype === void 0 ? void 0 : doctype.includes(key)) opts = { ...value }; }); const article = (_b = data.match(/<article [\s\S]+?>/g)) === null || _b === void 0 ? void 0 : _b[0]; JATS_VERSIONS.forEach((jats) => { if (article === null || article === void 0 ? void 0 : article.includes(`dtd-version="${jats}"`)) opts.jats = jats; }); return opts; } /** * Download DTD zip file from NIH FTP server */ async function dtdDownload(session, opts) { if (!fs.existsSync(opts.directory)) { fs.mkdirSync(opts.directory, { recursive: true }); } session.log.info(`🌎 Downloading: ${ftpUrl(opts)}`); session.log.debug(`Saving to ${localDtdZipFile(opts)}`); const resp = await fetch(ftpUrl(opts)); const arrayBuffer = await resp.arrayBuffer(); const buffer = Buffer.from(arrayBuffer); writeFileToFolder(localDtdZipFile(opts), buffer); } /** * Download DTD zip file from NIH FTP server if it does not yet exist */ async function ensureDtdZipExists(session, opts) { if (!fs.existsSync(path.join(opts.directory, dtdZipFile(opts)))) { await dtdDownload(session, opts); } } /** * Download and extract DTD file if it does not yet exist */ async function ensureDtdExists(session, opts) { if (!fs.existsSync(localDtdFile(opts))) { await ensureDtdZipExists(session, opts); const zipFile = localDtdZipFile(opts); session.log.info(`🤐 Unzipping template: ${zipFile}`); const zip = new AdmZip(zipFile); zip.extractAllTo(opts.directory); } } /** * Test if xmllint is available as a cli command */ export function isXmllintAvailable() { return which.sync('xmllint', { nothrow: true }); } /** * Run xmllint validation */ export async function xmllintValidate(session, file, dtd) { if (!isXmllintAvailable()) { session.log.error(`JATS validation against DTD requires xmllint\n\n${chalk.dim('To install:\n mac: brew install xmlstarlet\n debian: apt install libxml2-utils')}`); return; } try { // First drop DOCTYPE with DTD in it - we have already fetched the DTD const dropDtdCommand = `xmllint --dropdtd`; const validateCommand = `xmllint --noout --dtdvalid ${dtd}`; await makeExecutable(`${dropDtdCommand} ${file} | ${validateCommand} -`, session.log)(); } catch { return false; } return true; } /** * Check if JATS file is valid based on JATS version/library/etc. * * Returns true if valid and false if invalid. */ export async function validateJatsAgainstDtd(session, file, opts) { const inferredOpts = inferOptions(file); const validatedOpts = validateOptions(session, opts !== null && opts !== void 0 ? opts : {}, inferredOpts); await ensureDtdExists(session, validatedOpts); session.log.debug(`Validating against: ${localDtdFile(validatedOpts)}`); session.log.info(`🧐 Validating against: ${dtdFolder(validatedOpts)}`); const valid = await xmllintValidate(session, file, localDtdFile(validatedOpts)); return valid; } /** * Check if JATS file is valid based on JATS version/library/etc. * * Logs confirmation message if valid and throws an error if invalid. */ export async function validateJatsAgainstDtdWrapper(session, file, opts) { const success = await validateJatsAgainstDtd(session, file, opts); if (success) { session.log.info(chalk.greenBright('JATS validation passed!')); } else { throw new Error('JATS validation failed.'); } }