UNPKG

jats-xml

Version:

Types and utilities for working with JATS in TypeScript

252 lines (251 loc) 10.7 kB
import { toText } from 'myst-common';
import { xml2js } from 'xml-js';
import { doi } from 'doi-utils';
import { validatePageFrontmatter } from 'myst-frontmatter';
import { select as unistSelect, selectAll } from 'unist-util-select';
import { Tags } from 'jats-tags';
import { findArticleId, processAffiliation, processContributor } from './utils.js';
import { tic } from 'myst-cli-utils';
import { articleMetaOrder, tableWrapOrder } from './order.js';
import { serializeJatsXml, convertToUnist, convertToXml, toDate } from 'jats-utils';

/**
 * Select the first node matching `selector` under `node`.
 *
 * Thin wrapper around unist-util-select's `select` that normalizes a
 * "no match" result (`null` or `undefined`) to `undefined`.
 *
 * @param {string} selector - selector passed through to unist-util-select
 * @param {object} [node] - tree (or subtree) to search
 * @returns {object | undefined} the first match, or `undefined`
 */
function select(selector, node) {
  const match = unistSelect(selector, node);
  return match === null || match === undefined ? undefined : match;
}

/** DOCTYPE written by `serialize()` when the parsed document did not carry one. */
const DEFAULT_DOCTYPE = 'article PUBLIC "-//NLM//DTD JATS (Z39.96) Journal Archiving and Interchange DTD with MathML3 v1.3 20210610//EN" "http://jats.nlm.nih.gov/publishing/1.3/JATS-archivearticle1-3-mathml3.dtd"';

/**
 * A parsed JATS document.
 *
 * The constructor parses the XML with `xml2js`, converts the root element to
 * a unist tree (`this.tree`), and the getters expose commonly used parts of
 * that tree (front matter, body, back matter, references, ...).
 */
export class Jats {
  /**
   * @param {string} data - JATS XML source handed to `xml2js`
   * @param {object} [opts]
   * @param {object} [opts.log] - optional logger; `warn` and `debug` are called on it
   * @param {*} [opts.source] - stored on `this.source` when truthy
   * @throws {Error} when `data` cannot be parsed as XML (the parser error is
   *   attached as `cause`), or when the document is not shaped as
   *   `<!DOCTYPE><article>...</article>`
   */
  constructor(data, opts) {
    // presumably `tic()` starts a timer and `toc(template)` fills in the elapsed time — confirm in myst-cli-utils
    const toc = tic();
    this.log = opts === null || opts === undefined ? undefined : opts.log;
    if (opts !== null && opts !== undefined && opts.source) {
      this.source = opts.source;
    }
    try {
      this.raw = xml2js(data, { compact: false });
    } catch (error) {
      // Keep the parser's own diagnostics reachable via `cause` instead of discarding them.
      // Runtimes without Error-cause support simply ignore the second argument.
      throw new Error('Problem parsing the JATS document, please ensure it is XML', {
        cause: error,
      });
    }
    const { declaration, elements } = this.raw;
    this.declaration =
      declaration === null || declaration === undefined ? undefined : declaration.attributes;
    const hasElements = elements !== null && elements !== undefined;
    if (hasElements && elements.length && elements[0].type !== 'doctype') {
      if (this.log !== null && this.log !== undefined) {
        this.log.warn('JATS is missing DOCTYPE declaration');
      }
      // Insert a placeholder so the structural check below sees [doctype, article].
      elements.unshift({ type: 'doctype' });
    }
    const isWellFormed =
      hasElements &&
      elements.length === 2 &&
      elements[0].type === 'doctype' &&
      hasSingleArticle(elements[1]);
    if (!isWellFormed) {
      throw new Error('JATS must be structured as <!DOCTYPE><article>...</article>');
    }
    // `undefined` when the placeholder was inserted; `serialize()` then falls back to DEFAULT_DOCTYPE.
    this.doctype = elements[0].doctype;
    const converted = convertToUnist(elements[1]);
    this.tree = select('article', converted);
    if (this.log !== null && this.log !== undefined) {
      this.log.debug(toc('Parsed and converted JATS to unist tree in %s'));
    }
  }

  /**
   * Page frontmatter derived from the article's front matter: titles, DOI,
   * PMC identifier, publication date, authors, affiliations, keywords, venue,
   * subject, license and open-access flag, passed through
   * `validatePageFrontmatter`.
   */
  get frontmatter() {
    const title = this.articleTitle;
    const subtitle = this.articleSubtitle;
    const short_title = this.articleAltTitle;

    let date;
    if (this.publicationDate) {
      const pubDate = toDate(this.publicationDate);
      if (pubDate) {
        // NOTE(review): these are local-time getters; if `toDate` builds the Date in UTC
        // the day can shift in negative-offset time zones — confirm against jats-utils.
        const year = pubDate.getFullYear();
        const month = (pubDate.getMonth() + 1).toString().padStart(2, '0');
        const day = pubDate.getDate().toString().padStart(2, '0');
        date = `${year}-${month}-${day}`;
      }
    }

    const authorNodes = this.articleAuthors;
    const authors =
      authorNodes === null || authorNodes === undefined
        ? undefined
        : authorNodes.map((auth) => {
            return processContributor(auth);
          });

    const affiliationNodes = this.articleAffiliations;
    const affiliations =
      affiliationNodes === null || affiliationNodes === undefined
        ? undefined
        : affiliationNodes.map((aff) => {
            return processAffiliation(aff);
          });

    const keywordNodes = this.keywords;
    const keywordTexts =
      keywordNodes === null || keywordNodes === undefined
        ? undefined
        : keywordNodes.map((k) => toText(k));
    const keywords = keywordTexts === null || keywordTexts === undefined ? [] : keywordTexts;

    // Look for the subject inside the article categories when present, otherwise anywhere in the front matter.
    const categories = this.articleCategories;
    const firstSubject = select(
      Tags.subject,
      categories === null || categories === undefined ? this.front : categories,
    );
    const journalTitle = select(Tags.journalTitle, this.front);

    // Resolve the license to a single string, trying in order: the license's own link,
    // an ali:license_ref node, a lone ext-link, and finally the license text itself.
    const license = this.license;
    let licenseString = null;
    if (license !== null && license !== undefined && license['xlink:href']) {
      licenseString = license['xlink:href'];
    } else if (select('[type=ali:license_ref]', license)) {
      licenseString = toText(select('[type=ali:license_ref]', license));
    } else if (selectAll('ext-link', license).length === 1) {
      // this should only happen if there is only one ext-link
      const extLinkHref = select('ext-link', license)['xlink:href'];
      licenseString = extLinkHref === null || extLinkHref === undefined ? null : extLinkHref;
    } else if (license) {
      licenseString = toText(license);
    }

    let openAccess;
    const rawLicenseType =
      license === null || license === undefined ? undefined : license['license-type'];
    const licenseType =
      rawLicenseType === null || rawLicenseType === undefined
        ? undefined
        : rawLicenseType.toLowerCase();
    const hasLicenseString = licenseString !== null && licenseString !== undefined;
    if (licenseType && ['openaccess', 'open-access'].includes(licenseType)) {
      openAccess = true;
    } else if (hasLicenseString && licenseString.match(/^\s*Open Access\s*This/)) {
      // Strip the leading "Open Access" label so only the license sentence remains.
      licenseString = licenseString.replace(/^\s*Open Access\s*/, '');
      openAccess = true;
    } else if (
      hasLicenseString &&
      licenseString.toLowerCase().startsWith('this is an open access article')
    ) {
      openAccess = true;
    }

    const pmc = this.pmc;
    const identifiers = pmc ? { pmcid: `PMC${pmc}` } : undefined;
    const articleDoi = this.doi;

    const frontmatter = validatePageFrontmatter(
      {
        title: title ? toText(title) : undefined,
        subtitle: subtitle ? toText(subtitle) : undefined,
        short_title: short_title ? toText(short_title) : undefined,
        doi: articleDoi === null || articleDoi === undefined ? undefined : articleDoi,
        identifiers,
        date,
        authors: authors.length ? authors : undefined,
        // editors,
        affiliations: affiliations.length ? affiliations : undefined,
        keywords: keywords.length ? keywords : undefined,
        venue: journalTitle ? { title: toText(journalTitle) } : undefined,
        subject: firstSubject ? toText(firstSubject) : undefined,
        license: licenseString === null || licenseString === undefined ? undefined : licenseString,
        open_access: openAccess,
      },
      { property: 'frontmatter', messages: {} },
    );
    return frontmatter;
  }

  /** First `Tags.front` node in the article tree. */
  get front() {
    return select(Tags.front, this.tree);
  }

  /** First `Tags.articleMeta` node in the article tree. */
  get articleMeta() {
    return select(Tags.articleMeta, this.tree);
  }

  /** First `Tags.permissions` node in the front matter. */
  get permissions() {
    return select(Tags.permissions, this.front);
  }

  /** Result of `doi.normalize` on the front matter's `doi` article id (empty string when absent). */
  get doi() {
    const articleId = findArticleId(this.front, 'doi');
    return doi.normalize(articleId === null || articleId === undefined ? '' : articleId);
  }

  /** The `pmc` article id with any leading `PMC` / `PMC:` prefix removed. */
  get pmc() {
    const articleId = findArticleId(this.front, 'pmc');
    return articleId === null || articleId === undefined
      ? undefined
      : articleId.replace(/^PMC:?/, '');
  }

  /** The `pmid` article id from the front matter. */
  get pmid() {
    return findArticleId(this.front, 'pmid');
  }

  /** All `Tags.pubDate` nodes in the front matter. */
  get publicationDates() {
    return selectAll(Tags.pubDate, this.front);
  }

  /** The first publication date that contains a `Tags.day` node. */
  get publicationDate() {
    return this.publicationDates.find((d) => !!select(Tags.day, d));
  }

  /** First `Tags.license` node inside the permissions. */
  get license() {
    return select(Tags.license, this.permissions);
  }

  /** First `Tags.kwdGroup` node in the front matter. */
  get keywordGroup() {
    return select(Tags.kwdGroup, this.front);
  }

  /** The `Tags.kwd` nodes of the first keyword group. */
  get keywords() {
    return selectAll(Tags.kwd, this.keywordGroup);
  }

  /** All `Tags.kwdGroup` nodes in the front matter. */
  get keywordGroups() {
    return selectAll(Tags.kwdGroup, this.front);
  }

  /** First `Tags.articleCategories` node in the front matter. */
  get articleCategories() {
    return select(Tags.articleCategories, this.front);
  }

  /** First `Tags.titleGroup` node in the front matter. */
  get titleGroup() {
    return select(Tags.titleGroup, this.front);
  }

  /** First `Tags.articleTitle` node in the title group. */
  get articleTitle() {
    return select(Tags.articleTitle, this.titleGroup);
  }

  /** First `Tags.subtitle` node in the title group. */
  get articleSubtitle() {
    return select(Tags.subtitle, this.titleGroup);
  }

  /** First `Tags.altTitle` node in the title group. */
  get articleAltTitle() {
    return select(Tags.altTitle, this.titleGroup);
  }

  /** First `Tags.abstract` node in the front matter. */
  get abstract() {
    return select(Tags.abstract, this.front);
  }

  /** All `Tags.abstract` nodes in the front matter. */
  get abstracts() {
    return selectAll(Tags.abstract, this.front);
  }

  /** First `Tags.contribGroup` node in the front matter. */
  get contribGroup() {
    return select(Tags.contribGroup, this.front);
  }

  /** All `Tags.contribGroup` nodes in the front matter. */
  get contribGroups() {
    return selectAll(Tags.contribGroup, this.front);
  }

  /**
   * Contributors from every contrib group whose `contrib-type` attribute is
   * either missing or `'author'`.
   */
  get articleAuthors() {
    // Wrap all groups in a synthetic parent so a single query covers them.
    const syntheticParent = {
      type: 'contribGroups',
      children: this.contribGroups,
    };
    return selectAll(Tags.contrib, syntheticParent).filter((contrib) => {
      const contribType = contrib['contrib-type'];
      return !contribType || contribType === 'author';
    });
  }

  /** All `Tags.aff` nodes in the front matter that carry an `id` attribute. */
  get articleAffiliations() {
    return selectAll(`${Tags.aff}[id]`, this.front);
  }

  /** First `Tags.body` node in the article tree. */
  get body() {
    return select(Tags.body, this.tree);
  }

  /** First `Tags.back` node in the article tree. */
  get back() {
    return select(Tags.back, this.tree);
  }

  /** All `Tags.subArticle` nodes in the article tree. */
  get subArticles() {
    return selectAll(Tags.subArticle, this.tree);
  }

  /** First `Tags.refList` node in the back matter. */
  get refList() {
    return select(Tags.refList, this.back);
  }

  /** All `Tags.ref` nodes in the reference list. */
  get references() {
    return selectAll(Tags.ref, this.refList);
  }

  /**
   * Reorder children in place: article-meta children by their position in
   * `articleMetaOrder`, and the children of every `table-wrap` by their rank
   * in `tableWrapOrder`. Node types missing from either ordering get rank -1
   * and therefore sort ahead of the listed ones.
   */
  sort() {
    const meta = this.articleMeta;
    if (meta) {
      const metaRank = (node) => articleMetaOrder.findIndex((x) => x === node.type);
      meta.children = meta.children.sort((a, b) => metaRank(a) - metaRank(b));
    }
    const tableRank = (node) => {
      const rank = tableWrapOrder[node.type];
      return rank === null || rank === undefined ? -1 : rank;
    };
    selectAll('table-wrap', this.tree).forEach((tw) => {
      tw.children = tw.children.sort((a, b) => tableRank(a) - tableRank(b));
    });
  }

  /**
   * Sort the tree and serialize it with `serializeJatsXml`.
   *
   * @param {object} [opts] - forwarded to `serializeJatsXml`
   * @param {boolean} [opts.bodyOnly] - when truthy, serialize only the converted
   *   article element, without the XML declaration and DOCTYPE wrapper
   * @returns the value returned by `serializeJatsXml`
   */
  serialize(opts) {
    this.sort();
    const body = convertToXml(this.tree);
    let element;
    if (opts !== null && opts !== undefined && opts.bodyOnly) {
      element = body;
    } else {
      const declarationAttributes =
        this.declaration === null || this.declaration === undefined
          ? { version: '1.0', encoding: 'UTF-8' }
          : this.declaration;
      element = {
        type: 'element',
        elements: [
          {
            type: 'doctype',
            doctype: this.doctype || DEFAULT_DOCTYPE,
          },
          body,
        ],
        declaration: { attributes: declarationAttributes },
      };
    }
    const xml = serializeJatsXml(element, opts);
    return xml;
  }
}

/**
 * Whether a parsed root element is an `<article>`, or a `<pmc-articleset>`
 * wrapping exactly one `<article>`.
 *
 * @param {object} element - root element from the `xml2js` (non-compact) output
 * @returns {boolean}
 */
function hasSingleArticle(element) {
  if (element.name === 'article') {
    return true;
  }
  if (element.name !== 'pmc-articleset') {
    return false;
  }
  const children = element.elements;
  return (
    children !== null &&
    children !== undefined &&
    children.length === 1 &&
    children[0].name === 'article'
  );
}