jats-xml
Version:
Types and utilities for working with JATS in TypeScript
252 lines (251 loc) • 10.7 kB
JavaScript
import { toText } from 'myst-common';
import { xml2js } from 'xml-js';
import { doi } from 'doi-utils';
import { validatePageFrontmatter } from 'myst-frontmatter';
import { select as unistSelect, selectAll } from 'unist-util-select';
import { Tags } from 'jats-tags';
import { findArticleId, processAffiliation, processContributor } from './utils.js';
import { tic } from 'myst-cli-utils';
import { articleMetaOrder, tableWrapOrder } from './order.js';
import { serializeJatsXml, convertToUnist, convertToXml, toDate, } from 'jats-utils';
/**
 * Find the first node matching `selector` beneath `node`.
 *
 * Thin wrapper over `unistSelect` that reports "no match" as `undefined`:
 * a `null` (or `undefined`) result from the underlying call is normalised to
 * `undefined`, every other value is passed through untouched.
 *
 * @param {string} selector - Selector handed to `unistSelect` unchanged.
 * @param {object} [node] - Tree to search; forwarded unchanged.
 * @returns {*} The matched node, or `undefined` when nothing matched.
 */
function select(selector, node) {
  const match = unistSelect(selector, node);
  if (match === null || match === undefined) {
    return undefined;
  }
  return match;
}
/**
 * Fallback DOCTYPE emitted by `Jats.serialize()` when the parsed document did
 * not provide one (i.e. `this.doctype` is empty).
 *
 * NOTE(review): the public identifier names the "Journal Archiving and
 * Interchange" DTD, while the system URL sits under `/publishing/1.3/` yet
 * ends in `JATS-archivearticle1-3-mathml3.dtd`. Confirm against the NLM JATS
 * 1.3 DTD listing that this URL actually resolves before relying on it for
 * validation.
 */
const DEFAULT_DOCTYPE = 'article PUBLIC "-//NLM//DTD JATS (Z39.96) Journal Archiving and Interchange DTD with MathML3 v1.3 20210610//EN" "http://jats.nlm.nih.gov/publishing/1.3/JATS-archivearticle1-3-mathml3.dtd"';
/**
 * In-memory representation of a JATS XML article.
 *
 * The constructor parses the XML string with `xml2js`, checks that the
 * document is shaped like `<!DOCTYPE><article>...</article>` (an `article`
 * wrapped in a single-child `pmc-articleset` is also accepted), converts the
 * article to a unist tree and stores it on `this.tree`. The getters are thin,
 * uncached selector queries over that tree.
 *
 * Instance fields set by the constructor:
 * - `log`         optional logger (`opts.log`); only `warn` and `debug` are used
 * - `source`      `opts.source`, only assigned when truthy
 * - `raw`         the object returned by `xml2js`
 * - `declaration` attributes of the XML declaration, if any
 * - `doctype`     DOCTYPE string of the parsed document, if any
 * - `tree`        the `article` node of the converted unist tree
 */
export class Jats {
  /**
   * @param {string} data - JATS XML source.
   * @param {{ log?: object, source?: string }} [opts] - Optional logger and source label.
   * @throws {Error} When `data` cannot be parsed (the parser error is attached
   *   as `cause`) or when the document is not `<!DOCTYPE><article>...</article>`.
   */
  constructor(data, opts) {
    const toc = tic();
    this.log = opts == null ? undefined : opts.log;
    if (opts != null && opts.source) {
      this.source = opts.source;
    }
    try {
      this.raw = xml2js(data, { compact: false });
    } catch (error) {
      // Keep the underlying parser failure reachable for debugging.
      throw new Error('Problem parsing the JATS document, please ensure it is XML', {
        cause: error,
      });
    }
    const { declaration, elements } = this.raw;
    this.declaration = declaration == null ? undefined : declaration.attributes;
    if (elements != null && elements.length && elements[0].type !== 'doctype') {
      if (this.log != null) {
        this.log.warn('JATS is missing DOCTYPE declaration');
      }
      // Insert a placeholder so the structural check below sees the same
      // shape with or without a DOCTYPE. Note this mutates `this.raw.elements`.
      elements.unshift({ type: 'doctype' });
    }
    const isWellFormed =
      elements != null &&
      elements.length === 2 &&
      elements[0].type === 'doctype' &&
      hasSingleArticle(elements[1]);
    if (!isWellFormed) {
      throw new Error('JATS must be structured as <!DOCTYPE><article>...</article>');
    }
    this.doctype = elements[0].doctype;
    const converted = convertToUnist(elements[1]);
    this.tree = select('article', converted);
    if (this.log != null) {
      this.log.debug(toc('Parsed and converted JATS to unist tree in %s'));
    }
  }

  /**
   * Page frontmatter assembled from the article's front matter and passed
   * through `validatePageFrontmatter`.
   *
   * @returns {object} Whatever `validatePageFrontmatter` returns.
   */
  get frontmatter() {
    const title = this.articleTitle;
    const subtitle = this.articleSubtitle;
    const shortTitle = this.articleAltTitle;

    let date;
    const publicationDate = this.publicationDate;
    if (publicationDate) {
      const pubDate = toDate(publicationDate);
      if (pubDate) {
        // NOTE(review): these are local-time accessors. If `toDate` builds the
        // Date from UTC components, the rendered day can shift in timezones
        // behind UTC - confirm against `jats-utils` before changing.
        const year = pubDate.getFullYear();
        const month = (pubDate.getMonth() + 1).toString().padStart(2, '0');
        const day = pubDate.getDate().toString().padStart(2, '0');
        date = `${year}-${month}-${day}`;
      }
    }

    const authors = (this.articleAuthors || []).map((auth) => processContributor(auth));
    const affiliations = (this.articleAffiliations || []).map((aff) => processAffiliation(aff));
    const keywords = (this.keywords || []).map((k) => toText(k));

    // Prefer a subject inside <article-categories>; otherwise search all of front.
    const categories = this.articleCategories;
    const firstSubject = select(Tags.subject, categories != null ? categories : this.front);
    const journalTitle = select(Tags.journalTitle, this.front);

    // License text, in order of preference: the license's own xlink:href, an
    // ali:license_ref child, the href of a lone ext-link, then the full text.
    const license = this.license;
    let licenseString = null;
    if (license != null && license['xlink:href']) {
      licenseString = license['xlink:href'];
    } else {
      const licenseRef = select('[type=ali:license_ref]', license);
      const extLinks = licenseRef ? [] : selectAll('ext-link', license);
      if (licenseRef) {
        licenseString = toText(licenseRef);
      } else if (extLinks.length === 1) {
        // Only trust an ext-link when it is the single link in the license.
        const href = extLinks[0]['xlink:href'];
        licenseString = href != null ? href : null;
      } else if (license) {
        licenseString = toText(license);
      }
    }

    let openAccess;
    const rawLicenseType = license == null ? undefined : license['license-type'];
    const licenseType = rawLicenseType == null ? undefined : rawLicenseType.toLowerCase();
    if (licenseType && ['openaccess', 'open-access'].includes(licenseType)) {
      openAccess = true;
    } else if (licenseString != null && licenseString.match(/^\s*Open Access\s*This/)) {
      // Strip the leading "Open Access" banner and keep the statement itself.
      licenseString = licenseString.replace(/^\s*Open Access\s*/, '');
      openAccess = true;
    } else if (
      licenseString != null &&
      licenseString.toLowerCase().startsWith('this is an open access article')
    ) {
      openAccess = true;
    }

    const pmc = this.pmc;
    const identifiers = pmc ? { pmcid: `PMC${pmc}` } : undefined;
    const articleDoi = this.doi;

    return validatePageFrontmatter(
      {
        title: title ? toText(title) : undefined,
        subtitle: subtitle ? toText(subtitle) : undefined,
        short_title: shortTitle ? toText(shortTitle) : undefined,
        doi: articleDoi != null ? articleDoi : undefined,
        identifiers,
        date,
        authors: authors.length ? authors : undefined,
        affiliations: affiliations.length ? affiliations : undefined,
        keywords: keywords.length ? keywords : undefined,
        venue: journalTitle ? { title: toText(journalTitle) } : undefined,
        subject: firstSubject ? toText(firstSubject) : undefined,
        license: licenseString != null ? licenseString : undefined,
        open_access: openAccess,
      },
      { property: 'frontmatter', messages: {} },
    );
  }

  /** First node matching `Tags.front` in the tree. */
  get front() {
    return select(Tags.front, this.tree);
  }

  /** First node matching `Tags.articleMeta` in the tree. */
  get articleMeta() {
    return select(Tags.articleMeta, this.tree);
  }

  /** First node matching `Tags.permissions` inside `front`. */
  get permissions() {
    return select(Tags.permissions, this.front);
  }

  /** Result of `doi.normalize` on the article's `doi` id ('' when the id is absent). */
  get doi() {
    const articleDoi = findArticleId(this.front, 'doi');
    return doi.normalize(articleDoi != null ? articleDoi : '');
  }

  /** The article's `pmc` id with any leading `PMC` / `PMC:` removed, or `undefined`. */
  get pmc() {
    const pmcId = findArticleId(this.front, 'pmc');
    return pmcId == null ? undefined : pmcId.replace(/^PMC:?/, '');
  }

  /** The article's `pmid` id as returned by `findArticleId`. */
  get pmid() {
    return findArticleId(this.front, 'pmid');
  }

  /** All nodes matching `Tags.pubDate` inside `front`. */
  get publicationDates() {
    return selectAll(Tags.pubDate, this.front);
  }

  /** The first publication date that contains a `Tags.day` node, or `undefined`. */
  get publicationDate() {
    return this.publicationDates.find((pubDate) => Boolean(select(Tags.day, pubDate)));
  }

  /** First node matching `Tags.license` inside `permissions`. */
  get license() {
    return select(Tags.license, this.permissions);
  }

  /** First node matching `Tags.kwdGroup` inside `front`. */
  get keywordGroup() {
    return select(Tags.kwdGroup, this.front);
  }

  /** All `Tags.kwd` nodes of the first keyword group only. */
  get keywords() {
    return selectAll(Tags.kwd, this.keywordGroup);
  }

  /** All nodes matching `Tags.kwdGroup` inside `front`. */
  get keywordGroups() {
    return selectAll(Tags.kwdGroup, this.front);
  }

  /** First node matching `Tags.articleCategories` inside `front`. */
  get articleCategories() {
    return select(Tags.articleCategories, this.front);
  }

  /** First node matching `Tags.titleGroup` inside `front`. */
  get titleGroup() {
    return select(Tags.titleGroup, this.front);
  }

  /** First node matching `Tags.articleTitle` inside the title group. */
  get articleTitle() {
    return select(Tags.articleTitle, this.titleGroup);
  }

  /** First node matching `Tags.subtitle` inside the title group. */
  get articleSubtitle() {
    return select(Tags.subtitle, this.titleGroup);
  }

  /** First node matching `Tags.altTitle` inside the title group. */
  get articleAltTitle() {
    return select(Tags.altTitle, this.titleGroup);
  }

  /** First node matching `Tags.abstract` inside `front`. */
  get abstract() {
    return select(Tags.abstract, this.front);
  }

  /** All nodes matching `Tags.abstract` inside `front`. */
  get abstracts() {
    return selectAll(Tags.abstract, this.front);
  }

  /** First node matching `Tags.contribGroup` inside `front`. */
  get contribGroup() {
    return select(Tags.contribGroup, this.front);
  }

  /** All nodes matching `Tags.contribGroup` inside `front`. */
  get contribGroups() {
    return selectAll(Tags.contribGroup, this.front);
  }

  /**
   * Contributors from every contrib group whose `contrib-type` is either
   * absent or exactly `'author'`.
   */
  get articleAuthors() {
    // Wrap all groups in a synthetic parent so a single query covers them all.
    const allGroups = { type: 'contribGroups', children: this.contribGroups };
    return selectAll(Tags.contrib, allGroups).filter((contrib) => {
      const contribType = contrib['contrib-type'];
      return !contribType || contribType === 'author';
    });
  }

  /** All `Tags.aff` nodes inside `front` that carry an `id` attribute. */
  get articleAffiliations() {
    return selectAll(`${Tags.aff}[id]`, this.front);
  }

  /** First node matching `Tags.body` in the tree. */
  get body() {
    return select(Tags.body, this.tree);
  }

  /** First node matching `Tags.back` in the tree. */
  get back() {
    return select(Tags.back, this.tree);
  }

  /** All nodes matching `Tags.subArticle` in the tree. */
  get subArticles() {
    return selectAll(Tags.subArticle, this.tree);
  }

  /** First node matching `Tags.refList` inside `back`. */
  get refList() {
    return select(Tags.refList, this.back);
  }

  /** All `Tags.ref` nodes of the first reference list. */
  get references() {
    return selectAll(Tags.ref, this.refList);
  }

  /**
   * Reorder, in place, the children of `article-meta` (by position in
   * `articleMetaOrder`) and of every `table-wrap` (by rank in
   * `tableWrapOrder`). Node types missing from the respective table rank as
   * -1 and therefore sort to the front. Nodes without a `children` array are
   * left untouched.
   */
  sort() {
    // Resolve the getter once: every access is a fresh tree query.
    const articleMeta = this.articleMeta;
    if (articleMeta && Array.isArray(articleMeta.children)) {
      articleMeta.children.sort(
        (a, b) => articleMetaOrder.indexOf(a.type) - articleMetaOrder.indexOf(b.type),
      );
    }
    const tableWrapRank = (node) => {
      const rank = tableWrapOrder[node.type];
      return rank != null ? rank : -1;
    };
    selectAll('table-wrap', this.tree).forEach((tableWrap) => {
      if (Array.isArray(tableWrap.children)) {
        tableWrap.children.sort((a, b) => tableWrapRank(a) - tableWrapRank(b));
      }
    });
  }

  /**
   * Serialize the article back to XML. Calls `sort()` first, so the tree is
   * reordered as a side effect.
   *
   * @param {object} [opts] - Forwarded to `serializeJatsXml`. When
   *   `opts.bodyOnly` is truthy only the converted article element is
   *   serialized, without DOCTYPE or XML declaration.
   * @returns {*} The value returned by `serializeJatsXml`.
   */
  serialize(opts) {
    this.sort();
    const body = convertToXml(this.tree);
    if (opts != null && opts.bodyOnly) {
      return serializeJatsXml(body, opts);
    }
    const declarationAttributes =
      this.declaration != null ? this.declaration : { version: '1.0', encoding: 'UTF-8' };
    const element = {
      type: 'element',
      elements: [
        {
          type: 'doctype',
          doctype: this.doctype || DEFAULT_DOCTYPE,
        },
        body,
      ],
      declaration: { attributes: declarationAttributes },
    };
    return serializeJatsXml(element, opts);
  }
}
/**
 * Decide whether a parsed XML element is an acceptable document root.
 *
 * Accepts either an `article` element directly, or a `pmc-articleset` element
 * whose only child element is an `article`.
 *
 * @param {{ name?: string, elements?: Array<{ name?: string }> }} element
 * @returns {boolean} `true` when exactly one article is reachable as described.
 */
function hasSingleArticle(element) {
  switch (element.name) {
    case 'article':
      return true;
    case 'pmc-articleset': {
      const children = element.elements;
      return children != null && children.length === 1 && children[0].name === 'article';
    }
    default:
      return false;
  }
}