UNPKG

@curvenote/cli

Version:
238 lines (237 loc) 9.97 kB
import fs from 'node:fs';
import Bottleneck from 'bottleneck';
import { encode } from 'html-entities';
import {
  KINDS,
  oxaLinkToId,
  oxaLink,
  OutputSummaryKind,
  ReferenceFormatTypes,
} from '@curvenote/blocks';
import { DEFAULT_IMAGE_WIDTH, nodeNames, ReferenceKind } from '@curvenote/schema';
import { Block, Version } from '../../../models.js';
import { basekey } from './basekey.js';
import { getEditorState, getEditorStateFromHTML } from './getEditorState.js';
import { getImageSrc } from './getImageSrc.js';
import { getBlockAndLatestVersion } from './getBlockAndLatestVersion.js';

/**
 * Build the `<pre><code>` HTML wrapper for a code block.
 *
 * @param {string} content - Raw source text; it is HTML-entity encoded before insertion.
 * @param {string} language - Value written to the `language` attribute.
 * @param {boolean} linenumbers - When truthy, an empty `linenumbers` attribute is added.
 * @returns {string} The HTML string.
 */
function getCodeHTML(content, language, linenumbers) {
  const lineNumbersAttr = linenumbers ? ' linenumbers=""' : '';
  return `<pre language="${language}"${lineNumbersAttr}><code>${encode(content)}</code></pre>`;
}

/**
 * Build the `<figure>` HTML for an image.
 *
 * @param {string} id - Value written to the figure's `id` attribute.
 * @param {string} src - Value written to the image's `src` attribute.
 * @param {string} [title] - Alt text. It is entity-encoded so quotes cannot break the
 *   attribute; a missing title yields an empty `alt` instead of the text "undefined".
 * @param {string} [caption] - Inserted verbatim inside `<figcaption>` when truthy
 *   (presumably already HTML - confirm with callers).
 * @param {{ width?: number|string, align?: string, numbered?: boolean }} style - Layout
 *   options; defaults are `DEFAULT_IMAGE_WIDTH`, `'center'` and `false`.
 * @returns {string} The HTML string.
 */
function getFigureHTML(id, src, title, caption, style) {
  const figcaption = caption ? `\n <figcaption kind="fig">${caption}</figcaption>` : '';
  const { width = DEFAULT_IMAGE_WIDTH, align = 'center', numbered = false } = style;
  // Strip any existing percent signs so exactly one trailing "%" is emitted.
  const widthPercent = `${`${width}`.replace(/%/g, '')}%`;
  const numberedAttr = numbered ? ' numbered=""' : '';
  const alt = encode(String(title ?? ''));
  return `<figure id="${id}"${numberedAttr} align="${align}"> <img src="${src}" align="${align}" alt="${alt}" width="${widthPercent}">${figcaption} </figure>`;
}

/**
 * Create an editor state from the first HTML output of a version that has content.
 *
 * If that output carries a `link`, the HTML is fetched from the link and used in place
 * of the inline content.
 *
 * @param {object} session - Session exposing `fetch`.
 * @param {object} version - Version whose `data.outputs` are searched.
 * @returns {Promise<object|null>} The editor state, or `null` when there is no usable
 *   HTML output, the fetch is not ok, or the resulting content is empty.
 */
async function getEditorStateFromFirstHTMLOutput(session, version) {
  const htmlOutput = version.data.outputs.find(
    (output) => output.kind === OutputSummaryKind.html && Boolean(output.content),
  );
  if (!htmlOutput) return null;
  let { content } = htmlOutput;
  if (htmlOutput.link) {
    const response = await session.fetch(htmlOutput.link);
    if (!response.ok) return null;
    content = await response.text();
  }
  return content ? getEditorStateFromHTML(content) : null;
}

/**
 * Report whether any output of the version is HTML with non-empty content.
 *
 * @param {object} version - Version with `data.outputs`.
 * @returns {boolean} `true` when at least one such output exists.
 */
export function outputHasHtml(version) {
  return version.data.outputs.some(
    ({ kind, content }) => kind === OutputSummaryKind.html && Boolean(content),
  );
}

/**
 * Report whether any output of the version is an image.
 *
 * @param {object} version - Version with `data.outputs`.
 * @returns {boolean} `true` when at least one image output exists.
 */
export function outputHasImage(version) {
  return version.data.outputs.some(({ kind }) => kind === OutputSummaryKind.image);
}

/**
 * Walk the ordered children of an article, building an editor state for each child and
 * collecting the images and references they use.
 *
 * @param {object} session - Session used for logging and for loading blocks/versions.
 * @param {object} data - Article data with `order` (child keys) and `children` (by key).
 * @param {string[]} [templateTags] - Tags that route matching content children into
 *   `tagged` instead of `children`.
 * @param {string} [referenceFormat] - Format requested when loading references.
 * @returns {Promise<{children: object[], images: object, references: object, tagged: object}>}
 *   `children`: non-empty children without template tags; `images`: versions keyed by
 *   oxa link / image src; `references`: entries keyed by `basekey(key)`; `tagged`: for
 *   each template tag, the children carrying that tag.
 */
export async function walkArticle(
  session,
  data,
  templateTags = [],
  referenceFormat = ReferenceFormatTypes.bibtex,
) {
  session.log.debug('Starting walkArticle...');
  const images = {};
  const referenceKeys = new Set();
  const references = {};
  const limiter = new Bottleneck({ maxConcurrent: 25 });
  const templateTagSet = new Set(templateTags); // ensure dedupe

  const children = await Promise.all(
    data.order.map(async (k) => {
      const articleChild = data.children[k];
      const srcId = articleChild?.src;
      const style = articleChild?.style ?? {};
      if (!srcId) return {};
      const childBlock = await limiter.schedule(() => new Block(session, srcId).get());
      const childVersion = await limiter.schedule(() => new Version(session, srcId).get());
      // Normalise once so a block without tags is handled the same way everywhere below.
      const blockTags = childBlock.data.tags ?? [];
      // Do not walk the content if it shouldn't be walked
      if (blockTags.includes('no-export')) return {};
      switch (childVersion.data.kind) {
        case KINDS.Content: {
          const state = getEditorState(childVersion.data.content);
          const matchingTags = blockTags.filter((t) => templateTagSet.has(t));
          return {
            state,
            block: childBlock,
            version: childVersion,
            templateTags: matchingTags.length > 0 ? matchingTags : undefined,
          };
        }
        case KINDS.Code: {
          const html = getCodeHTML(childVersion.data.content, childVersion.data.language, false);
          const state = getEditorState(html);
          return { state, block: childBlock, version: childVersion };
        }
        case KINDS.Image: {
          const key = oxaLink('', childVersion.id);
          if (!key) return {};
          // NOTE(review): the figure id here is `articleChild.src.block`, while the Output
          // case below uses `articleChild.id` - confirm the difference is intentional.
          const html = getFigureHTML(
            articleChild.src.block,
            key,
            childVersion.data.title,
            // Note: the caption is on the block!
            childBlock.data.caption ?? '',
            style,
          );
          const state = getEditorState(html);
          images[key] = childVersion;
          return { state, block: childBlock, version: childVersion };
        }
        case KINDS.Output: {
          const key = oxaLink('', childVersion.id);
          if (!key) return {};
          if (outputHasImage(childVersion)) {
            const html = getFigureHTML(
              articleChild.id,
              key,
              childVersion.data.title,
              // Note: the caption is on the block!
              childBlock.data.caption ?? '',
              style,
            );
            const state = getEditorState(html);
            images[key] = childVersion;
            return { state, block: childBlock, version: childVersion };
          }
          if (outputHasHtml(childVersion)) {
            const state = await getEditorStateFromFirstHTMLOutput(session, childVersion);
            if (state == null) return {};
            return { state, block: childBlock, version: childVersion };
          }
          // Neither image nor HTML output: keep the child, but without an editor state.
          return { block: childBlock, version: childVersion };
        }
        default:
          return {};
      }
    }),
  );

  // Collect all images and citation keys used inside the children's documents
  children.forEach(({ state }) => {
    if (!state) return;
    state.doc.descendants((node) => {
      const nodeName = node.type.name;
      if (nodeName === nodeNames.image) {
        const { src } = node.attrs;
        const id = oxaLinkToId(src)?.block;
        if (id) images[src] = new Version(session, id);
      } else if (nodeName === nodeNames.cite) {
        const { key, kind } = node.attrs;
        // Only true citations are loaded as references.
        // TODO: add a lookup table for reference IDs (table, eq, sec, code, fig)
        if (kind === ReferenceKind.cite && key) referenceKeys.add(key);
      }
      // Always descend into child nodes.
      return true;
    });
  });

  // Load all of the references
  session.log.debug('Starting Reference Localizaton...');
  await Promise.all(
    [...referenceKeys].map(async (key) => {
      const id = oxaLinkToId(key)?.block;
      if (!id) return;
      // Always load the latest version for references!
      const { version } = await limiter.schedule(() =>
        getBlockAndLatestVersion(session, id, { format: referenceFormat }),
      );
      if (!version) {
        session.log.error(`Could not fetch latest version of reference - skipping ${key}`);
        return;
      }
      if (version.data.kind !== KINDS.Reference) return;
      const { content } = version.data;
      // Extract the label: '@article{SimPEG2015,\n...' ➡️ 'SimPEG2015'
      // NOTE(review): this slicing assumes bibtex content; with the html reference format
      // the label may not be meaningful - confirm.
      const label = content.slice(content.indexOf('{') + 1, content.indexOf(','));
      const state =
        referenceFormat === ReferenceFormatTypes.html ? getEditorState(content) : undefined;
      const refKey = basekey(key);
      const incoming = { label, bibtex: content, version, state };
      const existing = references[refKey];
      if (!existing?.version) {
        references[refKey] = incoming;
        return;
      }
      const existingVersion = existing.version.id.version;
      const incomingVersion = version.id.version;
      // if existing, only update if incoming version is defined and greater than the existing
      if (existingVersion == null || existingVersion < (incomingVersion ?? 0)) {
        references[refKey] = incoming;
      }
    }),
  );

  // Skipped children are represented by `{}`; drop them before splitting by tag.
  const nonEmptyChildren = children.filter((c) => Object.keys(c).length > 0);
  const contentChildren = nonEmptyChildren.filter((c) => !c.templateTags);
  const taggedChildren = nonEmptyChildren.filter((c) => c.templateTags);
  const tagged = Object.fromEntries(
    [...templateTagSet].map((tag) => [
      tag,
      taggedChildren.filter((c) => c.templateTags.includes(tag)),
    ]),
  );

  return {
    children: contentChildren,
    images,
    references,
    tagged,
  };
}

/**
 * Fetch the binary contents of every image version.
 *
 * Images without a resolvable `src`, or whose fetch response is not ok, are left out of
 * the result; a failed fetch is logged as an error.
 *
 * @param {object} session - Session exposing `fetch` and `log`.
 * @param {object} images - Versions keyed by image key (as produced by `walkArticle`).
 * @returns {Promise<object>} Buffers keyed by the same image keys.
 */
export async function loadImagesToBuffers(session, images) {
  const buffers = {};
  await Promise.all(
    Object.entries(images).map(async ([key, version]) => {
      await version.get();
      const { src } = getImageSrc(version);
      if (!src) return;
      const response = await session.fetch(src);
      if (!response.ok) {
        // Do not store an HTTP error body as if it were image data.
        session.log.error(`Could not fetch image - skipping ${key}`);
        return;
      }
      // TODO convert SVGs to PNG with imagemagick
      buffers[key] = await response.buffer();
    }),
  );
  return buffers;
}

/**
 * Read local image files synchronously into buffers.
 *
 * @param {Array<{url: string}>} images - Images whose `url` is passed to `fs.readFileSync`.
 * @returns {object} Buffers keyed by each image's `url`.
 */
export function loadLocalImagesToBuffers(images) {
  const buffers = {};
  images.forEach(({ url }) => {
    buffers[url] = fs.readFileSync(url);
  });
  return buffers;
}