// @curvenote/cli
// CLI Client library for Curvenote
// (Package-viewer metadata: JavaScript, 238 lines / 237 loc, 9.97 kB; version not captured.)
import fs from 'node:fs';
import Bottleneck from 'bottleneck';
import { encode } from 'html-entities';
import { KINDS, oxaLinkToId, oxaLink, OutputSummaryKind, ReferenceFormatTypes, } from '@curvenote/blocks';
import { DEFAULT_IMAGE_WIDTH, nodeNames, ReferenceKind } from '@curvenote/schema';
import { Block, Version } from '../../../models.js';
import { basekey } from './basekey.js';
import { getEditorState, getEditorStateFromHTML } from './getEditorState.js';
import { getImageSrc } from './getImageSrc.js';
import { getBlockAndLatestVersion } from './getBlockAndLatestVersion.js';
/**
 * Build the `<pre><code>` HTML snippet for a code block.
 *
 * Both the code content and the `language` attribute value are HTML-escaped
 * with `encode`, so neither can break out of the generated markup.
 *
 * @param {string} content - Source code to embed; escaped before insertion.
 * @param {string} language - Written to the `language` attribute (coerced to a string, then escaped).
 * @param {boolean} linenumbers - When truthy, an empty `linenumbers` attribute is added.
 * @returns {string} The HTML string.
 */
function getCodeHTML(content, language, linenumbers) {
  // The attribute value is quoted with `"`, so an unescaped quote in `language` would end it early.
  const languageAttr = encode(String(language));
  const lineNumbersAttr = linenumbers ? ' linenumbers=""' : '';
  return `<pre language="${languageAttr}"${lineNumbersAttr}><code>${encode(content)}</code></pre>`;
}
/**
 * Build the `<figure>` HTML snippet wrapping a single `<img>`.
 *
 * @param {string} id - Written to the figure's `id` attribute.
 * @param {string} src - Written to the image's `src` attribute.
 * @param {string} [title] - Used as the image's `alt` text. It is HTML-escaped,
 *   and a nullish title yields an empty `alt` rather than the text "undefined".
 * @param {string} [caption] - Inserted verbatim inside `<figcaption>`; the
 *   figcaption is omitted when this is falsy.
 *   NOTE(review): presumably the caption is already HTML — confirm with callers.
 * @param {object} [style] - Optional `width`, `align` and `numbered` settings;
 *   missing keys fall back to `DEFAULT_IMAGE_WIDTH`, `'center'` and `false`.
 * @returns {string} The HTML string.
 */
function getFigureHTML(id, src, title, caption, style = {}) {
  const { width = DEFAULT_IMAGE_WIDTH, align = 'center', numbered = false } = style ?? {};
  const figcaption = caption ? `\n <figcaption kind="fig">${caption}</figcaption>` : '';
  // Accept both `70` and `'70%'`: strip every percent sign, then append exactly one.
  const widthPercent = `${String(width).replace(/%/g, '')}%`;
  // The alt text sits inside a double-quoted attribute, so it must be escaped.
  const alt = encode(title ?? '');
  return [
    `<figure id="${id}"${numbered ? ' numbered=""' : ''} align="${align}">`,
    `<img src="${src}" align="${align}" alt="${alt}" width="${widthPercent}">${figcaption}`,
    '</figure>',
  ].join('\n');
}
/**
 * Build an editor state from the first HTML output of a version.
 *
 * Only outputs of kind `OutputSummaryKind.html` with truthy `content` are
 * considered. When the chosen output has a `link`, the HTML is fetched from
 * that link through `session.fetch` and replaces the inline content.
 *
 * @param {object} session - Must provide `fetch(url)` resolving to a response with `ok` and `text()`.
 * @param {object} version - Must provide `data.outputs`, an array of output summaries.
 * @returns {Promise<object|null>} The result of `getEditorStateFromHTML`, or `null`
 *   when there is no usable HTML output, the fetch fails, or the HTML is empty.
 */
async function getEditorStateFromFirstHTMLOutput(session, version) {
  const isHtmlWithContent = (candidate) => {
    return candidate.kind === OutputSummaryKind.html && Boolean(candidate.content);
  };
  const firstHtml = version.data.outputs.find(isHtmlWithContent);
  if (!firstHtml) {
    return null;
  }

  let html = firstHtml.content;
  const { link } = firstHtml;
  if (link) {
    // Linked outputs keep their real body remotely; a failed download means no state.
    const reply = await session.fetch(link);
    if (!reply.ok) {
      return null;
    }
    html = await reply.text();
  }

  if (!html) {
    return null;
  }
  return getEditorStateFromHTML(html);
}
/**
 * Report whether a version has at least one HTML output with content.
 *
 * Uses `Array.prototype.some`, so scanning stops at the first match.
 *
 * @param {object} version - Must provide `data.outputs`, an array of output summaries.
 * @returns {boolean} `true` when some output has kind `OutputSummaryKind.html` and truthy `content`.
 */
export function outputHasHtml(version) {
  return version.data.outputs.some(
    ({ kind, content }) => kind === OutputSummaryKind.html && Boolean(content),
  );
}
/**
 * Report whether a version has at least one image output.
 *
 * Uses `Array.prototype.some`, so scanning stops at the first match.
 *
 * @param {object} version - Must provide `data.outputs`, an array of output summaries.
 * @returns {boolean} `true` when some output has kind `OutputSummaryKind.image`.
 */
export function outputHasImage(version) {
  return version.data.outputs.some(({ kind }) => kind === OutputSummaryKind.image);
}
/**
 * Walk the ordered children of an article and gather what is needed to export it.
 *
 * For every entry in `data.order` the child's block and version are fetched
 * (through a limiter capped at 25 concurrent jobs) and turned into an editor
 * state according to the version kind (content, code, image or output).
 * Blocks tagged `no-export`, children without a `src`, and unknown kinds
 * contribute nothing. The resulting states are then scanned for image nodes
 * and citation nodes; cited references are loaded at their latest version.
 *
 * @param {object} session - Provides `log.debug`/`log.error`; also passed to the models and helpers used here.
 * @param {object} data - Article data with `order` (array of child keys) and `children` (keyed by those keys).
 * @param {string[]} [templateTags=[]] - Block tags that route a content child into `tagged` instead of `children`.
 * @param {*} [referenceFormat=ReferenceFormatTypes.bibtex] - Format requested for references; when it is
 *   `ReferenceFormatTypes.html` an editor state is also built from the reference content.
 * @returns {Promise<{children: object[], images: object, references: object, tagged: object}>}
 *   `children`: non-empty children without template tags, in article order;
 *   `images`: versions keyed by image key/src;
 *   `references`: `{ label, bibtex, version, state }` keyed by `basekey(key)`;
 *   `tagged`: for each template tag, the children carrying that tag.
 */
export async function walkArticle(session, data, templateTags = [], referenceFormat = ReferenceFormatTypes.bibtex) {
  session.log.debug('Starting walkArticle...');
  const images = {};
  const referenceKeys = new Set();
  const references = {};
  const limiter = new Bottleneck({ maxConcurrent: 25 });
  const templateTagSet = new Set(templateTags); // ensure dedupe

  const children = await Promise.all(
    data.order.map(async (k) => {
      const articleChild = data.children[k];
      const srcId = articleChild?.src;
      if (!srcId) return {};
      const style = articleChild.style ?? {};

      // The block and version requests are independent, so issue them together
      // (still subject to the limiter) instead of one after the other.
      const [childBlock, childVersion] = await Promise.all([
        limiter.schedule(() => new Block(session, srcId).get()),
        limiter.schedule(() => new Version(session, srcId).get()),
      ]);

      // A block without tags is treated as having none.
      const tags = childBlock.data.tags ?? [];
      // Do not walk the content if it shouldn't be walked
      if (tags.includes('no-export')) return {};

      switch (childVersion.data.kind) {
        case KINDS.Content: {
          const state = getEditorState(childVersion.data.content);
          const matchingTags = tags.filter((t) => templateTagSet.has(t));
          return {
            state,
            block: childBlock,
            version: childVersion,
            templateTags: matchingTags.length > 0 ? matchingTags : undefined,
          };
        }
        case KINDS.Code: {
          const html = getCodeHTML(childVersion.data.content, childVersion.data.language, false);
          return { state: getEditorState(html), block: childBlock, version: childVersion };
        }
        case KINDS.Image: {
          const key = oxaLink('', childVersion.id);
          if (!key) return {};
          // Note: the caption is on the block!
          const html = getFigureHTML(
            articleChild.src.block,
            key,
            childVersion.data.title,
            childBlock.data.caption ?? '',
            style,
          );
          const state = getEditorState(html);
          images[key] = childVersion;
          return { state, block: childBlock, version: childVersion };
        }
        case KINDS.Output: {
          const key = oxaLink('', childVersion.id);
          if (!key) return {};
          if (outputHasImage(childVersion)) {
            // NOTE(review): the figure id here is `articleChild.id`, while the image
            // branch uses `articleChild.src.block` — confirm the difference is intended.
            // Note: the caption is on the block!
            const html = getFigureHTML(
              articleChild.id,
              key,
              childVersion.data.title,
              childBlock.data.caption ?? '',
              style,
            );
            const state = getEditorState(html);
            images[key] = childVersion;
            return { state, block: childBlock, version: childVersion };
          }
          if (outputHasHtml(childVersion)) {
            const state = await getEditorStateFromFirstHTMLOutput(session, childVersion);
            if (state == null) return {};
            return { state, block: childBlock, version: childVersion };
          }
          // Outputs with neither image nor HTML are kept, but without a state.
          return { block: childBlock, version: childVersion };
        }
        default:
          return {};
      }
    }),
  );

  // Collect all images and reference keys mentioned inside the child documents
  for (const { state } of children) {
    if (!state) continue;
    state.doc.descendants((node) => {
      const { name } = node.type;
      if (name === nodeNames.image) {
        const { src } = node.attrs;
        const id = oxaLinkToId(src)?.block;
        if (id) images[src] = new Version(session, id);
      } else if (name === nodeNames.cite) {
        const { key, kind } = node.attrs;
        // Only true citations are collected; table/eq/sec/code/fig references are skipped.
        // TODO: add a lookup table for reference IDs
        if (kind === ReferenceKind.cite && key) referenceKeys.add(key);
      }
      // Always keep descending into child nodes.
      return true;
    });
  }

  // Load all of the references
  session.log.debug('Starting Reference Localizaton...');
  await Promise.all(
    [...referenceKeys].map(async (key) => {
      const id = oxaLinkToId(key)?.block;
      if (!id) return;
      // Always load the latest version for references!
      const { version } = await limiter.schedule(() =>
        getBlockAndLatestVersion(session, id, { format: referenceFormat }),
      );
      if (!version) {
        session.log.error(`Could not fetch latest version of reference - skipping ${key}`);
        return;
      }
      if (version.data.kind !== KINDS.Reference) return;
      const { content } = version.data;
      // Extract the label: '@article{SimPEG2015,\n...' ➡️ 'SimPEG2015'
      const label = content.slice(content.indexOf('{') + 1, content.indexOf(','));
      const refKey = basekey(key);
      const existing = references[refKey];
      const state = referenceFormat === ReferenceFormatTypes.html ? getEditorState(content) : undefined;
      if (existing?.version) {
        const ve = existing.version.id.version;
        const v = version.id.version;
        // if existing, only update if incoming version is defined and greater than the existing
        const shouldReplace = ve == null || ve < (v ?? 0);
        if (!shouldReplace) return;
      }
      references[refKey] = { label, bibtex: content, version, state };
    }),
  );

  // Skipped children are represented by `{}`; drop them before splitting.
  const nonEmptyChildren = children.filter((c) => Object.keys(c).length > 0);
  const contentChildren = nonEmptyChildren.filter((c) => !c.templateTags);
  const taggedChildren = nonEmptyChildren.filter((c) => c.templateTags);
  const tagged = Object.fromEntries(
    [...templateTagSet].map((tag) => [
      tag,
      taggedChildren.filter((c) => c.templateTags.includes(tag)),
    ]),
  );
  return {
    children: contentChildren,
    images,
    references,
    tagged,
  };
}
/**
 * Download every image version and return the raw bytes keyed like the input.
 *
 * Each version is loaded with `version.get()`, its source URL is resolved with
 * `getImageSrc`, and the URL is fetched through `session.fetch`. Entries with
 * no source URL are skipped. Entries whose fetch does not succeed
 * (`response.ok` is falsy) are logged and skipped, so an HTTP error page is
 * never stored as image data.
 *
 * @param {object} session - Provides `fetch(url)` and `log.error(message)`.
 * @param {object} images - Map of key to version object (each exposing `get()`).
 * @returns {Promise<object>} Map of key to the buffer returned by `response.buffer()`.
 */
export async function loadImagesToBuffers(session, images) {
  const buffers = {};
  await Promise.all(
    Object.entries(images).map(async ([key, version]) => {
      await version.get();
      const { src } = getImageSrc(version);
      if (!src) return;
      const response = await session.fetch(src);
      if (!response.ok) {
        session.log.error(`Could not fetch image - skipping ${key}`);
        return;
      }
      // TODO convert SVGs to PNG with imagemagick
      // NOTE(review): `buffer()` looks like the node-fetch v2 response API — confirm what `session.fetch` returns.
      buffers[key] = await response.buffer();
    }),
  );
  return buffers;
}
/**
 * Read local image files synchronously into buffers.
 *
 * @param {Array<{url: string}>} images - Entries whose `url` is a path readable by `fs.readFileSync`.
 * @returns {object} Map of each `url` to the file contents as a buffer.
 */
export function loadLocalImagesToBuffers(images) {
  const contentsByUrl = {};
  for (const { url } of images) {
    contentsByUrl[url] = fs.readFileSync(url);
  }
  return contentsByUrl;
}