zenodo-utils
Version:
Utilities for working with Zenodo
502 lines (501 loc) • 23.3 kB
JavaScript
import path from 'node:path';
import fs from 'node:fs';
import { load as yamlLoad } from 'js-yaml';
import { Command, Option } from 'commander';
import inquirer from 'inquirer';
import { ZenodoClient } from '../index.js';
import { castSession, filterPages, findCurrentProjectAndLoad, getFileContent, loadConfig, loadProject, processProject, resolveFrontmatterParts, selectors, Session, } from 'myst-cli';
import { extractPart, plural } from 'myst-common';
import { mystToHtml } from 'myst-to-html';
import { clirun } from 'myst-cli-utils';
import { addDoiToConfig, addZenodoToConfig } from './utils.js';
// File extensions that are picked up (case-insensitively) from the folder containing the
// project config file when the project does not list any `downloads` to upload.
const DEPOSIT_FILE_EXTENSIONS = ['.pdf', '.pptx', '.png'];
/**
 * Copy the DOI of every cited reference into `dois`, keyed by citation key.
 *
 * Citations without a DOI are skipped and reported with a warning.
 *
 * @param session - session whose `log.warn` is used for reporting
 * @param references - the `references` object of a loaded file
 * @param dois - accumulator object that is mutated in place
 */
function collectCitationDois(session, references, dois) {
  const cite = references.cite;
  if (cite == null) return;
  cite.order.forEach((key) => {
    const value = cite.data[key].doi;
    if (value) {
      dois[key] = value;
    } else {
      session.log.warn(`Citation without DOI excluded from zenodo deposit: ${key}`);
    }
  });
}

/**
 * Gather everything needed to deposit one article: frontmatter, abstract (as HTML) and cited DOIs.
 *
 * Two modes, selected by comparing `depositFile` with the project's config file:
 *  - `depositFile` is the config file: the project frontmatter is used, the abstract comes from
 *    the project `parts.abstract` (or, failing that, the first page that yields an abstract part),
 *    and DOIs are collected from every page of the project.
 *  - otherwise: only `depositFile` is loaded; its frontmatter is used with title/subtitle
 *    overridden by the project's when the project defines a title.
 *
 * @param session - session providing `store` and `log`
 * @param depositSource - `{ projectPath, depositFile }`
 * @returns `{ frontmatter, dois, abstract, configFile, project }`; `abstract` is `undefined`
 *   when no abstract part was found and `frontmatter` defaults to `{}`
 */
export async function depositArticleFromSource(session, depositSource) {
  const { projectPath, depositFile } = depositSource;
  const state = session.store.getState();
  const configFile = selectors.selectLocalConfigFile(state, projectPath);
  const projectFrontmatter = selectors.selectLocalProjectConfig(state, projectPath);
  const dois = {};
  let abstractPart;
  let frontmatter;

  if (depositFile === configFile) {
    let fileContents;
    try {
      const { pages } = await loadProject(session, projectPath);
      fileContents = await getFileContent(
        session,
        pages.map(({ file }) => file),
        { projectPath, imageExtensions: [] },
      );
    } catch (error) {
      // Best effort: a project whose pages cannot be loaded is still deposited from its
      // project-level frontmatter alone, but leave a trace of why the pages were dropped.
      session.log.debug(
        `Unable to load project pages for deposit, continuing without them: ${error?.message ?? error}`,
      );
      fileContents = [];
    }
    if (projectFrontmatter?.parts?.abstract) {
      const abstractContent = castSession(session).$getMdast(
        projectFrontmatter.parts.abstract[0],
      )?.pre;
      abstractPart = abstractContent?.mdast;
    } else {
      // Take the abstract from the first page that provides one.
      for (const { mdast, frontmatter: fileFrontmatter } of fileContents) {
        abstractPart = extractPart(mdast, 'abstract', {
          frontmatterParts: resolveFrontmatterParts(session, fileFrontmatter),
        });
        if (abstractPart) break;
      }
    }
    fileContents.forEach(({ references }) => collectCitationDois(session, references, dois));
    frontmatter = projectFrontmatter;
  } else {
    const [fileContent] = await getFileContent(session, [depositFile], {
      projectPath,
      imageExtensions: [],
    });
    // The page-level fallback must read from the loaded file's frontmatter; the local
    // `frontmatter` variable has not been assigned yet at this point.
    const pageFrontmatter = fileContent.frontmatter;
    // Prioritize project title over page title
    const title = projectFrontmatter?.title ?? pageFrontmatter?.title;
    // Prioritize project subtitle over page subtitle unless project has no title
    const subtitle = projectFrontmatter?.title
      ? (projectFrontmatter?.subtitle ?? undefined)
      : pageFrontmatter?.subtitle;
    frontmatter = { ...pageFrontmatter, title, subtitle };
    abstractPart = extractPart(fileContent.mdast, 'abstract');
    collectCitationDois(session, fileContent.references, dois);
  }

  const abstract = abstractPart ? mystToHtml(abstractPart) : undefined;
  return {
    frontmatter: frontmatter ?? {},
    dois,
    abstract,
    configFile,
    project: projectFrontmatter,
  };
}
/**
 * Work out which `{ projectPath, depositFile }` pairs should be deposited.
 *
 * Resolution order:
 *  1. `opts.file` is given: that file, inside the project that contains it.
 *  2. A project exists on the current path: the user picks the project config file or one of
 *     its pages interactively.
 *  3. Otherwise: every direct child folder of the working directory that has a config, each
 *     deposited through its config file.
 *
 * @param session - session used for loading projects and reading the store
 * @param opts - CLI options; only `opts.file` is read here
 * @returns array of `{ projectPath, depositFile }`
 * @throws when the project of `opts.file` cannot be determined, or the current project has no pages
 */
async function getDepositSources(session, opts) {
  // If file is specified, find the containing project and use it as the only source
  if (opts.file) {
    const explicitFile = path.resolve(opts.file);
    const owningProject = await findCurrentProjectAndLoad(session, explicitFile);
    if (!owningProject) {
      throw new Error(`Unable to determine project path from file: ${opts.file}`);
    }
    return [{ depositFile: explicitFile, projectPath: owningProject }];
  }

  // If file is not specified and there is a project on the current path, select a single source from there
  await session.reload();
  const currentState = session.store.getState();
  const currentProjectPath = selectors.selectCurrentProjectPath(currentState);
  const currentConfigFile = selectors.selectCurrentProjectFile(currentState);
  if (currentProjectPath && currentConfigFile) {
    const project = await processProject(
      session,
      { path: currentProjectPath },
      { imageExtensions: [], writeFiles: false },
    );
    if (filterPages(project).length === 0) {
      throw new Error('No MyST pages found');
    }
    const selectable = [{ file: currentConfigFile }, ...filterPages(project)];
    const fileQuestion = {
      name: 'depositFile',
      type: 'list',
      message: 'File:',
      choices: selectable.map(({ file }) => ({ name: path.relative('.', file), value: file })),
    };
    const answers = await inquirer.prompt([fileQuestion]);
    return [{ projectPath: currentProjectPath, depositFile: answers.depositFile }];
  }

  // If there is no project on the current path, load all projects in child folders
  const childFolders = fs
    .readdirSync('.')
    .map((entry) => path.resolve(entry))
    .filter((entry) => fs.lstatSync(entry).isDirectory());
  const candidates = await Promise.all(
    childFolders.map(async (folder) => {
      const config = await loadConfig(session, folder);
      if (!config) return undefined;
      const depositFile = selectors.selectLocalConfigFile(session.store.getState(), folder);
      return { projectPath: folder, depositFile };
    }),
  );
  return candidates.filter((candidate) => Boolean(candidate));
}
/**
 * Rules describing every venue/volume/issue field that must agree across all articles.
 *
 * The array order is the order in which fields are checked for each article, which determines
 * which conflict is reported first.
 *  - `key`: property of the returned object
 *  - `label`: noun used in the `Conflicting <label>: ...` error message
 *  - `read`: extracts the raw value from an article's frontmatter
 *  - `allowFalsy`: when true any non-null value is recorded (e.g. `0`); otherwise only truthy ones
 *  - `asString`: when true the value is normalised with `String()` before comparing/storing
 */
const ISSUE_FIELD_RULES = [
  { key: 'venueTitle', label: 'venue titles', read: ({ venue }) => venue?.title },
  { key: 'venueAbbr', label: 'journal abbreviations', read: ({ venue }) => venue?.short_title },
  { key: 'venueDoi', label: 'journal dois', read: ({ venue }) => venue?.doi },
  { key: 'venueUrl', label: 'venue urls', read: ({ venue }) => venue?.url },
  { key: 'journalSeries', label: 'series', read: ({ venue }) => venue?.series },
  { key: 'journalIssn', label: 'issn', read: ({ venue }) => venue?.issn },
  { key: 'eventNumber', label: 'event number', read: ({ venue }) => venue?.number, allowFalsy: true },
  { key: 'eventDate', label: 'event date', read: ({ venue }) => venue?.date, allowFalsy: true },
  { key: 'eventLocation', label: 'event location', read: ({ venue }) => venue?.location, allowFalsy: true },
  { key: 'venuePublisher', label: 'venue publisher', read: ({ venue }) => venue?.publisher, allowFalsy: true },
  { key: 'volumeNumber', label: 'volumes', read: ({ volume }) => volume?.number, asString: true },
  { key: 'volumeDoi', label: 'volume dois', read: ({ volume }) => volume?.doi },
  { key: 'issueNumber', label: 'issues', read: ({ issue }) => issue?.number, asString: true },
  { key: 'issueDoi', label: 'issue dois', read: ({ issue }) => issue?.doi },
  { key: 'volumeTitle', label: 'volume titles', read: ({ volume }) => volume?.title },
  { key: 'volumeSubject', label: 'proceedings subjects', read: ({ volume }) => volume?.subject },
];

/**
 * Store `value` under `key`, or verify it matches the value recorded by an earlier article.
 *
 * "Not yet recorded" is tested with `=== undefined` rather than truthiness so that a recorded
 * falsy value (such as event number `0`) still takes part in conflict detection.
 *
 * @throws Error `Conflicting <label>: "<existing>" and "<value>"` on mismatch
 */
function recordConsistentValue(target, key, value, label) {
  const existing = target[key];
  if (existing === undefined) {
    target[key] = value;
  } else if (existing !== value) {
    throw new Error(`Conflicting ${label}: "${existing}" and "${value}"`);
  }
}

/**
 * Derive the venue / volume / issue / event data shared by a set of articles.
 *
 * Every article may contribute each field; all articles that provide a field must provide the
 * same value, otherwise an error naming both values is thrown. Editors are resolved from editor
 * ids to contributor records (with affiliation ids resolved against the article's
 * `affiliations`); when several articles list editors, the last such article wins.
 *
 * @param articles - array of objects with a `frontmatter` property
 * @returns object with the keys `venueTitle`, `venueDoi`, `venueAbbr`, `venueUrl`,
 *   `volumeNumber`, `volumeDoi`, `issueNumber`, `issueDoi`, `journalSeries`, `journalIssn`,
 *   `eventNumber`, `eventDate`, `eventLocation`, `volumeTitle`, `venuePublisher`,
 *   `volumeSubject`, `publicationEditors`; fields no article provided are `undefined`
 * @throws Error when two articles disagree on any of the shared fields
 */
function issueDataFromArticles(articles) {
  // Pre-declare every key so the result always has the same shape and key order.
  const issueData = {
    venueTitle: undefined,
    venueDoi: undefined,
    venueAbbr: undefined,
    venueUrl: undefined,
    volumeNumber: undefined,
    volumeDoi: undefined,
    issueNumber: undefined,
    issueDoi: undefined,
    journalSeries: undefined,
    journalIssn: undefined,
    eventNumber: undefined,
    eventDate: undefined,
    eventLocation: undefined,
    volumeTitle: undefined,
    venuePublisher: undefined,
    volumeSubject: undefined,
    publicationEditors: undefined,
  };
  articles.forEach(({ frontmatter }) => {
    const { editors, contributors, affiliations } = frontmatter;
    for (const { key, label, read, allowFalsy, asString } of ISSUE_FIELD_RULES) {
      const raw = read(frontmatter);
      const provided = allowFalsy ? raw != null : Boolean(raw);
      if (!provided) continue;
      recordConsistentValue(issueData, key, asString ? String(raw) : raw, label);
    }
    if (editors?.length) {
      issueData.publicationEditors = editors
        .map((editorId) => contributors?.find(({ id }) => editorId === id))
        // Editor ids without a matching contributor are dropped.
        .filter((editor) => !!editor)
        .map((editor) => ({
          ...editor,
          affiliations: editor.affiliations?.map((affId) =>
            affiliations?.find((candidate) => candidate.id === affId),
          ),
        }));
    }
  });
  return issueData;
}
/**
 * Build the `name` of a Zenodo creator from an author record.
 *
 * Uses "family, given" from `nameParsed` when available, degrades to whichever part exists, and
 * finally to the author's plain `name`, so the literal text "undefined" is never sent.
 */
function formatCreatorName(author) {
  // TODO: improve this for non-western name, particles, etc.
  const parts = [author.nameParsed?.family, author.nameParsed?.given].filter((part) => !!part);
  return parts.length > 0 ? parts.join(', ') : author.name;
}

/**
 * Join the names of resolved affiliation records into one comma separated string.
 *
 * Tolerates a missing list (returns `undefined`) and unresolved (`undefined`) entries.
 */
function joinAffiliationNames(affiliations) {
  return affiliations
    ?.map((affiliation) => affiliation?.name)
    .filter((name) => !!name)
    .join(', ');
}

/**
 * CLI action: create or update one Zenodo deposition per deposit source, upload files and
 * optionally publish.
 *
 * For each article: reuse the deposit id stored in the project config or create an empty
 * deposition (and record it in the config), refuse to touch already submitted deposits, push
 * the metadata, upload files when the deposition has none yet, and publish when requested.
 *
 * @param session - session providing `store` and `log`
 * @param opts - `{ type, file, sandbox, community, publish }`; the deposit type is prompted
 *   for when `type` is not set
 * @throws when no deposit type / articles / config file / title / abstract / files are found,
 *   or when the deposit has already been submitted
 */
async function deposit(session, opts) {
  let { type: depositType } = opts;
  const { sandbox, community, publish } = opts;
  const client = new ZenodoClient(process.env.ZENODO_TOKEN, sandbox);
  if (!depositType) {
    const choices = [
      { name: 'Publication', value: 'publication' },
      { name: 'Poster', value: 'poster' },
      { name: 'Presentation', value: 'presentation' },
      { name: 'Dataset', value: 'dataset' },
      { name: 'Image', value: 'image' },
      { name: 'Video', value: 'video' },
      { name: 'Software', value: 'software' },
      { name: 'Lesson', value: 'lesson' },
      { name: 'Physical Object', value: 'physicalobject' },
      { name: 'Other', value: 'other' },
    ];
    const resp = await inquirer.prompt([
      {
        name: 'depositType',
        type: 'list',
        message: 'Deposit type:',
        choices,
      },
    ]);
    depositType = resp.depositType;
  }
  if (!depositType) {
    throw new Error('No deposit type specified');
  }
  const depositSources = await getDepositSources(session, opts);
  const depositArticles = (
    await Promise.all(depositSources.map((source) => depositArticleFromSource(session, source)))
  ).sort((a, b) => Number(a.frontmatter.first_page) - Number(b.frontmatter.first_page));
  if (depositArticles.length === 0) {
    throw new Error('nothing found for deposit');
  }
  session.log.info(
    `🔍 Found ${plural('%s article(s)', depositArticles)} for ${depositType} deposit`,
  );
  const { venueTitle, venueAbbr, venueUrl, eventDate, eventLocation, publicationEditors } =
    issueDataFromArticles(depositArticles);

  for (const { configFile, frontmatter, abstract, project } of depositArticles) {
    session.log.info(`\nProcessing: "${frontmatter.title}"`);
    if (!configFile) {
      throw new Error(`No config file found for source: ${frontmatter.title}`);
    }

    // Reuse the deposit recorded in the config, or create one and record it immediately so a
    // later failure does not lead to a second deposition on the next run.
    let zenodoDepositId = getZenodoId(configFile);
    if (!zenodoDepositId) {
      const createdData = await client.createEmptyDeposition();
      zenodoDepositId = createdData.id;
      session.log.debug(JSON.stringify(createdData, null, 2));
      session.log.info(`🎉 Created deposit ${zenodoDepositId}: ${createdData.links.html}`);
      addZenodoToConfig(configFile, zenodoDepositId, sandbox);
    } else {
      session.log.info(`🔍 Found existing deposit ID ${zenodoDepositId}`);
    }
    const existingData = await client.getDeposition(zenodoDepositId);
    if (existingData.submitted) {
      throw new Error(`Deposit ${zenodoDepositId} already submitted`);
    }
    if (!frontmatter.title) throw new Error('The deposit must have a title');
    if (!abstract) throw new Error('The deposit must have an abstract');

    const data = {
      title: frontmatter.title,
      description: abstract,
      upload_type: depositType,
      publication_date: frontmatter.date,
      imprint_publisher: venueAbbr || venueTitle,
      creators:
        frontmatter.authors?.map((author) => ({
          name: formatCreatorName(author),
          // Author affiliations are ids; resolve them against the article's affiliation list.
          affiliation: joinAffiliationNames(
            author.affiliations?.map((affId) =>
              frontmatter.affiliations?.find((candidate) => candidate.id === affId),
            ),
          ),
          orcid: author.orcid,
        })) ?? [],
      doi: frontmatter.doi,
    };
    if (community) {
      data.communities = [{ identifier: community }];
    }
    if (depositType === 'presentation' || depositType === 'poster') {
      data.conference_title = venueTitle;
      data.contributors = publicationEditors?.map((editor) => ({
        type: 'Editor',
        name: editor.name,
        orcid: editor.orcid,
        // Editors may have no affiliations, or affiliation ids that did not resolve.
        affiliation: joinAffiliationNames(editor.affiliations),
      }));
      data.conference_acronym = venueAbbr;
      data.conference_url = venueUrl;
      data.conference_dates = eventDate;
      data.conference_place = eventLocation;
      if (frontmatter.github) {
        data.custom = {
          'code:codeRepository': frontmatter.github,
        };
      }
    }
    session.log.debug(JSON.stringify(data, null, 2));
    const updatedData = await client.updateDeposition(zenodoDepositId, data);
    session.log.debug(JSON.stringify(updatedData, null, 2));
    session.log.info(`✍️ Updated deposit ${zenodoDepositId}: ${updatedData.links.html}`);

    if (updatedData.files?.length) {
      session.log.info(
        `🛑 Skipping files - deposit already has ${plural('%s file(s)', updatedData.files)} uploaded`,
      );
    } else {
      const projectDir = path.dirname(configFile);
      // Prefer the downloads declared by the project ...
      let filesToUpload = project?.downloads
        ?.map((download) => download?.url)
        .filter((download) => !!download)
        .map((download) => path.resolve(projectDir, download));
      // ... and fall back to known deposit file types sitting next to the config file.
      if (!filesToUpload?.length) {
        filesToUpload = fs
          .readdirSync(projectDir)
          .filter((file) => DEPOSIT_FILE_EXTENSIONS.some((ext) => file.toLowerCase().endsWith(ext)))
          .map((file) => path.resolve(projectDir, file));
      }
      if (!filesToUpload?.length) {
        throw new Error(`🚨 No files found to upload for deposit ${zenodoDepositId}`);
      }
      session.log.info(`🔍 Found ${plural('%s file(s)', filesToUpload)} to upload`);
      for (const fileToUpload of filesToUpload) {
        session.log.debug(`Uploading ${fileToUpload}`);
        try {
          await client.uploadFile(existingData.links.bucket, fileToUpload);
        } catch (error) {
          // Retry exactly once; a second failure propagates to the caller.
          session.log.warn('Error writing file, trying again!');
          await client.uploadFile(existingData.links.bucket, fileToUpload);
        }
      }
    }

    if (publish) {
      const publishedData = await client.publishDeposition(zenodoDepositId);
      session.log.debug(JSON.stringify(publishedData, null, 2));
      if (!frontmatter.doi) {
        // Record the DOI minted on publication when the article did not declare one.
        addDoiToConfig(configFile, publishedData.metadata.doi);
      }
      session.log.info(`🚀 Published deposit ${zenodoDepositId}: ${publishedData.links.html}`);
    }
  }
}
/**
 * Build the `deposit` sub-command.
 *
 * Options: `--file`, `--type` (restricted to the known deposit types, defaulting to
 * `presentation`), `--community`, `--sandbox` and `--publish`. The action runs `deposit`
 * through `clirun` with a fresh `Session`.
 *
 * @param program - the parent program, forwarded to `clirun`
 * @returns the configured `Command`
 */
function makeDepositCLI(program) {
  const depositTypes = [
    'publication',
    'poster',
    'presentation',
    'dataset',
    'image',
    'video',
    'software',
    'lesson',
    'physicalobject',
    'other',
  ];
  const options = [
    new Option('--file <value>', 'File to deposit'),
    new Option('--type <value>', 'Deposit type').choices(depositTypes).default('presentation'),
    new Option('--community <value>', 'Zenodo community identifier'),
    new Option('--sandbox', 'Use the sandbox for testing purposes'),
    new Option('--publish', 'Publish the resource'),
  ];
  const command = new Command('deposit').description(
    'Create Zenodo deposit XML from local MyST content',
  );
  for (const option of options) {
    command.addOption(option);
  }
  const createSession = (logger) => new Session({ logger });
  command.action(clirun(deposit, { program, getSession: createSession }));
  return command;
}
/**
 * Register the `deposit` sub-command on the given program.
 *
 * @param program - program that receives the command via `addCommand`
 */
export function addDepositCLI(program) {
  const depositCommand = makeDepositCLI(program);
  program.addCommand(depositCommand);
}
/**
 * Read the numeric Zenodo deposit id stored under `project.identifiers.zenodo` in a config file.
 *
 * The stored value is treated as a URL (or bare id) whose last non-empty path segment is the
 * id. Empty segments are ignored so a trailing slash does not hide the id, and a tail that is
 * not a number yields `undefined` rather than `NaN`.
 *
 * @param configFile - path to the YAML project config; may be undefined
 * @returns the deposit id, or `undefined` when no usable id is recorded
 */
function getZenodoId(configFile) {
  // This shouldn't be needed in the future
  if (!configFile) return undefined;
  const data = yamlLoad(fs.readFileSync(configFile).toString());
  const url = data?.project?.identifiers?.zenodo;
  if (!url) return undefined;
  const lastSegment = String(url)
    .split('/')
    .filter((segment) => segment.trim().length > 0)
    .pop();
  if (lastSegment === undefined) return undefined;
  const id = Number.parseInt(lastSegment, 10);
  return Number.isNaN(id) ? undefined : id;
}