UNPKG

@eagleoutice/flowr

Version:

Static Dataflow Analyzer and Program Slicer for the R Programming Language

304 lines 11.1 kB
"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
exports.AuthorRole = void 0;
exports.rAuthorInfoToReadable = rAuthorInfoToReadable;
exports.parseRAuthorString = parseRAuthorString;
exports.parseTextualAuthorString = parseTextualAuthorString;
const args_1 = require("./text/args");
const assert_1 = require("./assert");
const objects_1 = require("./objects");
const retriever_1 = require("../r-bridge/retriever");
/** https://r-pkgs.org/description.html#sec-description-authors-at-r */
var AuthorRole;
(function (AuthorRole) {
    /** the creator or maintainer, the person you should bother if you have problems. Despite being short for “creator”, this is the correct role to use for the current maintainer, even if they are not the initial creator of the package. */
    AuthorRole["Creator"] = "cre";
    /** authors, those who have made significant contributions to the package. */
    AuthorRole["Author"] = "aut";
    /** contributors, those who have made smaller contributions, like patches. */
    AuthorRole["Contributor"] = "ctb";
    /** copyright holder. This is used to list additional copyright holders who are not authors, typically companies, like an employer of one or more of the authors. */
    AuthorRole["CopyrightHolder"] = "cph";
    /** funder, the people or organizations that have provided financial support for the development of the package. */
    AuthorRole["Funder"] = "fnd";
})(AuthorRole || (exports.AuthorRole = AuthorRole = {}));
/**
 * Render structured author information as one human-readable line of the form
 * `Name <email> [role, role] (ORCID: id) {comment; comment}`.
 * Every part except the name is omitted when the corresponding field is empty or missing.
 *
 * @param author - record with `name` (array of name parts) and optional `email`, `roles`, `orcid`, `comment`
 * @returns the formatted line
 */
function rAuthorInfoToReadable(author) {
    const nameStr = author.name.join(' ');
    const emailStr = author.email ? ` <${author.email}>` : '';
    // tolerate records without a `roles` field instead of throwing on `.length`
    const roles = author.roles ?? [];
    const rolesStr = roles.length > 0 ? ` [${roles.join(', ')}]` : '';
    const orcidStr = author.orcid ? ` (ORCID: ${author.orcid})` : '';
    const commentStr = author.comment && author.comment.length > 0 ? ` {${author.comment.join('; ')}}` : '';
    return `${nameStr}${emailStr}${rolesStr}${orcidStr}${commentStr}`;
}
/**
 * Parse an R `Authors@R` string into structured author information.
 * These are mostly found in `R` DESCRIPTION files and are a vector of `person()` calls.
 * For now, this works *without* the full dataflow engine, so complex cases may not be parsed correctly.
 *
 * @param authorString - either a single `person(...)` call or a `c(person(...), ...)` vector
 * @returns one entry per `person()` call that could be parsed; an empty array for any other input
 */
function parseRAuthorString(authorString) {
    const str = authorString.trim();
    if (str.startsWith('c(') && str.endsWith(')')) {
        const inner = str.slice(2, -1).trim();
        // the comma split also cuts through nested calls, so fragments of one call are re-joined first
        const parts = joinPartsWithVectors((0, args_1.splitAtEscapeSensitive)(inner, false, ','));
        const authors = [];
        for (const part of parts) {
            const author = parseRPersonCall(part);
            if (author) {
                authors.push(author);
            }
        }
        return authors;
    }
    if (str.startsWith('person(') && str.endsWith(')')) {
        const author = parseRPersonCall(str);
        return author ? [author] : [];
    }
    return [];
}
/**
 * Index of the first `=` in `arg` that is neither inside a quoted string (`"`, `'`, or backticks)
 * nor nested inside brackets, or `-1` if there is none.
 */
function findTopLevelEquals(arg) {
    let quote = undefined;
    let depth = 0;
    for (let i = 0; i < arg.length; i++) {
        const ch = arg[i];
        if (quote !== undefined) {
            if (ch === '\\') {
                i++; // skip the escaped character
            }
            else if (ch === quote) {
                quote = undefined;
            }
            continue;
        }
        switch (ch) {
            case '"':
            case "'":
            case '`':
                quote = ch;
                break;
            case '(':
            case '[':
            case '{':
                depth++;
                break;
            case ')':
            case ']':
            case '}':
                depth--;
                break;
            case '=':
                // `<= 0` keeps stray closing brackets from hiding a separator
                if (depth <= 0) {
                    return i;
                }
                break;
            default:
                break;
        }
    }
    return -1;
}
/**
 * Split a single call argument into its optional name and its value.
 * Only an `=` outside of quotes and brackets separates name from value, so positional
 * values such as `"a=b"` or `c(ORCID = "...")` are returned unnamed instead of being torn apart.
 *
 * @param arg - raw argument text
 * @returns `{ name, value }` for named arguments, `{ value }` for positional ones
 *          (`value` is `undefined` when the argument is blank)
 */
function splitArgNameValue(arg) {
    const eqIndex = findTopLevelEquals(arg);
    if (eqIndex === -1) {
        const trimmedArg = arg.trim();
        return { value: trimmedArg.length === 0 ? undefined : trimmedArg };
    }
    const name = arg.slice(0, eqIndex).trim();
    const value = arg.slice(eqIndex + 1).trim();
    return { name, value };
}
// Joins parts that may be split by c(...) vectors back together, ...
/**
 * Net change in parenthesis nesting contributed by `text`.
 * Parentheses inside single- or double-quoted strings are ignored, so a literal
 * such as `"a (b"` does not unbalance the count.
 */
function parenthesisDelta(text) {
    let delta = 0;
    let quote = undefined;
    for (let i = 0; i < text.length; i++) {
        const char = text[i];
        if (quote !== undefined) {
            if (char === '\\') {
                i++; // skip the escaped character
            }
            else if (char === quote) {
                quote = undefined;
            }
        }
        else if (char === '"' || char === "'") {
            quote = char;
        }
        else if (char === '(') {
            delta++;
        }
        else if (char === ')') {
            delta--;
        }
    }
    return delta;
}
/**
 * Re-joins fragments of a comma split that belong to the same parenthesised expression.
 * Each part is trimmed; consecutive parts are merged with `', '` until the running
 * parenthesis level is back at zero. Fragments left over at the end (unbalanced input)
 * are emitted as one final entry.
 *
 * @param parts - fragments produced by splitting on `,`
 * @returns the fragments with nested calls / vectors restored
 */
function joinPartsWithVectors(parts) {
    const result = [];
    let buffer = [];
    let parenthesisLevel = 0;
    for (const part of parts) {
        const trimmed = part.trim();
        parenthesisLevel += parenthesisDelta(trimmed);
        buffer.push(trimmed);
        // balanced again: everything buffered so far forms one complete expression
        if (parenthesisLevel === 0) {
            result.push(buffer.join(', '));
            buffer = [];
        }
    }
    if (buffer.length > 0) {
        result.push(buffer.join(', '));
    }
    return result;
}
/** Order in which positional (unnamed) `person()` arguments are assigned. */
const defaultPosArgNames = ['given', 'family', 'middle', 'email', 'role', 'comment', 'first', 'last'];
/**
 * Split the textual form of an R vector `c(a, b, ...)` into its elements.
 * Anything that is not wrapped in `c(...)` is returned as a single trimmed element.
 */
function splitVector(roleStr) {
    if (roleStr.startsWith('c(') && roleStr.endsWith(')')) {
        const inner = roleStr.slice(2, -1).trim();
        return joinPartsWithVectors((0, args_1.splitAtEscapeSensitive)(inner, false, ','));
    }
    return [roleStr.trim()];
}
/**
 * Extract the known author roles from the `role` argument of a `person()` call.
 * Elements that are not a value of `AuthorRole` are dropped.
 *
 * @param roleStr - raw argument text, may be `undefined` or empty
 * @returns the recognised role codes, in input order
 */
function parseRoles(roleStr) {
    if (!roleStr) {
        return [];
    }
    const knownRoles = Object.values(AuthorRole);
    const roles = [];
    for (const part of splitVector(roleStr)) {
        const roleValue = (0, retriever_1.removeRQuotes)(part.trim());
        if (knownRoles.includes(roleValue)) {
            roles.push(roleValue);
        }
    }
    return roles;
}
/**
 * Extract free-text comments and an optional ORCID from the `comment` argument of a `person()` call.
 * An element of the form `ORCID = <id>` (case-insensitive) sets the ORCID; all other elements are comments.
 *
 * @param commentStr - raw argument text, may be `undefined` or empty
 * @returns `{ contents, orcid }`, or `undefined` when there is neither a comment nor an ORCID
 */
function parseComments(commentStr) {
    if (!commentStr) {
        return undefined;
    }
    const comments = [];
    let orcid = undefined;
    for (const part of splitVector(commentStr)) {
        const commentValue = (0, retriever_1.removeRQuotes)(part.trim());
        if (/ORCID\s*=/i.test(commentValue)) {
            // the pattern guarantees that an `=` is present
            const orcidIndex = commentValue.indexOf('=');
            orcid = (0, retriever_1.removeRQuotes)(commentValue.slice(orcidIndex + 1).trim());
            continue;
        }
        comments.push(commentValue);
    }
    return comments.length > 0 || orcid ? { contents: comments, orcid: orcid } : undefined;
}
/**
 * Store one argument in `argMap` under `split.name`.
 * Missing or empty values are stored as `undefined` (the key is still set, which marks the
 * argument as taken); other values are passed through `removeRQuotes` first.
 */
function assignArg(argMap, split) {
    const isEmpty = split.value === undefined || split.value?.length === 0;
    argMap.set(split.name, isEmpty ? undefined : (0, retriever_1.removeRQuotes)(split.value));
}
/**
 * Parse a single `person(...)` call into an author record.
 *
 * @param personCall - text of the call
 * @returns the author record, or `undefined` if the text is not of the form `person(...)`
 */
function parseRPersonCall(personCall) {
    /* function(given = NULL, family = NULL, middle = NULL, email = NULL, role = NULL, comment = NULL, first = NULL, last = NULL), but we neither use nor support full R semantics here for now */
    const call = personCall.trim();
    if (!call.startsWith('person(') || !call.endsWith(')')) {
        return undefined;
    }
    const inner = call.slice(7, -1).trim();
    // these may also split unescaped commas inside c(...)
    const parArgs = joinPartsWithVectors((0, args_1.splitAtEscapeSensitive)(inner, false, ','));
    const argMap = new Map();
    const unnamed = [];
    for (const arg of parArgs) {
        const trimmedArg = arg.trim();
        const split = splitArgNameValue(trimmedArg);
        if (!split.name) {
            unnamed.push(trimmedArg);
            continue;
        }
        assignArg(argMap, split);
    }
    // assign unnamed args in order: each one takes the first parameter name that is still free
    for (let i = 0; i < unnamed.length && i < defaultPosArgNames.length; i++) {
        const argName = defaultPosArgNames.find(x => !argMap.has(x));
        if (argName === undefined) {
            break;
        }
        assignArg(argMap, { name: argName, value: unnamed[i] });
    }
    const comments = parseComments(argMap.get('comment'));
    return (0, objects_1.compactRecord)({
        name: [
            argMap.get('given') ?? argMap.get('first'),
            argMap.get('middle'),
            argMap.get('family') ?? argMap.get('last')
        ].filter(assert_1.isNotUndefined),
        email: argMap.get('email'),
        roles: parseRoles(argMap.get('role')),
        comment: comments?.contents,
        orcid: comments?.orcid
    });
}
/**
 * Split `source` at the first character matched by `anyOf`.
 *
 * @param source - text to scan
 * @param anyOf  - pattern tested against one character at a time
 * @returns `collected` (everything before the match) and `rest` (the match and everything after);
 *          `rest` is empty when nothing matches
 */
function collectUntil(source, anyOf) {
    let i = 0;
    while (i < source.length && !anyOf.test(source[i])) {
        i++;
    }
    return { collected: source.slice(0, i), rest: source.slice(i) };
}
/**
 * In contrast to `parseRAuthorString`, this function parses simple textual author strings,
 * like `First Middle Last <email> [roles] (comment)...`. It does not support the full R `person()` syntax.
 *
 * @param authorString - one or more authors separated by `and`
 * @param addRoles     - roles appended to the roles of every parsed author
 * @returns one record per author
 */
function parseTextualAuthorString(authorString, addRoles = []) {
    const parts = (0, args_1.splitOnNestingSensitive)(authorString, 'and', { '<': '>', '[': ']', '(': ')', '"': '"', "'": "'" });
    const authors = [];
    for (const part of parts) {
        // the name runs up to the first `<`, `[` or `(`; the remainder holds email / roles / comment
        const name = collectUntil(part.trim(), /[<[(]/);
        const others = parseOnRest(name.rest);
        const c = processComment(others.comment);
        authors.push((0, objects_1.compactRecord)({
            name: name.collected.trim().split(/\s+/),
            email: others.email,
            roles: others.roles.concat(addRoles),
            comment: c.comment,
            orcid: c.orcid
        }));
    }
    return authors;
}
/** Matches `ORCID: <id>`, `ORCID = <id>` or `ORCID <id>` (case-insensitive) and captures the id. */
const orcidExtract = /ORCID\s*[:= ]\s*(?<orcid>.+)/i;
/**
 * Split a textual comment into plain comments and an optional ORCID.
 * The comment is only broken up at `,` / `;` when one of the fragments carries an ORCID;
 * otherwise it is returned unchanged as a single entry.
 *
 * @param comment - comment text without the surrounding parentheses, may be `undefined` or empty
 * @returns `{}` for no comment, `{ comment: [comment] }` without ORCID, otherwise
 *          `{ comment, orcid }` where `comment` is `undefined` if nothing but the ORCID remains
 */
function processComment(comment) {
    if (!comment) {
        return {};
    }
    const comments = [];
    let orcid = undefined;
    for (const part of comment.split(/\s*[,;]\s*/)) {
        const found = part.match(orcidExtract)?.groups?.['orcid'];
        if (found) {
            orcid = found.trim();
            continue;
        }
        const text = part.trim();
        // a trailing separator produces an empty fragment, which is not a comment
        if (text.length > 0) {
            comments.push(text);
        }
    }
    if (orcid) {
        return { comment: comments.length > 0 ? comments : undefined, orcid };
    }
    return { comment: [comment] };
}
/**
 * Parse the part of a textual author entry that follows the name:
 * any sequence of `<email>`, `[role, role]` and `(comment)` groups.
 * Parsing stops at the first unterminated group or unexpected character.
 *
 * @param rest - text after the author name
 * @returns `email` and `comment` (each `undefined` if absent) and `roles` (possibly empty)
 */
function parseOnRest(rest) {
    let email = undefined;
    let roles = [];
    let comment = undefined;
    let remaining = rest;
    while (remaining.length > 0) {
        remaining = remaining.trim();
        switch (remaining[0]) {
            case '<': {
                const emailEnd = remaining.indexOf('>');
                if (emailEnd !== -1) {
                    email = remaining.slice(1, emailEnd).trim();
                    remaining = remaining.slice(emailEnd + 1);
                }
                else {
                    remaining = '';
                }
                break;
            }
            case '[': {
                const rolesEnd = remaining.indexOf(']');
                if (rolesEnd !== -1) {
                    const rolesStr = remaining.slice(1, rolesEnd).trim();
                    // drop empty entries so that `[]` or `[aut,]` do not yield a role `''`
                    roles = rolesStr.split(/\s*,\s*/).filter(r => r.length > 0);
                    remaining = remaining.slice(rolesEnd + 1);
                }
                else {
                    remaining = '';
                }
                break;
            }
            case '(': {
                const commentEnd = remaining.indexOf(')');
                if (commentEnd !== -1) {
                    comment = remaining.slice(1, commentEnd).trim();
                    remaining = remaining.slice(commentEnd + 1);
                }
                else {
                    remaining = '';
                }
                break;
            }
            default: {
                remaining = '';
                break;
            }
        }
    }
    return { email, roles, comment };
}
//# sourceMappingURL=r-author.js.map