UNPKG

publication-ids

Version:

JavaScript / TypeScript validator and parser for publication IDs: DOI, PMID, PMCID, ISBN, and ISSN

431 lines (410 loc) 12 kB
// ---------------------------------------------------------------------------
// Bundler-generated CommonJS interop helpers
// ---------------------------------------------------------------------------
const __defProp = Object.defineProperty;
const __getOwnPropNames = Object.getOwnPropertyNames;
const __getOwnPropDesc = Object.getOwnPropertyDescriptor;
const __hasOwnProp = Object.prototype.hasOwnProperty;
const __moduleCache = /* @__PURE__ */ new WeakMap();

/**
 * Wraps a namespace object in an object flagged with `__esModule` whose
 * properties are getters forwarding to the namespace. Results are cached per
 * namespace object.
 */
const __toCommonJS = (from) => {
  let entry = __moduleCache.get(from);
  if (entry) {
    return entry;
  }
  entry = __defProp({}, "__esModule", { value: true });
  if ((from && typeof from === "object") || typeof from === "function") {
    for (const key of __getOwnPropNames(from)) {
      if (!__hasOwnProp.call(entry, key)) {
        const desc = __getOwnPropDesc(from, key);
        __defProp(entry, key, {
          get: () => from[key],
          enumerable: !desc || desc.enumerable,
        });
      }
    }
  }
  __moduleCache.set(from, entry);
  return entry;
};

/**
 * Defines one enumerable, configurable accessor on `target` per key of `all`;
 * each value of `all` is the getter thunk for that key.
 */
const __export = (target, all) => {
  // `const` gives each setter closure its own `name` binding.
  for (const name in all) {
    __defProp(target, name, {
      get: all[name],
      enumerable: true,
      configurable: true,
      set: (newValue) => (all[name] = () => newValue),
    });
  }
};

// src/index.ts
const exports_src = {};
__export(exports_src, {
  validatePmid: () => validate_default4,
  validateIssn: () => validate_default3,
  validateIsbn: () => validate_default2,
  validateDoi: () => validate_default,
  sanitizePmid: () => sanitize_default4,
  sanitizeIssn: () => sanitize_default3,
  sanitizeIsbn: () => sanitize_default,
  sanitizeDoi: () => sanitize_default2,
  parsePmid: () => parse_default4,
  parseIssn: () => parse_default3,
  parseIsbn: () => parse_default,
  parseDoi: () => parse_default2,
  parse: () => parse_default5,
  default: () => src_default,
});
// Guarded so the bundle can also be evaluated where no CommonJS `module`
// object exists; under CommonJS the assignment happens exactly as before.
if (typeof module !== "undefined") {
  module.exports = __toCommonJS(exports_src);
}

// src/shared helper
/**
 * Percent-decodes `text`, falling back to the raw text when it contains a
 * malformed escape sequence (for example a literal "%" in free text) instead
 * of letting the URIError escape from the sanitizers.
 * @param {string} text
 * @returns {string}
 */
const safeDecodeURIComponent = (text) => {
  try {
    return decodeURIComponent(text);
  } catch (err) {
    if (err instanceof URIError) {
      return text;
    }
    throw err;
  }
};

// src/doi/regex.ts
// The dot after "10" is escaped so that only a literal "10." prefix matches.
const REGEX_DOI = /(10\.\d{4,9}\/[-._;()/:A-Z0-9]+)/gi;
const regex_default = REGEX_DOI;

// src/doi/validate.ts
/**
 * Tells whether `identifier` is, in its entirety, a syntactically valid DOI.
 * @param {string} identifier
 * @returns {boolean}
 */
const validate_default = (identifier) => {
  if (!identifier) {
    return false;
  }
  // A fresh, non-global regex keeps `.test` free of `lastIndex` state.
  const doiRegex = new RegExp(`^${regex_default.source}$`, "i");
  return doiRegex.test(identifier);
};

// src/isbn/convert.ts
/**
 * Computes the ISBN-10 check character from the first nine digits of `isbn`.
 * @param {string} isbn
 * @returns {string} "0".."9" or "X"
 */
const calculateIsbn10Checksum = (isbn) => {
  let sum = 0;
  for (let i = 0; i < 9; i++) {
    sum += (i + 1) * parseInt(isbn[i], 10);
  }
  const remainder = sum % 11;
  return remainder === 10 ? "X" : remainder.toString();
};

/**
 * Computes the ISBN-13 check digit from the first twelve digits of `isbn`
 * (weights alternate 1, 3).
 * @param {string} isbn
 * @returns {string}
 */
const calculateIsbn13Checksum = (isbn) => {
  let sum = 0;
  for (let i = 0; i < 12; i++) {
    sum += (i % 2 === 0 ? 1 : 3) * parseInt(isbn[i], 10);
  }
  const remainder = sum % 10;
  return ((10 - remainder) % 10).toString();
};

/**
 * Converts a 978-prefixed ISBN-13 to its ISBN-10 form.
 * @param {string} isbn13
 * @returns {string | undefined} `undefined` for other prefixes (e.g. 979),
 *   which have no ISBN-10 equivalent.
 */
const convertIsbn13ToIsbn10 = (isbn13) => {
  if (!isbn13.startsWith("978")) {
    return undefined;
  }
  const isbn10Base = isbn13.substring(3, 12);
  return isbn10Base + calculateIsbn10Checksum(isbn10Base);
};

/**
 * Converts an ISBN-10 to ISBN-13 by prefixing "978" and recomputing the
 * check digit.
 * @param {string} isbn10
 * @returns {string}
 */
const convertIsbn10ToIsbn13 = (isbn10) => {
  const isbn13Base = `978${isbn10.substring(0, 9)}`;
  return isbn13Base + calculateIsbn13Checksum(isbn13Base);
};

// src/isbn/regex.ts
const REGEX_ISBN = /^(?:ISBN(?:-1[03])?:? )?(?=[0-9X]{10}$|(?=(?:[0-9]+[- ]){3})[- 0-9X]{13}$|97[89][0-9]{10}$|(?=(?:[0-9]+[- ]){4})[- 0-9]{17}$)(?:97[89][- ]?)?[0-9]{1,5}[- ]?[0-9]+[- ]?[0-9]+[- ]?[0-9X]$/;
const regex_default2 = REGEX_ISBN;

// src/isbn/sanitize.ts
const isbnRegex = new RegExp(regex_default2);

/**
 * Extracts ISBN candidates from `input` and strips hyphens, whitespace and
 * any leading "ISBN" label. A string input is tried both token-by-token
 * (split on spaces) and as a whole.
 * @param {string | string[]} input
 * @returns {string[]} de-duplicated candidates (checksum NOT verified)
 */
const sanitize_default = (input) => {
  if (!input) {
    return [];
  }
  const inputArray = Array.isArray(input) ? input : [...input.split(" "), input];
  const cleanIsbns = [];
  for (const inputString of inputArray) {
    const cleanInput = inputString.replace(/-/g, "");
    const matches = cleanInput.trim().match(isbnRegex)?.filter(Boolean) ?? [];
    for (const candidate of matches) {
      cleanIsbns.push(candidate.replace(/[-\s]|^ISBN(?:-1[03])?:?/g, ""));
    }
  }
  return [...new Set(cleanIsbns)];
};

// src/isbn/validate.ts
/**
 * Verifies the check character of the first ISBN found in `identifier`.
 * @param {string | string[]} identifier
 * @returns {boolean}
 */
const validate_default2 = (identifier) => {
  if (!identifier) {
    return false;
  }
  const [candidate] = sanitize_default(identifier);
  if (!candidate) {
    return false;
  }
  // The syntax regex can let through spaced inputs that collapse to fewer
  // digits; only 10- and 13-character ISBNs have a defined checksum.
  if (candidate.length !== 10 && candidate.length !== 13) {
    return false;
  }
  const body = candidate.slice(0, -1);
  const last = candidate[candidate.length - 1];
  const expected =
    candidate.length === 10
      ? calculateIsbn10Checksum(body)
      : calculateIsbn13Checksum(body);
  return expected === last;
};

// src/isbn/parse.ts
/**
 * Parses every ISBN found in `source`.
 * @param {string | string[]} source
 * @returns {Array<object>} one `{ source, isValid }` entry per candidate;
 *   valid entries also carry `isbn10` (undefined when the ISBN-13 has no
 *   ISBN-10 form) and `isbn13`.
 */
const parse_default = (source) => {
  const sanitizedIsbns = sanitize_default(source);
  if (!sanitizedIsbns.length) {
    return [{ source, isValid: false }];
  }
  return sanitizedIsbns.map((sanitizedIsbn) => {
    const isValid = validate_default2(sanitizedIsbn);
    return {
      source,
      isValid,
      ...(isValid && {
        isbn10:
          sanitizedIsbn.length === 13
            ? convertIsbn13ToIsbn10(sanitizedIsbn)
            : sanitizedIsbn,
        isbn13:
          sanitizedIsbn.length === 10
            ? convertIsbn10ToIsbn13(sanitizedIsbn)
            : sanitizedIsbn,
      }),
    };
  });
};

// src/doi/sanitize.ts
// Trailing fragments that are stripped from a matched DOI, in this order.
const DOI_BAD_ENDINGS = [
  /v.*\.pdf/,
  /\/asset\/.*/,
  /\.$/,
  ".full.pdf",
  ".full.html",
  ".full.htm",
  ".full.txt",
  ".pdf",
  ".html",
  ".htm",
  ".txt",
  ".full",
  "/full",
  "/html",
  "/abstract",
  "/full/html",
  "/html/full",
  "/pdf",
  "/endnote",
  "/reference",
  "/epub",
  "/text",
  "/bibtext",
];

/**
 * Extracts DOIs from `input` (percent-decoded first), lower-cases them and
 * strips known trailing fragments.
 * @param {string | string[]} input
 * @returns {string[]} de-duplicated DOIs
 */
const sanitize_default2 = (input) => {
  if (!input) {
    return [];
  }
  const doiRegex = new RegExp(regex_default, "gi");
  const inputString = Array.isArray(input) ? input.join(" ") : input;
  const decoded = safeDecodeURIComponent(inputString);
  const matches = decoded.match(doiRegex)?.filter(Boolean);
  if (!matches) {
    return [];
  }
  const cleanDois = matches.map((candidate) => {
    let cleanDoi = candidate.toLowerCase().trim();
    for (const ending of DOI_BAD_ENDINGS) {
      if (ending instanceof RegExp) {
        cleanDoi = cleanDoi.replace(ending, "");
      } else if (cleanDoi.endsWith(ending)) {
        cleanDoi = cleanDoi.slice(0, -ending.length);
      }
    }
    return cleanDoi;
  });
  return [...new Set(cleanDois)];
};

// src/doi/parse.ts
const ISBN13_REGEX = /97[89][0-9]{10}/;
// "<base><- or _><digits>" where the digits follow the LAST separator.
const DOI_CHAPTER_SUFFIX_REGEX = /^(.*)[-_](\d+)$/;
// Case-insensitive because sanitized DOIs are lower-cased.
const DOI_TAIL_REGEX = /10\.\d{4,9}\/[-._;()/:A-Z0-9]+$/i;

/**
 * Splits a trailing numeric chapter suffix off an ISBN-based DOI.
 * The suffix is only treated as a chapter when the remaining DOI still
 * contains the complete ISBN; otherwise the trailing digits are the ISBN's
 * own last group and the DOI is returned untouched.
 * @param {string} doi sanitized DOI
 * @param {string} isbn13 the 13-digit ISBN found inside `doi`
 * @returns {{ doi: string, chapter: string | undefined }}
 */
const splitDoiChapter = (doi, isbn13) => {
  const match = DOI_CHAPTER_SUFFIX_REGEX.exec(doi);
  if (match) {
    const [, base, chapter] = match;
    const keepsIsbn = base.replace(/-/g, "").includes(isbn13);
    if (keepsIsbn && DOI_TAIL_REGEX.test(base)) {
      return { doi: base, chapter };
    }
  }
  return { doi, chapter: undefined };
};

/**
 * Parses every DOI found in `source`. When a DOI embeds a valid ISBN-13 the
 * result carries the parsed ISBN (plus an optional chapter number) and `doi`
 * is the DOI without the chapter suffix.
 * @param {string | string[]} source
 * @returns {Array<object>}
 */
const parse_default2 = (source) => {
  const sanitizedDois = sanitize_default2(source);
  if (!sanitizedDois.length) {
    return [{ source, doi: undefined, isValid: false }];
  }
  return sanitizedDois.map((sanitizedDoi) => {
    const isValid = validate_default(sanitizedDoi);
    const potentialIsbnFragment = sanitizedDoi.replace(/-/g, "");
    const potentialIsbn = ISBN13_REGEX.exec(potentialIsbnFragment)?.[0] ?? "";
    const isbn = potentialIsbn
      ? parse_default(potentialIsbn)[0]
      : { isValid: false };
    if (!isbn.isValid) {
      const resolve = isValid ? `https://doi.org/${sanitizedDoi}` : "";
      return { source, doi: sanitizedDoi, isValid, resolve, isbn };
    }
    const { doi, chapter } = splitDoiChapter(sanitizedDoi, potentialIsbn);
    const resolve = isValid ? `https://doi.org/${doi}` : "";
    return { source, doi, isValid, resolve, isbn: { ...isbn, chapter } };
  });
};

// src/issn/regex.ts
const REGEX_ISSN = /[\d]{4}[-\s]?[\d]{3}[\dX]/;
const regex_default3 = REGEX_ISSN;

// src/issn/sanitize.ts
const issnRegex = new RegExp(regex_default3, "gi");

/**
 * Extracts ISSN candidates from `input`, removing the separator and
 * upper-casing the check character so "x" and "X" are treated alike.
 * @param {string | string[]} input
 * @returns {string[]} de-duplicated 8-character candidates (checksum NOT verified)
 */
const sanitize_default3 = (input) => {
  if (!input) {
    return [];
  }
  const inputArray = Array.isArray(input) ? input : [...input.split(" "), input];
  const cleanIssns = [];
  for (const inputString of inputArray) {
    const matches = inputString.trim().match(issnRegex)?.filter(Boolean) ?? [];
    for (const candidate of matches) {
      cleanIssns.push(candidate.replace(/[-\s]|^ISSN:?/g, "").toUpperCase());
    }
  }
  return [...new Set(cleanIssns)];
};

// src/issn/validate.ts
/**
 * Verifies the check character of the first ISSN found in `identifier`
 * (weights 8..2 over the first seven digits, modulo 11).
 * @param {string | string[]} identifier
 * @returns {boolean}
 */
const validate_default3 = (identifier) => {
  if (!identifier) {
    return false;
  }
  // Rely on the (case-insensitive) sanitizer so validation and sanitizing
  // agree on what counts as an ISSN.
  const [candidate] = sanitize_default3(identifier);
  if (!candidate) {
    return false;
  }
  const digits = candidate.slice(0, -1);
  const last = candidate[candidate.length - 1];
  let sum = 0;
  for (let i = 0; i < digits.length; i++) {
    sum += parseInt(digits[i], 10) * (8 - i);
  }
  const checkValue = (11 - (sum % 11)) % 11;
  const checkDigit = checkValue === 10 ? "X" : checkValue.toString();
  return checkDigit === last;
};

// src/issn/parse.ts
/**
 * Parses every ISSN found in `source`.
 * @param {string | string[]} source
 * @returns {Array<object>} `{ source, isValid }` entries; valid ones carry `issn`.
 */
const parse_default3 = (source) => {
  const sanitizedIssns = sanitize_default3(source);
  if (!sanitizedIssns.length) {
    return [{ source, isValid: false }];
  }
  return sanitizedIssns.map((sanitizedIssn) => {
    const isValid = validate_default3(sanitizedIssn);
    return { source, isValid, ...(isValid && { issn: sanitizedIssn }) };
  });
};

// src/pmid/regex.ts
const REGEX_PMID = /\b(?:PMID:)?\s?\d{1,8}/;
const REGEX_PMCID = /(?:PMC)\s?\d{1,8}/;
// Used only to hide PMCIDs (with all their digits) from the PMID scan.
const REGEX_PMCID_MASK = /PMC\s?\d+/gi;

// src/pmid/sanitize.ts
/**
 * Extracts PMIDs and PMCIDs from `input` (percent-decoded first), trimmed and
 * upper-cased. PMIDs come first, then PMCIDs.
 * @param {string | string[]} input
 * @returns {string[]} de-duplicated identifiers
 */
const sanitize_default4 = (input) => {
  if (!input) {
    return [];
  }
  const inputString = Array.isArray(input) ? input.join(" ") : input;
  const decoded = safeDecodeURIComponent(inputString);
  // Blank out PMCIDs before looking for PMIDs; otherwise the digits of a
  // spaced "PMC 1234567" are also reported as a separate PMID.
  const withoutPmcids = decoded.replace(REGEX_PMCID_MASK, " ");
  const pmidMatches = withoutPmcids.match(new RegExp(REGEX_PMID, "gi")) ?? [];
  const pmcidMatches = decoded.match(new RegExp(REGEX_PMCID, "gi")) ?? [];
  const matches = [...pmidMatches, ...pmcidMatches].filter(Boolean);
  if (!matches.length) {
    return [];
  }
  const cleanPMIDs = matches.map((candidate) =>
    candidate.toString().toUpperCase().trim()
  );
  return [...new Set(cleanPMIDs)];
};

// src/pmid/validate.ts
/**
 * Tells whether `identifier` is, in its entirety, a PMID or PMCID.
 * @param {string} identifier
 * @returns {boolean}
 */
const validate_default4 = (identifier) => {
  if (!identifier) {
    return false;
  }
  const pmidRegex = new RegExp(`^${REGEX_PMID.source}$`, "i");
  const pmcidRegex = new RegExp(`^${REGEX_PMCID.source}$`, "i");
  return pmidRegex.test(identifier) || pmcidRegex.test(identifier);
};

// src/pmid/parse.ts
/**
 * Parses every PMID / PMCID found in `source` and builds its resolver URL.
 * @param {string | string[]} source
 * @returns {Array<object>}
 */
const parse_default4 = (source) => {
  const sanitizedPMIDs = sanitize_default4(source);
  if (!sanitizedPMIDs.length) {
    return [{ source, pmid: undefined, pmcid: undefined, isValid: false }];
  }
  return sanitizedPMIDs.map((sanitizedId) => {
    const isValid = validate_default4(sanitizedId);
    if (sanitizedId.startsWith("PMC")) {
      // The URL must not contain the optional space of "PMC 123".
      const pmcPath = sanitizedId.replace(/\s+/g, "");
      return {
        source,
        isValid,
        pmcid: sanitizedId,
        pmid: undefined,
        resolve: `https://www.ncbi.nlm.nih.gov/pmc/articles/${pmcPath}/`,
      };
    }
    // Strip the label with or without a following space ("PMID:1" / "PMID: 1").
    const pmidNumber = sanitizedId.replace(/^PMID:\s*/, "");
    return {
      source,
      isValid,
      pmid: sanitizedId,
      pmcid: undefined,
      resolve: `https://pubmed.ncbi.nlm.nih.gov/${pmidNumber}/`,
    };
  });
};

// src/global/parse.ts
/**
 * Runs every parser whose validator accepts a token. A string input is split
 * on spaces; tokens no validator accepts yield `{ source, isValid: false }`.
 * @param {string | string[]} input
 * @returns {Array<object>}
 */
const parse_default5 = (input) => {
  const inputArray = Array.isArray(input) ? input : input.split(" ");
  return inputArray.flatMap((id) => {
    const results = [];
    if (validate_default(id)) {
      results.push(...parse_default2(id));
    }
    if (validate_default2(id)) {
      results.push(...parse_default(id));
    }
    if (validate_default3(id)) {
      results.push(...parse_default3(id));
    }
    if (validate_default4(id)) {
      results.push(...parse_default4(id));
    }
    if (results.length === 0) {
      results.push({ source: id, isValid: false });
    }
    return results;
  });
};

// src/index.ts
const src_default = {
  parseDoi: parse_default2,
  sanitizeDoi: sanitize_default2,
  validateDoi: validate_default,
  parseIsbn: parse_default,
  sanitizeIsbn: sanitize_default,
  validateIsbn: validate_default2,
  parseIssn: parse_default3,
  sanitizeIssn: sanitize_default3,
  validateIssn: validate_default3,
  parsePmid: parse_default4,
  sanitizePmid: sanitize_default4,
  validatePmid: validate_default4,
  parse: parse_default5,
};