publication-ids
Version:
JavaScript / TypeScript validator and parser for publication IDs: DOI, PMID, PMCID, ISBN, and ISSN
401 lines (381 loc) • 11 kB
JavaScript
// src/doi/regex.ts
/**
 * Pattern for a DOI embedded in arbitrary text: the literal directory
 * indicator "10.", a 4-9 digit registrant code, a slash, and a suffix built
 * from letters, digits and the punctuation `-._;()/:`.
 *
 * The dot after "10" is escaped so that only a literal "." is accepted;
 * an unescaped dot would let strings such as "10x1234/abc" pass as DOIs.
 * The single capture group wraps the whole DOI. Flags: global, ignore-case.
 */
var REGEX_DOI = /(10\.\d{4,9}\/[-._;()/:A-Z0-9]+)/gi;
var regex_default = REGEX_DOI;
// src/doi/validate.ts
/**
 * Checks whether `identifier`, taken as a whole, matches the shared DOI pattern.
 *
 * @param identifier Candidate DOI; any falsy value is rejected immediately.
 * @returns `true` when the complete input matches the DOI pattern, else `false`.
 */
var validate_default = (identifier) => {
  if (identifier) {
    // Wrap the shared pattern in ^...$ so partial matches do not count.
    const anchoredSource = "^" + regex_default.source + "$";
    const wholeStringDoi = new RegExp(anchoredSource, "gi");
    return wholeStringDoi.test(identifier);
  }
  return false;
};
// src/isbn/convert.ts
/**
 * Derives the ISBN-10 check character from the first nine characters of `isbn`.
 *
 * Each of the nine digits is multiplied by its 1-based position; the sum is
 * reduced modulo 11, and a remainder of 10 is written as "X".
 *
 * @param isbn String (or indexable) whose first nine entries are digits.
 * @returns The check character as a string ("0"-"9" or "X").
 */
var calculateIsbn10Checksum = (isbn) => {
  let weightedSum = 0;
  let position = 1;
  while (position <= 9) {
    weightedSum += position * parseInt(isbn[position - 1], 10);
    position += 1;
  }
  const mod = weightedSum % 11;
  if (mod === 10) {
    return "X";
  }
  return mod.toString();
};
/**
 * Derives the ISBN-13 check digit from the first twelve characters of `isbn`.
 *
 * Digits at even indices weigh 1 and digits at odd indices weigh 3; the check
 * digit is whatever brings the weighted sum up to a multiple of 10.
 *
 * @param isbn String (or indexable) whose first twelve entries are digits.
 * @returns The check digit as a one-character string.
 */
var calculateIsbn13Checksum = (isbn) => {
  const weights = [1, 3];
  let total = 0;
  for (let idx = 0; idx < 12; idx += 1) {
    total += weights[idx % 2] * parseInt(isbn[idx], 10);
  }
  const checkDigit = (10 - (total % 10)) % 10;
  return checkDigit.toString();
};
var convertIsbn13ToIsbn10 = (isbn13) => {
const isbn10Base = isbn13.substring(3, 12);
const checksum = calculateIsbn10Checksum(isbn10Base);
return isbn10Base + checksum;
};
var convertIsbn10ToIsbn13 = (isbn10) => {
const isbn13Base = "978" + isbn10.substring(0, 9);
const checksum = calculateIsbn13Checksum(isbn13Base);
return isbn13Base + checksum;
};
// src/isbn/regex.ts
/**
 * Matches a whole string that is an ISBN-10 or ISBN-13.
 *
 * - Optional lead-in: "ISBN", "ISBN-10" or "ISBN-13", an optional colon, then
 *   exactly one space.
 * - The lookahead admits four layouts: 10 bare characters (digits / X);
 *   13 characters of digits, X, hyphens and spaces that start with three
 *   separator-terminated digit groups; 13 bare digits starting 978 or 979;
 *   or 17 characters of digits, hyphens and spaces that start with four
 *   separator-terminated digit groups.
 * - Only the final character may be "X". The pattern is case-sensitive.
 */
var REGEX_ISBN = /^(?:ISBN(?:-1[03])?:? )?(?=[0-9X]{10}$|(?=(?:[0-9]+[- ]){3})[- 0-9X]{13}$|97[89][0-9]{10}$|(?=(?:[0-9]+[- ]){4})[- 0-9]{17}$)(?:97[89][- ]?)?[0-9]{1,5}[- ]?[0-9]+[- ]?[0-9]+[- ]?[0-9X]$/;
var regex_default2 = REGEX_ISBN;
// src/isbn/sanitize.ts
var isbnRegex = new RegExp(regex_default2);
/**
 * Extracts ISBNs from `input` and returns them without separators or prefix.
 *
 * A string input is examined both as space-separated tokens and as a whole
 * (so space-grouped ISBNs are still found); an array input is examined element
 * by element. Hyphens inside the number are removed before matching so that
 * partially hyphenated ISBNs are accepted.
 *
 * @param input A string, or an array of strings, that may contain ISBNs.
 * @returns Unique bare ISBN strings in first-seen order; `[]` for falsy input
 *          or when nothing matches.
 */
var sanitize_default = (input) => {
  if (!input) {
    return [];
  }
  // Same lead-in the main pattern accepts ("ISBN", "ISBN-10:", "ISBN-13: ", ...).
  const prefixPattern = /^ISBN(?:-1[03])?:? /;
  const inputArray = Array.isArray(input) ? input : [...input.split(" "), input];
  const cleanIsbns = inputArray.reduce((acc, inputString) => {
    const trimmed = inputString.trim();
    // Keep the lead-in intact: stripping its hyphen would turn "ISBN-13:" into
    // "ISBN13:", which the main pattern does not recognise.
    const prefix = trimmed.match(prefixPattern)?.[0] ?? "";
    const cleanInput = (prefix + trimmed.slice(prefix.length).replace(/-/g, "")).trim();
    const matches = cleanInput.match(isbnRegex)?.filter(Boolean);
    if (matches) {
      matches.forEach((candidate) => {
        // Drop remaining separators and the lead-in, leaving only the ISBN itself.
        acc.push(candidate.replace(/[-\s]|^ISBN(?:-1[03])?:?/g, ""));
      });
    }
    return acc;
  }, []);
  return [...new Set(cleanIsbns)];
};
// src/isbn/validate.ts
var validate_default2 = (identifier) => {
if (!identifier) {
return false;
}
const sanitized = sanitize_default(identifier);
if (!sanitized.length) {
return false;
}
const chars = sanitized[0].split("");
const last = chars.pop();
let sum = 0;
let check;
if (chars.length === 9) {
for (let i = 0;i < chars.length; i++) {
sum += (i + 1) * parseInt(chars[i], 10);
}
check = sum % 11;
check = check === 10 ? "X" : check.toString();
} else {
for (let i = 0;i < chars.length; i++) {
sum += (i % 2 === 0 ? 1 : 3) * parseInt(chars[i], 10);
}
check = (10 - sum % 10) % 10;
check = check.toString();
}
return check === last;
};
// src/isbn/parse.ts
var parse_default = (source) => {
const sanitizedIsbns = sanitize_default(source);
if (!sanitizedIsbns.length) {
return [{
source,
isValid: false
}];
}
return sanitizedIsbns.map((sanitizedIsbn) => {
const isValid = validate_default2(sanitizedIsbn);
return {
source,
isValid,
...isValid && {
isbn10: sanitizedIsbn.length === 13 ? convertIsbn13ToIsbn10(sanitizedIsbn) : sanitizedIsbn,
isbn13: sanitizedIsbn.length === 10 ? convertIsbn10ToIsbn13(sanitizedIsbn) : sanitizedIsbn
}
};
});
};
// src/doi/sanitize.ts
/**
 * Extracts DOIs from free text or URLs and normalises them.
 *
 * Array input is joined with spaces. The text is URI-decoded when possible
 * (malformed percent sequences fall back to the raw text instead of throwing),
 * every DOI-shaped substring is lower-cased, and known non-DOI endings such as
 * ".pdf", "/full" or "/abstract" are stripped. Stripping is repeated until the
 * value stops changing so that stacked endings like "/full/html" are removed
 * completely regardless of their order in the list.
 *
 * @param input A string, or array of strings, that may contain DOIs.
 * @returns Unique cleaned DOIs in first-seen order; `[]` for falsy input or
 *          when nothing matches.
 */
var sanitize_default2 = (input) => {
  if (!input) {
    return [];
  }
  const doiRegex = new RegExp(regex_default, "gi");
  const inputString = Array.isArray(input) ? input.join(" ") : input;
  let decoded;
  try {
    decoded = decodeURIComponent(inputString);
  } catch {
    // decodeURIComponent throws URIError on a stray "%"; search the raw text instead.
    decoded = String(inputString);
  }
  const matches = decoded.match(doiRegex)?.filter(Boolean);
  if (!matches) {
    return [];
  }
  const badEndings = [
    /v.*\.pdf/,
    /\/asset\/.*/,
    /\.$/,
    ".full.pdf",
    ".full.html",
    ".full.htm",
    ".full.txt",
    ".pdf",
    ".html",
    ".htm",
    ".txt",
    ".full",
    "/full",
    "/html",
    "/abstract",
    "/full/html",
    "/html/full",
    "/pdf",
    "/endnote",
    "/reference",
    "/epub",
    "/text",
    "/bibtext"
  ];
  // One left-to-right pass over the list: regex entries are replaced, string entries are cut from the end.
  const stripOnce = (doi) => badEndings.reduce((current, ending) => {
    if (ending instanceof RegExp) {
      return current.replace(ending, "");
    }
    return current.endsWith(ending) ? current.slice(0, -ending.length) : current;
  }, doi);
  const cleanDois = matches.map((candidate) => {
    let cleanDoi = candidate.toLowerCase().trim();
    let previous;
    // Every effective pass shortens the string, so this loop terminates.
    do {
      previous = cleanDoi;
      cleanDoi = stripOnce(cleanDoi);
    } while (cleanDoi !== previous);
    return cleanDoi;
  });
  return [...new Set(cleanDois)];
};
// src/doi/parse.ts
var ISBN13_REGEX = /97[89][0-9]{10}/;
var parse_default2 = (source) => {
const sanitizedDois = sanitize_default2(source);
if (!sanitizedDois.length) {
return [{
source,
doi: undefined,
isValid: false
}];
}
return sanitizedDois.map((sanitizedDoi) => {
const isValid = validate_default(sanitizedDoi);
const potentialIsbnFragment = sanitizedDoi.replace(/-/g, "");
const potentialIsbn = ISBN13_REGEX.exec(potentialIsbnFragment)?.[0] ?? "";
const isbn = potentialIsbn ? parse_default(potentialIsbn)[0] : { isValid: false };
if (isbn.isValid) {
const [doiWithoutChapter, chapter] = sanitizedDoi.split(/[-_]/).reduce((acc, part, index, array) => {
if (index === array.length - 1 && /^\d+$/.test(part) && /10\.\d{4,9}\/[-._;()/:A-Z0-9]+$/.test(acc[0])) {
acc[1] = part;
} else {
acc[0] += (acc[0] ? "-" : "") + part;
}
return acc;
}, ["", undefined]);
const resolve2 = isValid ? `https://doi.org/${doiWithoutChapter}` : "";
return {
source,
doi: doiWithoutChapter,
isValid,
resolve: resolve2,
isbn: {
...isbn,
chapter
}
};
}
const resolve = isValid ? `https://doi.org/${sanitizedDoi}` : "";
return {
source,
doi: sanitizedDoi,
isValid,
resolve,
isbn
};
});
};
// src/issn/regex.ts
/**
 * Matches an ISSN anywhere in a string: four digits, an optional hyphen or
 * whitespace character, three digits, then a digit or "X". Unanchored and
 * case-sensitive as written.
 */
var REGEX_ISSN = /[\d]{4}[-\s]?[\d]{3}[\dX]/;
var regex_default3 = REGEX_ISSN;
// src/issn/sanitize.ts
var issnRegex = new RegExp(regex_default3, "gi");
/**
 * Extracts ISSNs from `input` and returns them as eight bare characters.
 *
 * A string input is examined both as space-separated tokens and as a whole
 * (so "1234 5679" is still found); an array input is examined element by
 * element. Matching ignores case, and results are upper-cased so that a
 * lower-case "x" check character is normalised to "X" and de-duplicates
 * against the upper-case form.
 *
 * @param input A string, or array of strings, that may contain ISSNs.
 * @returns Unique ISSNs without separators, in first-seen order; `[]` for
 *          falsy input or when nothing matches.
 */
var sanitize_default3 = (input) => {
  if (!input) {
    return [];
  }
  const inputArray = Array.isArray(input) ? input : [...input.split(" "), input];
  const cleanIssns = inputArray.reduce((acc, inputString) => {
    const matches = inputString.trim().match(issnRegex)?.filter(Boolean);
    if (matches) {
      matches.forEach((candidate) => {
        acc.push(candidate.replace(/[-\s]|^ISSN:?/g, "").toUpperCase());
      });
    }
    return acc;
  }, []);
  return [...new Set(cleanIssns)];
};
// src/issn/validate.ts
var validate_default3 = (identifier) => {
if (!identifier) {
return false;
}
if (!regex_default3.test(identifier)) {
return false;
}
const chars = sanitize_default3(identifier)[0].split("");
const last = chars.pop();
let sum = 0;
for (let i = 0;i < chars.length; i++) {
sum += parseInt(chars[i], 10) * (8 - i);
}
const remainder = sum % 11;
const checkDigit = remainder === 0 ? "0" : 11 - remainder === 10 ? "X" : (11 - remainder).toString();
return checkDigit === last;
};
// src/issn/parse.ts
var parse_default3 = (source) => {
const sanitizedIssns = sanitize_default3(source);
if (!sanitizedIssns.length) {
return [{
source,
isValid: false
}];
}
return sanitizedIssns.map((sanitizedIssn) => {
const isValid = validate_default3(sanitizedIssn);
return {
source,
isValid,
...isValid && {
issn: sanitizedIssn
}
};
});
};
// src/pmid/regex.ts
// PMID: starts at a word boundary, then an optional literal "PMID:", an
// optional single whitespace character, and 1-8 digits. No flags are set here;
// the sanitizer and validator below build flagged/anchored copies from it.
var REGEX_PMID = /\b(?:PMID:)?\s?\d{1,8}/;
// PMCID: the literal "PMC", an optional single whitespace character, and
// 1-8 digits. Unanchored and without flags, like REGEX_PMID.
var REGEX_PMCID = /(?:PMC)\s?\d{1,8}/;
// src/pmid/sanitize.ts
/**
 * Extracts PMIDs and PMCIDs from free text or URLs.
 *
 * Array input is joined with spaces. The text is URI-decoded when possible
 * (malformed percent sequences fall back to the raw text instead of throwing),
 * then searched case-insensitively with the PMID pattern followed by the PMCID
 * pattern. Matches are upper-cased and trimmed.
 *
 * @param input A string, or array of strings, that may contain identifiers.
 * @returns Unique identifiers, PMID matches first then PMCID matches; `[]` for
 *          falsy input or when nothing matches.
 */
var sanitize_default4 = (input) => {
  if (!input) {
    return [];
  }
  const inputString = Array.isArray(input) ? input.join(" ") : input;
  let decoded;
  try {
    decoded = decodeURIComponent(inputString);
  } catch {
    // decodeURIComponent throws URIError on a stray "%"; search the raw text instead.
    decoded = String(inputString);
  }
  const pmidMatches = decoded.match(new RegExp(REGEX_PMID, "gi")) ?? [];
  const pmcidMatches = decoded.match(new RegExp(REGEX_PMCID, "gi")) ?? [];
  const matches = [...pmidMatches, ...pmcidMatches].filter(Boolean);
  if (!matches.length) {
    return [];
  }
  const cleanPMIDs = matches.map((candidate) => candidate.toString().toUpperCase().trim());
  return [...new Set(cleanPMIDs)];
};
// src/pmid/validate.ts
/**
 * Checks whether `identifier`, taken as a whole, is a PMID or a PMCID.
 *
 * Both patterns are anchored at start and end and applied case-insensitively;
 * the PMID form is tried first.
 *
 * @param identifier Candidate identifier; any falsy value is rejected.
 * @returns `true` when the complete input matches either pattern.
 */
var validate_default4 = (identifier) => {
  if (!identifier) {
    return false;
  }
  return [REGEX_PMID, REGEX_PMCID].some((pattern) => {
    const wholeString = new RegExp(`^${pattern.source}$`, "gi");
    return wholeString.test(identifier);
  });
};
// src/pmid/parse.ts
var parse_default4 = (source) => {
const sanitizedPMIDs = sanitize_default4(source);
if (!sanitizedPMIDs.length) {
return [{
source,
pmid: undefined,
pmcid: undefined,
isValid: false
}];
}
return sanitizedPMIDs.map((sanitizedId) => {
const isValid = validate_default4(sanitizedId);
const isPMCID = sanitizedId.startsWith("PMC");
return {
source,
isValid,
...isPMCID ? {
pmcid: sanitizedId,
pmid: undefined,
resolve: `https://www.ncbi.nlm.nih.gov/pmc/articles/${sanitizedId}/`
} : {
pmid: sanitizedId,
pmcid: undefined,
resolve: `https://pubmed.ncbi.nlm.nih.gov/${sanitizedId.replace("PMID: ", "")}/`
}
};
});
};
// src/global/parse.ts
var parse_default5 = (input) => {
const inputArray = Array.isArray(input) ? input : input.split(" ");
return inputArray.map((id) => {
const results = [];
if (validate_default(id)) {
results.push(...parse_default2(id));
}
if (validate_default2(id)) {
results.push(...parse_default(id));
}
if (validate_default3(id)) {
results.push(...parse_default3(id));
}
if (validate_default4(id)) {
results.push(...parse_default4(id));
}
if (results.length === 0) {
results.push({
source: id,
isValid: false
});
}
return results;
}).flat();
};
// src/index.ts
// Default export: one object bundling the parse / sanitize / validate function
// for each identifier kind (DOI, ISBN, ISSN, PMID) plus the combined `parse`.
var src_default = {
parseDoi: parse_default2,
sanitizeDoi: sanitize_default2,
validateDoi: validate_default,
parseIsbn: parse_default,
sanitizeIsbn: sanitize_default,
validateIsbn: validate_default2,
parseIssn: parse_default3,
sanitizeIssn: sanitize_default3,
validateIssn: validate_default3,
parsePmid: parse_default4,
sanitizePmid: sanitize_default4,
validatePmid: validate_default4,
// Tries every identifier kind on each input item.
parse: parse_default5
};
// Named exports mirror the keys of the default-export object; the bundler's
// numbered internal names are mapped back to their public names here.
export {
validate_default4 as validatePmid,
validate_default3 as validateIssn,
validate_default2 as validateIsbn,
validate_default as validateDoi,
sanitize_default4 as sanitizePmid,
sanitize_default3 as sanitizeIssn,
sanitize_default as sanitizeIsbn,
sanitize_default2 as sanitizeDoi,
parse_default4 as parsePmid,
parse_default3 as parseIssn,
parse_default as parseIsbn,
parse_default2 as parseDoi,
parse_default5 as parse,
src_default as default
};
// The combined parser is additionally exported under the name `parse_default`.
export { parse_default5 as parse_default };