pubmed-fetch
Version:
TypeScript version of Bio.Entrez; automates retrieval of PubMed article and manuscript data.
209 lines (208 loc) • 7.89 kB
JavaScript
;
// Module-interop helpers. NOTE(review): these look like bundler-generated boilerplate
// (CommonJS output for an ES-module source) — confirm before editing by hand.
// Short aliases for the Object reflection built-ins used by the helpers below.
var __create = Object.create;
var __defProp = Object.defineProperty;
var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
var __getOwnPropNames = Object.getOwnPropertyNames;
var __getProtoOf = Object.getPrototypeOf;
var __hasOwnProp = Object.prototype.hasOwnProperty;
// __export(target, all): defines every key of `all` on `target` as an enumerable getter.
// `all[name]` is used as the getter itself, so each export value is looked up on access.
var __export = (target, all) => {
for (var name in all)
__defProp(target, name, { get: all[name], enumerable: true });
};
// __copyProps(to, from, except, desc): when `from` is an object or a function, defines on `to`
// a getter for each own property name of `from` that `to` does not already own and that is not
// `except`. Each getter reads `from[key]` at access time; the new property is enumerable when
// the source property has no descriptor or its descriptor is enumerable. Returns `to`.
// `desc` is only used as a scratch variable for the descriptor lookup.
var __copyProps = (to, from, except, desc) => {
if (from && typeof from === "object" || typeof from === "function") {
for (let key of __getOwnPropNames(from))
if (!__hasOwnProp.call(to, key) && key !== except)
__defProp(to, key, { get: () => from[key], enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable });
}
return to;
};
// __toESM(mod, isNodeMode, target): builds a wrapper object for a required module. The wrapper
// is created with the same prototype as `mod` (or is a plain {} when `mod` is null/undefined),
// optionally receives a `default` property pointing at `mod`, and then gets getters for the
// properties of `mod` via __copyProps. `target` is only used as a scratch variable.
var __toESM = (mod, isNodeMode, target) => (target = mod != null ? __create(__getProtoOf(mod)) : {}, __copyProps(
// If the importer is in node compatibility mode or this is not an ESM
// file that has been converted to a CommonJS file using a Babel-
// compatible transform (i.e. "__esModule" has not been set), then set
// "default" to the CommonJS "module.exports" for node compatibility.
isNodeMode || !mod || !mod.__esModule ? __defProp(target, "default", { value: mod, enumerable: true }) : target,
mod
));
// __toCommonJS(mod): returns a fresh object carrying a non-enumerable `__esModule: true` marker
// plus getters for the properties of `mod`.
var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);
// src/index.ts
// Export table of the module. Every entry is registered as a getter (see __export), so the
// functions it names can be declared further down the file.
var src_exports = {};
__export(src_exports, {
buildQuery: () => buildQuery,
default: () => getIDsAndData,
fetchData: () => fetchData,
fetchIDs: () => fetchIDs,
processData: () => processData
});
module.exports = __toCommonJS(src_exports);
// Third-party dependencies. They are wrapped by __toESM and used through `.default` below
// (import_axios.default.get, import_xml2js.default.Parser).
var import_axios = __toESM(require("axios"));
var import_xml2js = __toESM(require("xml2js"));
// Common prefix of the request URLs built by fetchIDs (esearch.fcgi) and fetchData (efetch.fcgi).
var BASE_URL = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/";
/**
 * Runs the whole pipeline: look up IDs for a query, download the matching records and
 * convert them to plain article objects.
 *
 * @param {string} query - Search expression handed to fetchIDs.
 * @param {number} numPapers - Maximum number of IDs to request.
 * @param {string} api_key - API key forwarded to fetchIDs and fetchData.
 * @param {boolean} consolelog - When truthy, the processed articles are printed with console.log.
 * @returns {Promise<Array>} The processed articles, or an empty array when no IDs were found
 *   or any step threw.
 */
async function getIDsAndData(query, numPapers, api_key, consolelog) {
  try {
    // The helpers are always called with their own logging switched off.
    const ids = await fetchIDs(query, numPapers, api_key, false);
    const hasIds = Boolean(ids) && ids.length > 0;
    if (!hasIds) {
      return [];
    }

    const rawRecords = await fetchData(ids, api_key, false);
    const articles = await processData(rawRecords);
    if (consolelog) {
      console.log(articles);
    }
    return articles;
  } catch (error) {
    // Failures are reported and turned into an empty result instead of being rethrown.
    console.error("Error during fetch process: ", error);
    return [];
  }
}
/**
 * Searches PubMed through the `esearch.fcgi` endpoint under BASE_URL and returns the
 * matching ID list.
 *
 * The request is retried up to three times; between attempts the function waits
 * 1 s, then 2 s (no wait follows the final attempt).
 *
 * @param {string} query - Raw (not URL-encoded) search expression; it is percent-encoded
 *   here, so characters such as `&`, `#`, `+` or spaces cannot corrupt the request.
 * @param {number} [num] - Value for `retmax`. Left out of the request when null/undefined.
 * @param {string} [api_key] - Value for `api_key`. Left out of the request when falsy, so
 *   a missing key is never sent as the literal text "undefined".
 * @param {boolean} [consolelog] - When truthy, the ID list is printed with console.log.
 * @returns {Promise<Array>} `response.data.esearchresult.idlist` of the first successful
 *   attempt, or an empty array when every attempt failed.
 */
async function fetchIDs(query, num, api_key, consolelog) {
  const maxAttempts = 3;

  // Build the query string with URLSearchParams so every value is encoded consistently.
  const url = new URL("esearch.fcgi", BASE_URL);
  url.searchParams.set("db", "pubmed");
  url.searchParams.set("term", query);
  if (num !== undefined && num !== null) {
    url.searchParams.set("retmax", num);
  }
  url.searchParams.set("retmode", "json");
  if (api_key) {
    url.searchParams.set("api_key", api_key);
  }

  for (let attempt = 0; attempt < maxAttempts; attempt++) {
    try {
      const response = await import_axios.default.get(url.href);
      const idList = response.data.esearchresult.idlist;
      if (consolelog) {
        console.log(idList);
      }
      return idList;
    } catch (error) {
      const isLastAttempt = attempt === maxAttempts - 1;
      console.error(`Error searching IDs, attempt ${attempt + 1}/${maxAttempts}.${isLastAttempt ? "" : " Trying again."}`);
      // Report the cause right away rather than after the back-off delay.
      console.error(error);
      if (!isLastAttempt) {
        await delay(1e3 * Math.pow(2, attempt));
      }
    }
  }
  return [];
}
/**
 * Downloads the records for a list of IDs from the `efetch.fcgi` endpoint under BASE_URL
 * and parses the XML response with xml2js.
 *
 * The request (and parse) is retried up to three times; between attempts the function
 * waits 1 s, then 2 s (no wait follows the final attempt).
 *
 * @param {Array|string} id_list - IDs to fetch; an array is sent as a comma-separated list.
 * @param {string} [api_key] - Value for `api_key`. Left out of the request when falsy, so
 *   a missing key is never sent as the literal text "undefined".
 * @param {boolean} [consolelog] - When truthy, the parsed document is printed with console.log.
 * @returns {Promise<object|undefined>} The parsed document, or undefined when every
 *   attempt failed.
 */
async function fetchData(id_list, api_key, consolelog) {
  const maxAttempts = 3;

  const url = new URL("efetch.fcgi", BASE_URL);
  url.searchParams.set("db", "pubmed");
  // String() on an array joins its items with ",", matching the original interpolation.
  url.searchParams.set("id", String(id_list));
  url.searchParams.set("retmode", "xml");
  if (api_key) {
    url.searchParams.set("api_key", api_key);
  }

  for (let attempt = 0; attempt < maxAttempts; attempt++) {
    try {
      const response = await import_axios.default.get(url.href);
      // explicitArray: false means a lone child element is parsed as an object rather than
      // a one-item array; consumers of the result have to handle both shapes.
      const parser = new import_xml2js.default.Parser({ explicitArray: false, mergeAttrs: true, explicitCharkey: true });
      const ret = await parser.parseStringPromise(response.data);
      if (consolelog) {
        console.log(ret);
      }
      return ret;
    } catch (error) {
      const isLastAttempt = attempt === maxAttempts - 1;
      // The failure may be a network error, an HTTP error or an XML parse error, so report
      // the real cause instead of assuming a particular status code.
      console.error(`Error fetching ID data, attempt ${attempt + 1}/${maxAttempts}.${isLastAttempt ? "" : " Trying again."}`);
      console.error(error);
      if (!isLastAttempt) {
        await delay(1e3 * Math.pow(2, attempt));
      }
    }
  }
  return undefined;
}
/**
 * Converts a parsed efetch document into an array of plain article objects.
 *
 * The document is produced by xml2js with `explicitArray: false`, so any element that may
 * repeat arrives as an array when there are several and as a bare object when there is one.
 * The article list and the author list are therefore normalised to arrays before use, and
 * a record without an abstract or author list is kept (with an empty abstract / no authors)
 * instead of being discarded.
 *
 * @param {object} data - Parsed document; articles are read from
 *   `data.PubmedArticleSet.PubmedArticle`.
 * @returns {Promise<Array<object>>} One object per article that could be processed, with the
 *   keys PMID, title, slug, abstract, authors, journal, pubdate, keywords, url and
 *   affiliations. Articles that fail are logged and skipped. Returns an empty array when the
 *   document itself cannot be read.
 */
async function processData(data) {
  // Wraps a lone element in an array; absent elements become an empty array.
  const toList = (value) => {
    if (value === undefined || value === null) {
      return [];
    }
    return Array.isArray(value) ? value : [value];
  };

  try {
    const articles = toList(data.PubmedArticleSet.PubmedArticle);
    return articles.map((article) => {
      try {
        const citation = article.MedlineCitation;
        const details = citation.Article;
        const pmid = citation.PMID._;
        const title = details.ArticleTitle._;
        const authorList = toList(details.AuthorList ? details.AuthorList.Author : undefined);

        const abstractText = details.Abstract ? details.Abstract.AbstractText : undefined;
        const abstractParts = toList(abstractText);
        // A plain abstract carries its text in `_`; an abstract split into several parts
        // is an array and is joined by dataTools.getAbstractText.
        const abstract = abstractParts.length === 0 ? "" : abstractText._ || dataTools.getAbstractText(abstractParts);

        return {
          PMID: dataTools.getPMID(pmid),
          title,
          slug: dataTools.getSlug(title),
          abstract,
          authors: dataTools.getAuthors(authorList),
          journal: details.Journal.Title._,
          pubdate: new Date(dataTools.getDate(details.Journal.JournalIssue.PubDate)),
          keywords: dataTools.getKeywords(citation),
          url: `https://www.ncbi.nlm.nih.gov/pubmed/${pmid}`,
          affiliations: dataTools.getAffiliations(authorList)
        };
      } catch (articleError) {
        // Everything read for the log message is guarded: the record is already known to
        // be malformed, and a throw from here would discard the whole batch.
        const citation = (article && article.MedlineCitation) || {};
        const pmid = citation.PMID ? citation.PMID._ : undefined;
        const authorList = citation.Article ? citation.Article.AuthorList : undefined;
        const firstAuthor = toList(authorList ? authorList.Author : undefined)[0];
        const lastName = firstAuthor && firstAuthor.LastName ? firstAuthor.LastName._ : undefined;
        console.error("Error processing article:", pmid, lastName, articleError);
        return null;
      }
    }).filter((article) => article !== null);
  } catch (error) {
    console.error("Error processing data:", error);
    return [];
  }
}
/**
 * Helpers that turn fragments of an xml2js-parsed PubMed record into plain values.
 *
 * Text nodes are objects that carry their text in the `_` property. Elements that may
 * repeat show up as an array when there are several and as a bare object when there is one
 * (the parser in this file is created with `explicitArray: false`), so every helper that
 * walks a list accepts an array, a single item, or nothing at all.
 */
var dataTools = {
  /** Returns `value` as an array: arrays pass through, null/undefined become [], anything else is wrapped. */
  asArray(value) {
    if (value === undefined || value === null) {
      return [];
    }
    return Array.isArray(value) ? value : [value];
  },
  /** Returns the trimmed `_` text of a node, or "" when the node or its text is missing. */
  getText(node) {
    if (node === undefined || node === null || typeof node._ !== "string") {
      return "";
    }
    return node._.trim();
  },
  /** Converts the PMID text to a number. */
  getPMID(entry) {
    return Number(entry);
  },
  /**
   * Builds a slug from a title: lower-cased, every character other than a-z, 0-9,
   * whitespace and "-" removed, whitespace runs replaced by "-", repeated "-" collapsed.
   */
  getSlug(title) {
    return title.toLowerCase().replace(/[^a-z0-9\s-]/g, "").replace(/\s+/g, "-").replace(/-+/g, "-");
  },
  /** Joins the trimmed text of one or more abstract parts with single spaces. */
  getAbstractText(entry) {
    return dataTools.asArray(entry).map((section) => dataTools.getText(section)).join(" ");
  },
  /**
   * Returns "LastName ForeName" for every author that has both names.
   * Authors missing either name are left out.
   */
  getAuthors(entry) {
    const names = [];
    for (const author of dataTools.asArray(entry)) {
      const lastName = dataTools.getText(author ? author.LastName : undefined);
      const foreName = dataTools.getText(author ? author.ForeName : undefined);
      if (lastName && foreName) {
        names.push(`${lastName} ${foreName}`);
      }
    }
    return names;
  },
  /**
   * Builds a "year-month-day" string from a publication date node.
   * Month defaults to "Jan" and day to "01". When there is no Year element, the first
   * four-digit year (and first English month abbreviation, if any) found in a free-text
   * MedlineDate element is used instead. Returns "0000-Jan-01" when no year can be found.
   */
  getDate(entry) {
    const source = entry || {};
    const year = dataTools.getText(source.Year);
    if (year) {
      const month = dataTools.getText(source.Month) || "Jan";
      const day = dataTools.getText(source.Day) || "01";
      return `${year}-${month}-${day}`;
    }
    // Free-text dates, e.g. "2019 Nov-Dec" or "2000 Spring".
    const freeText = dataTools.getText(source.MedlineDate);
    const yearMatch = /\d{4}/.exec(freeText);
    if (yearMatch) {
      const monthMatch = /\b(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)\b/.exec(freeText);
      return `${yearMatch[0]}-${monthMatch ? monthMatch[1] : "Jan"}-01`;
    }
    return "0000-Jan-01";
  },
  /** Returns the trimmed text of every keyword in every keyword list of the citation. */
  getKeywords(entry) {
    const keywords = [];
    for (const list of dataTools.asArray(entry ? entry.KeywordList : undefined)) {
      for (const keyword of dataTools.asArray(list ? list.Keyword : undefined)) {
        keywords.push(dataTools.getText(keyword));
      }
    }
    return keywords;
  },
  /** Returns the distinct, non-empty affiliation texts of all authors, in first-seen order. */
  getAffiliations(entry) {
    const affiliations = new Set();
    for (const author of dataTools.asArray(entry)) {
      // An author may carry one affiliation block or several.
      for (const info of dataTools.asArray(author ? author.AffiliationInfo : undefined)) {
        const affiliation = dataTools.getText(info ? info.Affiliation : undefined);
        if (affiliation) {
          affiliations.add(affiliation);
        }
      }
    }
    return Array.from(affiliations);
  }
};
/**
 * Builds a search expression from optional author, topic and date-range parts.
 *
 * Authors are OR-ed together as `name[Author]`, topics are OR-ed together as
 * `topic[Title/Abstract]`, and the groups that are present are joined with " AND ".
 * Parts that are missing or empty are left out entirely, so the result never starts with
 * " AND " and never contains the text "undefined".
 *
 * @param {string[]} [authors] - Author names.
 * @param {string[]} [topics] - Topic terms.
 * @param {string} [dateRange] - Ready-made date clause appended as the last term.
 * @returns {string} The combined expression ("" when every part is missing).
 */
function buildQuery(authors, topics, dateRange) {
  const clauses = [];
  if (authors && authors.length > 0) {
    const authorTerms = authors.map((author) => `${author}[Author]`);
    clauses.push(`(${authorTerms.join(" OR ")})`);
  }
  if (topics && topics.length > 0) {
    const topicTerms = topics.map((topic) => `${topic}[Title/Abstract]`);
    clauses.push(`(${topicTerms.join(" OR ")})`);
  }
  if (dateRange) {
    clauses.push(dateRange);
  }
  return clauses.join(" AND ");
}
/**
 * Pauses the caller for roughly `ms` milliseconds.
 *
 * @param {number} ms - Delay handed to setTimeout.
 * @returns {Promise<undefined>} A promise that settles once the timer has fired.
 */
async function delay(ms) {
  await new Promise((wake) => {
    setTimeout(wake, ms);
  });
}
// Annotate the CommonJS export names for ESM import in node:
// The `0 &&` guard short-circuits, so the assignment below is never executed; the statement
// only spells out the export names in source form. The `default` export registered in the
// export table above is not listed here.
0 && (module.exports = {
buildQuery,
fetchData,
fetchIDs,
processData
});
//# sourceMappingURL=index.js.map