UNPKG

pubmed-fetch

Version:

TypeScript version of Bio.Entrez; automating PubMed article and manuscript data retrieval.

209 lines (208 loc) 7.89 kB
"use strict";

// ---------------------------------------------------------------------------
// Bundler (esbuild) interop helpers. These are generated runtime shims that
// let this CommonJS file expose ESM-style named/default exports and consume
// CommonJS dependencies as if they were ES modules. Logic is unchanged.
// ---------------------------------------------------------------------------
var __create = Object.create;
var __defProp = Object.defineProperty;
var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
var __getOwnPropNames = Object.getOwnPropertyNames;
var __getProtoOf = Object.getPrototypeOf;
var __hasOwnProp = Object.prototype.hasOwnProperty;

// Defines every key of `all` on `target` as an enumerable getter.
var __export = (target, all) => {
  for (var name in all)
    __defProp(target, name, { get: all[name], enumerable: true });
};

// Copies own properties of `from` onto `to` as live getters, skipping keys
// that `to` already has and the single key named by `except`.
var __copyProps = (to, from, except, desc) => {
  if (from && typeof from === "object" || typeof from === "function") {
    for (let key of __getOwnPropNames(from))
      if (!__hasOwnProp.call(to, key) && key !== except)
        __defProp(to, key, { get: () => from[key], enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable });
  }
  return to;
};

var __toESM = (mod, isNodeMode, target) => (target = mod != null ? __create(__getProtoOf(mod)) : {}, __copyProps(
  // If the importer is in node compatibility mode or this is not an ESM
  // file that has been converted to a CommonJS file using a Babel-
  // compatible transform (i.e. "__esModule" has not been set), then set
  // "default" to the CommonJS "module.exports" for node compatibility.
  isNodeMode || !mod || !mod.__esModule ? __defProp(target, "default", { value: mod, enumerable: true }) : target,
  mod
));

var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);

// src/index.ts
var src_exports = {};
__export(src_exports, {
  buildQuery: () => buildQuery,
  default: () => getIDsAndData,
  fetchData: () => fetchData,
  fetchIDs: () => fetchIDs,
  processData: () => processData
});
module.exports = __toCommonJS(src_exports);
var import_axios = __toESM(require("axios"));
var import_xml2js = __toESM(require("xml2js"));

var BASE_URL = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/";

// Number of times a request is attempted before giving up.
const MAX_ATTEMPTS = 3;

/**
 * Wraps a value in an array unless it already is one.
 *
 * The XML parser below is configured with `explicitArray: false`, so an
 * element that occurs once is delivered as a bare object while a repeated
 * element is delivered as an array. Every place that iterates must normalise.
 *
 * @param {*} value - Anything, including null/undefined.
 * @returns {Array} `[]` for null/undefined, `value` if it is an array, else `[value]`.
 */
function toArray(value) {
  if (value == null) {
    return [];
  }
  return Array.isArray(value) ? value : [value];
}

/**
 * Walks `path` through nested objects without throwing on a missing link.
 *
 * @param {*} root - Starting value.
 * @param {string[]} path - Property names to follow in order.
 * @returns {*} The value at the end of the path, or undefined if any link is null/undefined.
 */
function getPath(root, path) {
  let node = root;
  for (const key of path) {
    if (node == null) {
      return undefined;
    }
    node = node[key];
  }
  return node;
}

/**
 * Returns the trimmed text of a parsed node (its `_` property), or "" when
 * the node is missing or carries no string text.
 *
 * @param {*} node - A parsed XML node, or null/undefined.
 * @returns {string}
 */
function textOf(node) {
  const raw = node == null ? undefined : node._;
  return typeof raw === "string" ? raw.trim() : "";
}

/**
 * Builds a request URL under BASE_URL with percent-encoded query parameters.
 * Parameters whose value is undefined, null or "" are omitted, so an absent
 * API key is not sent as the literal text "undefined".
 *
 * @param {string} endpoint - Script name appended to BASE_URL, e.g. "esearch.fcgi".
 * @param {Object} params - Query parameters; values are stringified.
 * @returns {string} The full URL.
 */
function buildUrl(endpoint, params) {
  const pairs = [];
  for (const key of Object.keys(params)) {
    const value = params[key];
    if (value == null || value === "") {
      continue;
    }
    // Commas are legal in a query string; keeping them literal preserves the
    // "1,2,3" id-list form the original request used.
    const encoded = encodeURIComponent(String(value)).replace(/%2C/gi, ",");
    pairs.push(`${encodeURIComponent(key)}=${encoded}`);
  }
  return `${BASE_URL}${endpoint}?${pairs.join("&")}`;
}

/**
 * Runs `task` up to MAX_ATTEMPTS times, waiting 1s, 2s, ... between failed
 * attempts. Each failure is logged together with the underlying error.
 *
 * @param {string} label - Prefix for the log message.
 * @param {function(): Promise<*>} task - The operation to attempt.
 * @param {*} fallback - Value returned when every attempt fails.
 * @returns {Promise<*>} The task's result, or `fallback`.
 */
async function retryWithBackoff(label, task, fallback) {
  for (let attempt = 0; attempt < MAX_ATTEMPTS; attempt++) {
    try {
      return await task();
    } catch (error) {
      const isLastAttempt = attempt === MAX_ATTEMPTS - 1;
      const outcome = isLastAttempt ? "Giving up." : "Trying again.";
      console.error(`${label}, attempt ${attempt + 1}/${MAX_ATTEMPTS}. ${outcome}`);
      console.error(error);
      // No point sleeping when there is no further attempt to wait for.
      if (!isLastAttempt) {
        await delay(1e3 * Math.pow(2, attempt));
      }
    }
  }
  return fallback;
}

/**
 * Searches for article IDs matching `query`, fetches their records and
 * converts them to plain article objects. Never rejects: any failure is
 * logged and yields an empty array.
 *
 * @param {string} query - Search term sent to esearch.
 * @param {number} numPapers - Maximum number of IDs to request (retmax).
 * @param {string} [api_key] - API key; omitted from the request when absent.
 * @param {boolean} [consolelog] - When truthy, logs the processed result.
 * @returns {Promise<Object[]>} Processed articles, or [] when nothing was found or a step failed.
 */
async function getIDsAndData(query, numPapers, api_key, consolelog) {
  try {
    const idList = await fetchIDs(query, numPapers, api_key, false);
    if (!idList || idList.length === 0) {
      return [];
    }
    const data = await fetchData(idList, api_key, false);
    if (!data) {
      // fetchData has already logged every failed attempt.
      return [];
    }
    const processedData = await processData(data);
    if (consolelog) {
      console.log(processedData);
    }
    return processedData;
  } catch (error) {
    console.error("Error during fetch process: ", error);
    return [];
  }
}

/**
 * Requests the list of article IDs matching `query` from esearch (JSON mode).
 *
 * @param {string} query - Search term; percent-encoded before sending.
 * @param {number} num - Maximum number of IDs to return (retmax).
 * @param {string} [api_key] - API key; omitted from the request when absent.
 * @param {boolean} [consolelog] - When truthy, logs the ID list.
 * @returns {Promise<string[]>} The ID list, or [] after all attempts fail.
 */
async function fetchIDs(query, num, api_key, consolelog) {
  const url = buildUrl("esearch.fcgi", {
    db: "pubmed",
    term: query,
    retmax: num,
    retmode: "json",
    api_key
  });
  return retryWithBackoff("Error searching IDs", async () => {
    const response = await import_axios.default.get(url);
    // Throws (and therefore retries) when the payload has no esearchresult.
    const found = response.data.esearchresult.idlist;
    const idList = Array.isArray(found) ? found : [];
    if (consolelog) {
      console.log(idList);
    }
    return idList;
  }, []);
}

/**
 * Fetches the records for `id_list` from efetch as XML and parses them into
 * a JavaScript object.
 *
 * @param {string[]|string} id_list - IDs to fetch; an array is sent comma-separated.
 * @param {string} [api_key] - API key; omitted from the request when absent.
 * @param {boolean} [consolelog] - When truthy, logs the parsed object.
 * @returns {Promise<Object|undefined>} The parsed document, or undefined after all attempts fail.
 */
async function fetchData(id_list, api_key, consolelog) {
  const url = buildUrl("efetch.fcgi", {
    db: "pubmed",
    id: id_list,
    retmode: "xml",
    api_key
  });
  return retryWithBackoff("Error fetching ID data", async () => {
    const response = await import_axios.default.get(url);
    const parser = new import_xml2js.default.Parser({
      explicitArray: false,
      mergeAttrs: true,
      explicitCharkey: true
    });
    const ret = await parser.parseStringPromise(response.data);
    if (consolelog) {
      console.log(ret);
    }
    return ret;
  }, undefined);
}

/**
 * Converts one parsed PubmedArticle node into the flat article record.
 * Deliberately uses direct property access: a record missing its abstract,
 * author list, title or journal throws here and is dropped by the caller,
 * which matches the module's previous behaviour for such records.
 *
 * @param {Object} article - A parsed PubmedArticle node.
 * @returns {Object} The flattened article record.
 */
function toArticleRecord(article) {
  const citation = article.MedlineCitation;
  const details = citation.Article;
  return {
    PMID: dataTools.getPMID(citation.PMID._),
    title: details.ArticleTitle._,
    slug: dataTools.getSlug(details.ArticleTitle._),
    abstract: details.Abstract.AbstractText._ || dataTools.getAbstractText(details.Abstract.AbstractText),
    authors: dataTools.getAuthors(details.AuthorList.Author),
    journal: details.Journal.Title._,
    // NOTE(review): getDate yields a non-ISO string such as "2020-Jan-01";
    // how `new Date` parses that is engine-dependent — confirm before relying on it.
    pubdate: new Date(dataTools.getDate(details.Journal.JournalIssue.PubDate)),
    keywords: dataTools.getKeywords(citation),
    url: `https://www.ncbi.nlm.nih.gov/pubmed/${citation.PMID._}`,
    affiliations: dataTools.getAffiliations(details.AuthorList.Author)
  };
}

/**
 * Turns the parsed efetch document into an array of article records.
 * Articles that cannot be converted are logged and skipped; a structurally
 * unusable document is logged and yields [].
 *
 * @param {Object} data - Parsed document as returned by fetchData.
 * @returns {Promise<Object[]>} One record per successfully converted article.
 */
async function processData(data) {
  try {
    // A result with exactly one article is parsed as an object, not an array.
    const articles = toArray(data.PubmedArticleSet.PubmedArticle);
    const pData = [];
    for (const article of articles) {
      try {
        pData.push(toArticleRecord(article));
      } catch (articleError) {
        // Look the identifying fields up safely: the record is already known
        // to be malformed, and a throw here would discard the whole batch.
        const pmid = getPath(article, ["MedlineCitation", "PMID", "_"]);
        const firstAuthor = toArray(getPath(article, ["MedlineCitation", "Article", "AuthorList", "Author"]))[0];
        console.error("Error processing article:", pmid, getPath(firstAuthor, ["LastName", "_"]), articleError);
      }
    }
    return pData;
  } catch (error) {
    console.error("Error processing data:", error);
    return [];
  }
}

// Field extractors used by toArticleRecord. Each one accepts either a single
// parsed node or an array of them (see toArray).
var dataTools = {
  /** Converts the PMID text to a number. */
  getPMID(entry) {
    return Number(entry);
  },

  /** Lower-cases the title, strips everything but a-z, 0-9, whitespace and "-", and hyphenates. */
  getSlug(title) {
    return title
      .toLowerCase()
      .replace(/[^a-z0-9\s-]/g, "")
      .replace(/\s+/g, "-")
      .replace(/-+/g, "-");
  },

  /** Joins the text of every AbstractText part with single spaces. */
  getAbstractText(entry) {
    return toArray(entry)
      .map((part) => `${(part && part._) || ""}`.trim())
      .join(" ");
  },

  /** Returns "LastName ForeName" for every author that has both names. */
  getAuthors(entry) {
    const names = [];
    for (const author of toArray(entry)) {
      const lastName = textOf(author && author.LastName);
      const foreName = textOf(author && author.ForeName);
      if (lastName && foreName) {
        names.push(`${lastName} ${foreName}`);
      }
    }
    return names;
  },

  /**
   * Builds a "year-month-day" string from a PubDate node, defaulting the
   * month to "Jan" and the day to "01"; returns "0000-Jan-01" when the node
   * has no Year text.
   */
  getDate(entry) {
    if (entry.Year && entry.Year._) {
      const year = entry.Year._;
      const month = entry.Month && entry.Month._ || "Jan";
      const day = entry.Day && entry.Day._ || "01";
      return `${year}-${month}-${day}`.trim();
    }
    return "0000-Jan-01";
  },

  /** Collects the trimmed text of every Keyword under every KeywordList of a MedlineCitation. */
  getKeywords(entry) {
    const keywords = [];
    for (const list of toArray(entry.KeywordList)) {
      for (const keyword of toArray(list && list.Keyword)) {
        keywords.push(`${(keyword && keyword._) || ""}`.trim());
      }
    }
    return keywords;
  },

  /** Returns the distinct, non-empty affiliation texts across all authors, in first-seen order. */
  getAffiliations(entry) {
    const affiliations = /* @__PURE__ */ new Set();
    for (const author of toArray(entry)) {
      // An author with several affiliations has AffiliationInfo as an array.
      for (const info of toArray(author && author.AffiliationInfo)) {
        const affiliation = textOf(info && info.Affiliation);
        if (affiliation) {
          affiliations.add(affiliation);
        }
      }
    }
    return Array.from(affiliations);
  }
};

/**
 * Builds a search term from optional author, topic and date clauses.
 * Authors are OR-ed together, topics are OR-ed together, and the resulting
 * groups plus the date range are AND-ed. Absent clauses are simply left out,
 * so the result never starts with " AND " or ends with "undefined".
 *
 * @param {string[]} [authors] - Author names; each is tagged "[Author]".
 * @param {string[]} [topics] - Topics; each is tagged "[Title/Abstract]".
 * @param {string} [dateRange] - Pre-formatted date clause, appended verbatim.
 * @returns {string} The combined query ("" when every clause is absent).
 */
function buildQuery(authors, topics, dateRange) {
  const clauses = [];
  if (authors && authors.length > 0) {
    const authorQueries = authors.map((author) => `${author}[Author]`);
    clauses.push(`(${authorQueries.join(" OR ")})`);
  }
  if (topics && topics.length > 0) {
    const topicQueries = topics.map((topic) => `${topic}[Title/Abstract]`);
    clauses.push(`(${topicQueries.join(" OR ")})`);
  }
  if (dateRange) {
    clauses.push(String(dateRange));
  }
  return clauses.join(" AND ");
}

/**
 * Resolves after `ms` milliseconds.
 *
 * @param {number} ms - Time to wait.
 * @returns {Promise<void>}
 */
function delay(ms) {
  return new Promise((resolve) => setTimeout(resolve, ms));
}

// Annotate the CommonJS export names for ESM import in node:
0 && (module.exports = {
  buildQuery,
  fetchData,
  fetchIDs,
  processData
});
//# sourceMappingURL=index.js.map