pubmed-fetch
Version:
TypeScript version of Bio.Entrez; automating PubMed article and manuscript data retrieval.
171 lines • 6.12 kB
JavaScript
// src/index.ts
import axios from "axios";
import xml2js from "xml2js";
// Root of the NCBI E-utilities endpoints; the request helpers in this module
// append the script name (e.g. `esearch.fcgi`) and the query string to it.
const BASE_URL = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/";
/**
 * Run the whole pipeline: search for IDs, fetch their records, and turn the
 * parsed records into plain article objects.
 *
 * @param {string} query - Search term handed to `fetchIDs`.
 * @param {number} numPapers - Maximum number of IDs to request.
 * @param {string} api_key - API key forwarded to both request helpers.
 * @param {boolean} consolelog - When truthy, print the processed articles.
 * @returns {Promise<Array>} Whatever `processData` produced, or `[]` when no
 *   IDs were found or any step threw.
 */
async function getIDsAndData(query, numPapers, api_key, consolelog) {
  try {
    // The helpers' own logging is always switched off; only the final result
    // is printed, and only on request.
    const ids = await fetchIDs(query, numPapers, api_key, false);
    if (!ids || !(ids.length > 0)) {
      return [];
    }

    const rawRecords = await fetchData(ids, api_key, false);
    const articles = await processData(rawRecords);
    if (consolelog) {
      console.log(articles);
    }
    return articles;
  } catch (failure) {
    console.error("Error during fetch process: ", failure);
    return [];
  }
}
/**
 * Search PubMed through the E-utilities `esearch.fcgi` endpoint and return the
 * ID list found in the JSON response.
 *
 * The request is tried up to three times, backing off 1s and then 2s between
 * attempts; there is no wait after the final failure.
 *
 * @param {string} query - Search term. It is URL-encoded here, so pass it raw
 *   (characters such as `&`, `#` and `+` are safe).
 * @param {number} num - Maximum number of IDs to request (`retmax`).
 * @param {string} [api_key] - API key; left out of the request when falsy
 *   instead of being sent as the literal text "undefined".
 * @param {boolean} [consolelog] - When truthy, print the ID list.
 * @returns {Promise<Array>} `esearchresult.idlist` from the response, or `[]`
 *   when every attempt failed.
 */
async function fetchIDs(query, num, api_key, consolelog) {
  const MAX_ATTEMPTS = 3;

  // URLSearchParams percent-encodes every value, so the search term can never
  // break out of the `term` parameter.
  const params = new URLSearchParams({ db: "pubmed", term: `${query}` });
  if (num !== undefined && num !== null) {
    params.set("retmax", `${num}`);
  }
  params.set("retmode", "json");
  if (api_key) {
    params.set("api_key", `${api_key}`);
  }
  const url = `${BASE_URL}esearch.fcgi?${params.toString()}`;

  for (let attempt = 0; attempt < MAX_ATTEMPTS; attempt++) {
    try {
      const response = await axios.get(url);
      // A response without `esearchresult` throws here and is retried.
      const idList = response.data.esearchresult.idlist;
      if (consolelog) {
        console.log(idList);
      }
      return idList;
    } catch (error) {
      const isLastAttempt = attempt === MAX_ATTEMPTS - 1;
      const nextStep = isLastAttempt ? "Giving up." : "Trying again.";
      console.error(`Error searching IDs, attempt ${attempt + 1}/${MAX_ATTEMPTS}. ${nextStep}`);
      console.error(error);
      if (!isLastAttempt) {
        await delay(1e3 * Math.pow(2, attempt));
      }
    }
  }
  return [];
}
/**
 * Download the records for the given IDs from the E-utilities `efetch.fcgi`
 * endpoint and parse the XML response with xml2js.
 *
 * The request is tried up to three times, backing off 1s and then 2s between
 * attempts. Each failure is logged together with the underlying error.
 *
 * @param {Array|string} id_list - IDs to fetch; an array is sent
 *   comma-separated (its default string form).
 * @param {string} [api_key] - API key; left out of the request when falsy
 *   instead of being sent as the literal text "undefined".
 * @param {boolean} [consolelog] - When truthy, print the parsed document.
 * @returns {Promise<object|undefined>} The parsed document, or `undefined`
 *   when every attempt failed.
 */
async function fetchData(id_list, api_key, consolelog) {
  const MAX_ATTEMPTS = 3;

  let url = `${BASE_URL}efetch.fcgi?db=pubmed&id=${id_list}&retmode=xml`;
  if (api_key) {
    url += `&api_key=${encodeURIComponent(api_key)}`;
  }

  for (let attempt = 0; attempt < MAX_ATTEMPTS; attempt++) {
    try {
      const response = await axios.get(url);
      // A fresh parser per attempt, configured so that text lives under `_`,
      // attributes are merged into the node, and lone children are NOT
      // wrapped in arrays.
      const parser = new xml2js.Parser({ explicitArray: false, mergeAttrs: true, explicitCharkey: true });
      const ret = await parser.parseStringPromise(response.data);
      if (consolelog) {
        console.log(ret);
      }
      return ret;
    } catch (error) {
      const isLastAttempt = attempt === MAX_ATTEMPTS - 1;
      const nextStep = isLastAttempt ? "Giving up." : "Trying again.";
      // Report what actually went wrong (network error, HTTP status, XML
      // parse failure) rather than assuming a particular status code.
      console.error(`Error fetching ID data, attempt ${attempt + 1}/${MAX_ATTEMPTS}. ${nextStep}`);
      console.error(error);
      if (!isLastAttempt) {
        await delay(1e3 * Math.pow(2, attempt));
      }
    }
  }
  return undefined;
}
/**
 * Convert a parsed efetch document into a list of flat article objects.
 *
 * The parser used by `fetchData` is configured with `explicitArray: false`, so
 * an element that occurs once (a single `PubmedArticle`, a single `Author`)
 * arrives as a bare object rather than a one-element array. Both are
 * normalised to arrays here before they are mapped over.
 *
 * An article that cannot be processed (for example because it has no
 * `Abstract` or no `AuthorList`) is logged and skipped; it never aborts the
 * rest of the batch.
 *
 * @param {object} data - Parsed document with a `PubmedArticleSet` root.
 * @returns {Promise<Array<object>>} One object per successfully processed
 *   article (`PMID`, `title`, `slug`, `abstract`, `authors`, `journal`,
 *   `pubdate`, `keywords`, `url`, `affiliations`), or `[]` when the document
 *   itself is unusable.
 */
async function processData(data) {
  // Lone child -> [child]; missing child -> [].
  const toArray = (value) => {
    if (value === undefined || value === null) {
      return [];
    }
    return Array.isArray(value) ? value : [value];
  };
  // Walk a property path without throwing on a missing intermediate node.
  const dig = (root, keys) =>
    keys.reduce((node, key) => (node === undefined || node === null ? undefined : node[key]), root);

  try {
    const articles = toArray(data.PubmedArticleSet.PubmedArticle);
    return articles
      .map((article) => {
        try {
          const citation = article.MedlineCitation;
          const details = citation.Article;
          const pmid = citation.PMID._;
          const title = details.ArticleTitle._;
          const abstractText = details.Abstract.AbstractText;
          const authorList = toArray(details.AuthorList.Author);
          return {
            PMID: dataTools.getPMID(pmid),
            title,
            slug: dataTools.getSlug(title),
            // Plain abstracts carry their text directly; otherwise the pieces
            // are joined by the helper.
            abstract: abstractText._ || dataTools.getAbstractText(abstractText),
            authors: dataTools.getAuthors(authorList),
            journal: details.Journal.Title._,
            pubdate: new Date(dataTools.getDate(details.Journal.JournalIssue.PubDate)),
            keywords: dataTools.getKeywords(citation),
            url: `https://www.ncbi.nlm.nih.gov/pubmed/${pmid}`,
            affiliations: dataTools.getAffiliations(authorList)
          };
        } catch (articleError) {
          // Build the log context defensively: the fields read here are often
          // the very ones whose absence caused the failure, and a throw from
          // this handler would discard every other article too.
          const firstAuthor = toArray(dig(article, ["MedlineCitation", "Article", "AuthorList", "Author"]))[0];
          console.error(
            "Error processing article:",
            dig(article, ["MedlineCitation", "PMID", "_"]),
            dig(firstAuthor, ["LastName", "_"]),
            articleError
          );
          return null;
        }
      })
      .filter((article) => article !== null);
  } catch (error) {
    console.error("Error processing data:", error);
    return [];
  }
}
/**
 * Field extractors used when turning a parsed article into a flat object.
 *
 * The parsed XML comes from a parser configured (in `fetchData`) with
 * `explicitArray: false` and `explicitCharkey: true`: element text is stored
 * under `_`, and an element that occurs once is a bare object instead of a
 * one-element array. Every helper that iterates therefore accepts either a
 * single node or an array of nodes.
 */
const dataTools = (() => {
  // Lone node -> [node]; missing node -> [].
  const asList = (value) => {
    if (value === undefined || value === null) {
      return [];
    }
    return Array.isArray(value) ? value : [value];
  };
  // Trimmed text of a node, or "" when the node or its text is missing.
  const nodeText = (node) => {
    const text = node && node._;
    return typeof text === "string" ? text.trim() : "";
  };

  return {
    /** Convert the PMID text to a number. */
    getPMID(entry) {
      return Number(entry);
    },

    /**
     * Build a URL slug from a title: lower-case it, drop everything except
     * a-z, 0-9, whitespace and hyphens, then collapse whitespace and repeated
     * hyphens into single hyphens.
     */
    getSlug(title) {
      return title
        .toLowerCase()
        .replace(/[^a-z0-9\s-]/g, "")
        .replace(/\s+/g, "-")
        .replace(/-+/g, "-");
    },

    /**
     * Join the text of one or more abstract sections with single spaces.
     * Sections without text contribute an empty string.
     */
    getAbstractText(entry) {
      return asList(entry)
        .map((section) => `${(section && section._) || ""}`.trim())
        .join(" ");
    },

    /**
     * List authors as "LastName ForeName". Authors lacking either part are
     * omitted.
     */
    getAuthors(entry) {
      const names = [];
      for (const author of asList(entry)) {
        const lastName = nodeText(author && author.LastName);
        const foreName = nodeText(author && author.ForeName);
        if (lastName && foreName) {
          names.push(`${lastName} ${foreName}`);
        }
      }
      return names;
    },

    /**
     * Format a publication date node as "Year-Month-Day". A missing month
     * defaults to "Jan" and a missing day to "01"; without a year the
     * placeholder "0000-Jan-01" is returned.
     */
    getDate(entry) {
      if (!(entry.Year && entry.Year._)) {
        return "0000-Jan-01";
      }
      const year = entry.Year._;
      const month = (entry.Month && entry.Month._) || "Jan";
      const day = (entry.Day && entry.Day._) || "01";
      return `${year}-${month}-${day}`.trim();
    },

    /**
     * Collect the trimmed keywords of a citation, across one or several
     * `KeywordList` nodes. Returns `[]` when the citation has none.
     */
    getKeywords(entry) {
      if (!entry.KeywordList) {
        return [];
      }
      const keywords = [];
      for (const list of asList(entry.KeywordList)) {
        for (const keyword of asList(list && list.Keyword)) {
          keywords.push(`${(keyword && keyword._) || ""}`.trim());
        }
      }
      return keywords;
    },

    /**
     * Collect the distinct, trimmed affiliations of all authors, in order of
     * first appearance. Every `AffiliationInfo` entry of an author is read.
     */
    getAffiliations(entry) {
      const unique = new Set();
      for (const author of asList(entry)) {
        for (const info of asList(author && author.AffiliationInfo)) {
          const affiliation = nodeText(info && info.Affiliation);
          if (affiliation) {
            unique.add(affiliation);
          }
        }
      }
      return Array.from(unique);
    }
  };
})();
/**
 * Assemble a search term from author names, topics and a date range.
 *
 * Authors are OR-ed together as `name[Author]`, topics are OR-ed together as
 * `topic[Title/Abstract]`, and the resulting groups plus the date range are
 * AND-ed. Parts that are empty or not supplied are simply left out, so the
 * result never starts with a dangling " AND " or ends in "undefined".
 *
 * @param {string[]} [authors] - Author names.
 * @param {string[]} [topics] - Topic phrases.
 * @param {string} [dateRange] - Date clause, appended verbatim.
 * @returns {string} The combined search term ("" when nothing was supplied).
 */
function buildQuery(authors, topics, dateRange) {
  const clauses = [];
  if (authors && authors.length > 0) {
    const authorTerms = authors.map((author) => `${author}[Author]`);
    clauses.push(`(${authorTerms.join(" OR ")})`);
  }
  if (topics && topics.length > 0) {
    const topicTerms = topics.map((topic) => `${topic}[Title/Abstract]`);
    clauses.push(`(${topicTerms.join(" OR ")})`);
  }
  if (dateRange) {
    clauses.push(dateRange);
  }
  return clauses.join(" AND ");
}
/**
 * Pause for the given number of milliseconds.
 *
 * @param {number} ms - Time to wait, passed straight to `setTimeout`.
 * @returns {Promise<void>} Settles once the timer has fired.
 */
async function delay(ms) {
  await new Promise((wake) => {
    setTimeout(wake, ms);
  });
}
export {
buildQuery,
getIDsAndData as default,
fetchData,
fetchIDs,
processData
};
//# sourceMappingURL=index.mjs.map