pubmed-fetch
Version:
TypeScript version of Bio.Entrez; automating PubMed article and manuscript data retrieval.
1 lines • 13.1 kB
Source Map (JSON)
{"version":3,"sources":["../src/index.ts"],"sourcesContent":["import axios from 'axios';\nimport xml2js from 'xml2js';\n\nconst BASE_URL = \"https://eutils.ncbi.nlm.nih.gov/entrez/eutils/\"\n\n//npx ts-node src/index\n\n/*\nconst api_key = process.env.NCBI_API_KEY;\nconst authors = ['']\nconst topics = ['RNAi', \"siRNA\", \"ASO\", \"mRNA\"]\nconst dateRange = '(\"2017/09/19\"[Date - Create] : \"2018/10/15\"[Date - Create])'\nconst query = buildQuery(authors, topics, dateRange)\nconst ret = getIDsAndData(query, 15, api_key, true);\nconsole.log(ret)\n*/\n\n// paper data object\ntype PaperData = {\n PMID: number;\n title: string;\n slug: string;\n abstract: string;\n authors: string[];\n journal: string;\n pubdate: Date;\n keywords: string[];\n url: string;\n affiliations: string[];\n};\n\n// main: query of interested paper topics -> prepped data\nexport default async function getIDsAndData(query: string, numPapers: number, api_key: string | undefined, consolelog: boolean): Promise<Array<PaperData>> {\n try {\n const idList = await fetchIDs(query, numPapers, api_key, false);\n\n if (idList && idList.length > 0) {\n const data = await fetchData(idList, api_key, false);\n const processedData = await processData(data);\n if (consolelog) { console.log(processedData) }\n return processedData\n }\n return []\n } catch (error) {\n console.error(\"Error during fetch process: \", error);\n return []\n }\n}\n\n// getting PMIDs based on query\nexport async function fetchIDs(query: string, num: number, api_key: string | undefined, consolelog: boolean): Promise<string[]> {\n let idList: string[] = [];\n for (let attempt = 0; attempt < 3; attempt++) {\n try {\n const response = await axios.get(`${BASE_URL}esearch.fcgi?db=pubmed&term=${query}&retmax=${num}&retmode=json&api_key=${api_key}`);\n idList = response.data.esearchresult.idlist;\n\n if (consolelog) { console.log(idList) }\n\n return idList;\n } catch (error) {\n console.error(`Error searching IDs, attempt ${attempt + 1}/3. 
Trying again.`);\n await delay(1000 * Math.pow(2, attempt)); // Exponential backoff\n console.error(error)\n }\n }\n return idList;\n}\n\n// getting raw paper data based on PMIDs\nexport async function fetchData(id_list: any, api_key: string | undefined, consolelog: boolean): Promise<any> { //efetches\n for (let attempt = 0; attempt < 3; attempt++) {\n try {\n const response = await axios.get(`${BASE_URL}efetch.fcgi?db=pubmed&id=${id_list}&retmode=xml&api_key=${api_key}`)\n const parser = new xml2js.Parser({ explicitArray: false, mergeAttrs: true, explicitCharkey: true });\n const ret = await parser.parseStringPromise(response.data);\n if (consolelog) { console.log(ret) }\n return ret;\n } catch (error) {\n console.error(`Error fetching ID data (status 400) ${attempt + 1}/3. Trying again.`);\n await delay(1000 * Math.pow(2, attempt));\n }\n }\n}\n\n// formatting raw paper data with interested fields \nexport async function processData(data: any): Promise<Array<PaperData>> {\n try {\n const pData = data.PubmedArticleSet.PubmedArticle.map((article: any) => {\n try {\n return {\n PMID: dataTools.getPMID(article.MedlineCitation.PMID._),\n title: article.MedlineCitation.Article.ArticleTitle._,\n slug: dataTools.getSlug(article.MedlineCitation.Article.ArticleTitle._),\n abstract: article.MedlineCitation.Article.Abstract.AbstractText._ || dataTools.getAbstractText(article.MedlineCitation.Article.Abstract.AbstractText),\n authors: dataTools.getAuthors(article.MedlineCitation.Article.AuthorList.Author),\n journal: article.MedlineCitation.Article.Journal.Title._,\n pubdate: new Date(dataTools.getDate(article.MedlineCitation.Article.Journal.JournalIssue.PubDate)),\n keywords: dataTools.getKeywords(article.MedlineCitation),\n url: `https://www.ncbi.nlm.nih.gov/pubmed/${article.MedlineCitation.PMID._}`,\n affiliations: dataTools.getAffiliations(article.MedlineCitation.Article.AuthorList.Author)\n };\n } catch (articleError) {\n console.error(\"Error processing article:\", 
article.MedlineCitation.PMID._, article.MedlineCitation.Article.AuthorList.Author[0].LastName._, articleError);\n return null; // skip or return a fallback structure\n }\n }).filter((article: any) => article !== null); // remove any null articles\n return pData;\n\n } catch (error) {\n console.error(\"Error processing data:\", error);\n return []; // return empty array in case of failure\n }\n}\n\n// helper functions for cleaning raw paper data \nconst dataTools = {\n getPMID(entry: any): number { // entry = data.PubmedArticleSet.PubmedArticle[IDX].MedlineCitation.PMID._\n return Number(entry)\n },\n getSlug(title: any): string { //entry = data.PubmedArticleSet.PubmedArticle[IDX].MedlineCitation.Article.ArticleTitle._\n let slug = title.toLowerCase();\n slug = slug.replace(/[^a-z0-9\\s-]/g, '').replace(/\\s+/g, '-').replace(/-+/g, '-');\n return slug;\n },\n getAbstractText(entry: any): string { //entry = data.PubmedArticleSet.PubmedArticle[IDX].MedlineCitation.Article.Abstract.AbtractText\n const text = entry.map((text: { _: string }) => {\n const piece = text._ || '';\n return `${piece}`.trim();\n })\n return text.join(\" \")\n },\n getAuthors(entry: any): string[] { //entry = data.PubmedArticleSet.PubmedArticle[IDX].MedlineCitation.Article.AuthorList.Author\n const authors = entry.map((author: { LastName: { _: string; }; ForeName: { _: string; }; }) => {\n try {\n const lastName = author?.LastName?._.trim() || '';\n const foreName = author?.ForeName?._.trim() || '';\n\n if (lastName && foreName) {\n return `${lastName} ${foreName}`;\n }\n } catch (authorError) {\n console.error(\"Error processing author:\", author, authorError);\n }\n }).filter((name: string) => name);\n return authors\n },\n getDate(entry: any): string { //entry = data.PubmedArticleSet.PubmedArticle[IDX].MedlineCitation.Article.Journal.JournalIssue.PubDate\n if (entry.Year && entry.Year._) {\n const year = entry.Year._\n const month = (entry.Month && entry.Month._) || 'Jan';\n const day = 
(entry.Day && entry.Day._) || '01';\n return `${year}-${month}-${day}`.trim();\n } else {\n return \"0000-Jan-01\"\n }\n },\n getKeywords(entry: any): string[] { //entry = data.PubmedArticleSet.PubmedArticle[IDX].MedlineCitation.KeywordList.Keyword\n if (entry.KeywordList) {\n const keywords = entry.KeywordList.Keyword.map((keyword: { _: string }) => {\n const k = keyword._ || '';\n return `${k}`.trim();\n })\n return keywords\n }\n return []\n },\n getAffiliations(entry: any): string[] { // entry = data.PubmedArticleSet.PubmedArticle[IDX].MedlineCitation.Article.AuthorList.Author\n const affiliations = new Set<string>();\n for (const author of entry) {\n if (author?.AffiliationInfo && author.AffiliationInfo.Affiliation) {\n const affiliation = author.AffiliationInfo.Affiliation?._.trim();\n if (affiliation) {\n affiliations.add(affiliation);\n }\n }\n }\n const uniqueAffiliationsArray = Array.from(affiliations);\n return uniqueAffiliationsArray;\n }\n}\n\n// building the query string based on user input \nexport function buildQuery(authors: string[], topics: string[], dateRange: string): string {\n let queries: string[] = [];\n\n if (authors && authors.length > 0) {\n const authorQueries = authors.map(author => `${author}[Author]`);\n queries.push('(' + authorQueries.join(' OR ') + ')');\n }\n\n if (topics && topics.length > 0) {\n const topicQueries = topics.map(topic => `${topic}[Title/Abstract]`);\n queries.push('(' + topicQueries.join(' OR ') + ')');\n }\n return queries.join(' AND ') + ' AND ' + dateRange;\n}\n\n// error retry timer\nasync function delay(ms: number) {\n return new Promise(resolve => setTimeout(resolve, 
ms));\n}"],"mappings":";AAAA,OAAO,WAAW;AAClB,OAAO,YAAY;AAEnB,IAAM,WAAW;AA6BjB,eAAO,cAAqC,OAAe,WAAmB,SAA6B,YAAgD;AACvJ,MAAI;AACA,UAAM,SAAS,MAAM,SAAS,OAAO,WAAW,SAAS,KAAK;AAE9D,QAAI,UAAU,OAAO,SAAS,GAAG;AAC7B,YAAM,OAAO,MAAM,UAAU,QAAQ,SAAS,KAAK;AACnD,YAAM,gBAAgB,MAAM,YAAY,IAAI;AAC5C,UAAI,YAAY;AAAE,gBAAQ,IAAI,aAAa;AAAA,MAAE;AAC7C,aAAO;AAAA,IACX;AACA,WAAO,CAAC;AAAA,EACZ,SAAS,OAAO;AACZ,YAAQ,MAAM,gCAAgC,KAAK;AACnD,WAAO,CAAC;AAAA,EACZ;AACJ;AAGA,eAAsB,SAAS,OAAe,KAAa,SAA6B,YAAwC;AAC5H,MAAI,SAAmB,CAAC;AACxB,WAAS,UAAU,GAAG,UAAU,GAAG,WAAW;AAC1C,QAAI;AACA,YAAM,WAAW,MAAM,MAAM,IAAI,GAAG,QAAQ,+BAA+B,KAAK,WAAW,GAAG,yBAAyB,OAAO,EAAE;AAChI,eAAS,SAAS,KAAK,cAAc;AAErC,UAAI,YAAY;AAAE,gBAAQ,IAAI,MAAM;AAAA,MAAE;AAEtC,aAAO;AAAA,IACX,SAAS,OAAO;AACZ,cAAQ,MAAM,gCAAgC,UAAU,CAAC,mBAAmB;AAC5E,YAAM,MAAM,MAAO,KAAK,IAAI,GAAG,OAAO,CAAC;AACvC,cAAQ,MAAM,KAAK;AAAA,IACvB;AAAA,EACJ;AACA,SAAO;AACX;AAGA,eAAsB,UAAU,SAAc,SAA6B,YAAmC;AAC1G,WAAS,UAAU,GAAG,UAAU,GAAG,WAAW;AAC1C,QAAI;AACA,YAAM,WAAW,MAAM,MAAM,IAAI,GAAG,QAAQ,4BAA4B,OAAO,wBAAwB,OAAO,EAAE;AAChH,YAAM,SAAS,IAAI,OAAO,OAAO,EAAE,eAAe,OAAO,YAAY,MAAM,iBAAiB,KAAK,CAAC;AAClG,YAAM,MAAM,MAAM,OAAO,mBAAmB,SAAS,IAAI;AACzD,UAAI,YAAY;AAAE,gBAAQ,IAAI,GAAG;AAAA,MAAE;AACnC,aAAO;AAAA,IACX,SAAS,OAAO;AACZ,cAAQ,MAAM,uCAAuC,UAAU,CAAC,mBAAmB;AACnF,YAAM,MAAM,MAAO,KAAK,IAAI,GAAG,OAAO,CAAC;AAAA,IAC3C;AAAA,EACJ;AACJ;AAGA,eAAsB,YAAY,MAAsC;AACpE,MAAI;AACA,UAAM,QAAQ,KAAK,iBAAiB,cAAc,IAAI,CAAC,YAAiB;AACpE,UAAI;AACA,eAAO;AAAA,UACH,MAAM,UAAU,QAAQ,QAAQ,gBAAgB,KAAK,CAAC;AAAA,UACtD,OAAO,QAAQ,gBAAgB,QAAQ,aAAa;AAAA,UACpD,MAAM,UAAU,QAAQ,QAAQ,gBAAgB,QAAQ,aAAa,CAAC;AAAA,UACtE,UAAU,QAAQ,gBAAgB,QAAQ,SAAS,aAAa,KAAK,UAAU,gBAAgB,QAAQ,gBAAgB,QAAQ,SAAS,YAAY;AAAA,UACpJ,SAAS,UAAU,WAAW,QAAQ,gBAAgB,QAAQ,WAAW,MAAM;AAAA,UAC/E,SAAS,QAAQ,gBAAgB,QAAQ,QAAQ,MAAM;AAAA,UACvD,SAAS,IAAI,KAAK,UAAU,QAAQ,QAAQ,gBAAgB,QAAQ,QAAQ,aAAa,OAAO,CAAC;AAAA,UACjG,UAAU,UAAU,YAAY,QAAQ,eAAe;AAAA,UACvD,KAAK,uCAAuC,QAAQ,gBAAgB,KAAK,CAAC;AAAA,UAC1E,cAAc,UAAU,gBAAgB,QAAQ,gBAAgB,QAAQ,WAAW,MAAM;AAAA,QAC7F;AAAA,MACJ,SAAS,cAAc;AACnB,gBAAQ,MAAM,
6BAA6B,QAAQ,gBAAgB,KAAK,GAAG,QAAQ,gBAAgB,QAAQ,WAAW,OAAO,CAAC,EAAE,SAAS,GAAG,YAAY;AACxJ,eAAO;AAAA,MACX;AAAA,IACJ,CAAC,EAAE,OAAO,CAAC,YAAiB,YAAY,IAAI;AAC5C,WAAO;AAAA,EAEX,SAAS,OAAO;AACZ,YAAQ,MAAM,0BAA0B,KAAK;AAC7C,WAAO,CAAC;AAAA,EACZ;AACJ;AAGA,IAAM,YAAY;AAAA,EACd,QAAQ,OAAoB;AACxB,WAAO,OAAO,KAAK;AAAA,EACvB;AAAA,EACA,QAAQ,OAAoB;AACxB,QAAI,OAAO,MAAM,YAAY;AAC7B,WAAO,KAAK,QAAQ,iBAAiB,EAAE,EAAE,QAAQ,QAAQ,GAAG,EAAE,QAAQ,OAAO,GAAG;AAChF,WAAO;AAAA,EACX;AAAA,EACA,gBAAgB,OAAoB;AAChC,UAAM,OAAO,MAAM,IAAI,CAACA,UAAwB;AAC5C,YAAM,QAAQA,MAAK,KAAK;AACxB,aAAO,GAAG,KAAK,GAAG,KAAK;AAAA,IAC3B,CAAC;AACD,WAAO,KAAK,KAAK,GAAG;AAAA,EACxB;AAAA,EACA,WAAW,OAAsB;AAC7B,UAAM,UAAU,MAAM,IAAI,CAAC,WAAoE;AArIvG;AAsIY,UAAI;AACA,cAAM,aAAW,sCAAQ,aAAR,mBAAkB,EAAE,WAAU;AAC/C,cAAM,aAAW,sCAAQ,aAAR,mBAAkB,EAAE,WAAU;AAE/C,YAAI,YAAY,UAAU;AACtB,iBAAO,GAAG,QAAQ,IAAI,QAAQ;AAAA,QAClC;AAAA,MACJ,SAAS,aAAa;AAClB,gBAAQ,MAAM,4BAA4B,QAAQ,WAAW;AAAA,MACjE;AAAA,IACJ,CAAC,EAAE,OAAO,CAAC,SAAiB,IAAI;AAChC,WAAO;AAAA,EACX;AAAA,EACA,QAAQ,OAAoB;AACxB,QAAI,MAAM,QAAQ,MAAM,KAAK,GAAG;AAC5B,YAAM,OAAO,MAAM,KAAK;AACxB,YAAM,QAAS,MAAM,SAAS,MAAM,MAAM,KAAM;AAChD,YAAM,MAAO,MAAM,OAAO,MAAM,IAAI,KAAM;AAC1C,aAAO,GAAG,IAAI,IAAI,KAAK,IAAI,GAAG,GAAG,KAAK;AAAA,IAC1C,OAAO;AACH,aAAO;AAAA,IACX;AAAA,EACJ;AAAA,EACA,YAAY,OAAsB;AAC9B,QAAI,MAAM,aAAa;AACnB,YAAM,WAAW,MAAM,YAAY,QAAQ,IAAI,CAAC,YAA2B;AACvE,cAAM,IAAI,QAAQ,KAAK;AACvB,eAAO,GAAG,CAAC,GAAG,KAAK;AAAA,MACvB,CAAC;AACD,aAAO;AAAA,IACX;AACA,WAAO,CAAC;AAAA,EACZ;AAAA,EACA,gBAAgB,OAAsB;AAvK1C;AAwKQ,UAAM,eAAe,oBAAI,IAAY;AACrC,eAAW,UAAU,OAAO;AACxB,WAAI,iCAAQ,oBAAmB,OAAO,gBAAgB,aAAa;AAC/D,cAAM,eAAc,YAAO,gBAAgB,gBAAvB,mBAAoC,EAAE;AAC1D,YAAI,aAAa;AACb,uBAAa,IAAI,WAAW;AAAA,QAChC;AAAA,MACJ;AAAA,IACJ;AACA,UAAM,0BAA0B,MAAM,KAAK,YAAY;AACvD,WAAO;AAAA,EACX;AACJ;AAGO,SAAS,WAAW,SAAmB,QAAkB,WAA2B;AACvF,MAAI,UAAoB,CAAC;AAEzB,MAAI,WAAW,QAAQ,SAAS,GAAG;AAC/B,UAAM,gBAAgB,QAAQ,IAAI,YAAU,GAAG,MAAM,UAAU;AAC/D,YAAQ,KAAK,MAAM,cAAc,KAAK,MAAM,IAAI,GAAG;AAAA,EACvD;AAEA,MAAI,UAAU,OAAO,SAAS,GAAG;AAC7B,UAAM,eAAe,OAAO,IAAI,WAAS,GAAG,KAAK,kBAAkB
;AACnE,YAAQ,KAAK,MAAM,aAAa,KAAK,MAAM,IAAI,GAAG;AAAA,EACtD;AACA,SAAO,QAAQ,KAAK,OAAO,IAAI,UAAU;AAC7C;AAGA,eAAe,MAAM,IAAY;AAC7B,SAAO,IAAI,QAAQ,aAAW,WAAW,SAAS,EAAE,CAAC;AACzD;","names":["text"]}