UNPKG

@sickrin/openalex-sdk

Version:

A TypeScript SDK for interacting with the OpenAlex API - forked and enhanced version

760 lines (708 loc) 21.5 kB
// Empty file to make the 'types' directory a module.
import { AxiosResponse } from 'axios';
import fs from 'fs';
import {
  Author,
  Authors,
  AuthorsSearchParameters,
  ExternalIdsAuthor,
} from './types/author';
import {
  ExternalIdsInstitution,
  Institutions,
  SearchParametersInstitution,
} from './types/institution';
import {
  ExternalIdsSource,
  SearchParametersSource,
  Sources,
} from './types/source';
import { SearchParametersTopics, Topic, Topics } from './types/topic';
import { ExternalIdsWork, SearchParameters, Work, Works } from './types/work';
import {
  handleAllAuthorsPages,
  handleMultipleAuthorsPages,
  validateAuthorParameters,
} from './utils/authors';
import { convertToCSV } from './utils/exportCSV';
import {
  appendCursorToUrl,
  buildUrl,
  convertAbstractArrayToString,
  getCursorByPage,
} from './utils/helpers';
import { GET } from './utils/http';
import {
  handleAllInstitutionsPages,
  handleMultipleInstitutionsPages,
  validateInstitutionsParameters,
} from './utils/institutions';
import {
  handleAllSourcesPages,
  handleMultipleSourcesPages,
  validateSourcesParameters,
} from './utils/sources';
import {
  handleAllTopicsPages,
  handleMultipleTopicsPages,
  validateTopicsParameters,
} from './utils/topics';
import {
  handleAllPages,
  handleAllPagesInChunks,
  handleMultiplePages,
  validateParameters,
} from './utils/works';

/**
 * Client for the OpenAlex REST API.
 *
 * Every entity family (works, authors, sources, institutions, topics) has a
 * single-entity getter and a list method. The list methods share one flow:
 * validate the parameters, build the request URL, pick a cursor, issue one GET,
 * then either hand the first response to a multi-page helper or export/return
 * that single page.
 */
export default class OpenAlex {
  /** Contact e-mail given to the constructor. Stored only; no method of this class reads it. */
  email: string | null;
  /** API key given to the constructor. Stored only; no method of this class reads it. */
  apiKey: string | null;
  /** Base URL every request path is appended to. */
  url: string = 'https://api.openalex.org';
  /** Retry budget. Stored only; no method of this class reads it. */
  maxRetries: number = 3;
  /** Retry delay (presumably milliseconds — confirm with the consumer). Stored only here. */
  retryDelay: number = 1000;
  /** HTTP status codes regarded as retryable. Stored only; no method of this class reads it. */
  retryHttpCodes: number[] = [429, 500, 502, 503, 504];

  /**
   * @param email - optional contact e-mail, kept on the instance.
   * @param apiKey - optional API key, kept on the instance.
   */
  constructor(email: string | null = null, apiKey: string | null = null) {
    this.email = email;
    this.apiKey = apiKey;
  }

  /**
   * Retrieves a single work.
   *
   * @param id - identifier of the work.
   * @param externalIds - optional identifier namespace; when given the request
   * path becomes `/works/<externalIds>:<id>` instead of `/works/<id>`.
   * @returns the work on HTTP 200. For any other status a message is logged
   * with `console.log` and a placeholder work (`id`, empty `biblio`, empty
   * `counts_by_year`) is returned instead of throwing.
   *
   * NOTE(review): every other method of this class throws on a non-200 status;
   * this one is kept lenient so existing callers are not broken.
   */
  async work(id: string, externalIds?: ExternalIdsWork): Promise<Work> {
    const url = externalIds
      ? `${this.url}/works/${externalIds}:${id}`
      : `${this.url}/works/${id}`;
    const response: AxiosResponse<Work> = await GET(url);
    if (response.status === 200) {
      return response.data;
    }
    if (response.status === 404) {
      console.log(`Work with id ${id} not found`);
    } else {
      console.log(`Error ${response.status}: ${response.statusText}`);
    }
    // Placeholder returned in place of an error.
    return {
      id: id,
      biblio: {},
      counts_by_year: [],
    };
  }

  /**
   * Retrieves a list of works.
   *
   * Fields read from `searchParameters`:
   * - `search`, `searchField`, `filter`, `groupBy`, `sortBy` — forwarded to `buildUrl`.
   * - `perPage`, `page` — size and number of the page to fetch.
   * - `retriveAllPages` — forces `perPage` to 200 and starts from cursor `*`.
   * - `startPage`, `endPage` — when both are truthy the request starts at
   *   `startPage` and the first response is handed to `handleMultiplePages`.
   * - `toJson` — base file name; the single-page result is written to `<toJson>.json`.
   * - `toCsv` — name passed to `convertToCSV` together with the results.
   * - `AbstractArrayToString` — when truthy each result gets an `abstract`
   *   string built from `abstract_inverted_index`, and that index is deleted.
   * - `chunkSize` — with `retriveAllPages`, selects `handleAllPagesInChunks`.
   *
   * The defaults `perPage=25`, `page=1`, `retriveAllPages=false` apply only
   * when the method is called without an argument.
   *
   * @remarks Don't use `startPage`/`endPage` together with `retriveAllPages`.
   *
   * @param searchParameters - see {@link SearchParameters}.
   * @returns a {@link Works} object.
   * @throws {Error} if the response status is not 200.
   *
   * @example
   * const res = await openAlex.works({
   *   search: 'education',
   *   searchField: 'title',
   *   perPage: 1,
   *   filter: {
   *     has_fulltext: true,
   *   },
   *   toCsv: 'test100',
   *   startPage: 1,
   *   endPage: 2,
   * });
   *
   * @example
   * const res = await openAlex.works({
   *   search: 'education',
   *   searchField: 'title',
   *   perPage: 50,
   *   filter: {
   *     has_fulltext: true,
   *   },
   *   toJson: 'test100',
   *   page: 20,
   *   groupBy: 'publication_year',
   *   sortBy: {
   *     field: 'display_name',
   *     order: 'desc',
   *   },
   * });
   * @see {@link https://docs.openalex.org/api-entities/works/search-works OpenAlex API Documentation }
   * for more information about the works endpoint.
   */
  async works(
    searchParameters: SearchParameters = {
      perPage: 25,
      page: 1,
      retriveAllPages: false,
    },
  ): Promise<Works> {
    const {
      retriveAllPages,
      searchField,
      search,
      toJson,
      toCsv,
      startPage,
      endPage,
      filter,
      groupBy: group_by,
      sortBy,
      AbstractArrayToString,
      chunkSize,
    } = searchParameters;
    let { perPage } = searchParameters;
    let { page } = searchParameters;

    validateParameters(
      retriveAllPages,
      startPage,
      endPage,
      searchField,
      chunkSize,
      toCsv,
      toJson,
    );
    let url = buildUrl(
      this.url,
      'works',
      search,
      searchField,
      filter,
      group_by,
      sortBy,
    );

    if (retriveAllPages) perPage = 200;

    // Compute the cursor only in the branch that uses it, so no lookup is made
    // whose result would be thrown away.
    let cursor;
    if (startPage && endPage) {
      page = startPage;
      cursor = await getCursorByPage(url, startPage, perPage);
    } else if (retriveAllPages) {
      cursor = '*';
    } else {
      cursor = await getCursorByPage(url, page, perPage);
    }

    url = appendCursorToUrl(url, perPage, cursor, retriveAllPages);
    const response: AxiosResponse<Works> = await GET(url);
    if (response.status !== 200) {
      throw new Error(`Error ${response.status}: ${response.statusText}`);
    }

    response.data.meta.page = page ?? 1;

    if (AbstractArrayToString) {
      response.data.results = response.data.results.map((work) => {
        if (work.abstract_inverted_index)
          work.abstract = convertAbstractArrayToString(
            work.abstract_inverted_index,
          );
        delete work.abstract_inverted_index;
        return work;
      });
    }

    if (startPage && endPage) {
      return handleMultiplePages(
        startPage,
        endPage,
        url,
        response,
        toJson,
        toCsv,
        AbstractArrayToString,
      );
    }

    if (retriveAllPages) {
      if (chunkSize)
        // NOTE(review): this call is neither awaited nor returned, so the
        // method continues below and resolves with the first page only, and a
        // rejection here would be unhandled. Left unchanged because the
        // helper's return type and file naming are not visible from this file.
        handleAllPagesInChunks(
          url,
          response,
          toJson,
          toCsv,
          AbstractArrayToString,
          chunkSize,
        );
      else
        return handleAllPages(
          url,
          response,
          toJson,
          toCsv,
          AbstractArrayToString,
        );
    }

    if (toJson)
      fs.writeFileSync(
        `${toJson}.json`,
        JSON.stringify(response.data, null, 2),
      );
    if (toCsv) {
      convertToCSV(response.data.results, toCsv);
    }
    // The final request URL is attached after the JSON export, so it is not
    // part of the written file.
    response.data.meta.url = url;
    return response.data;
  }

  /**
   * Queries the works autocomplete endpoint.
   *
   * @param search - free-text query; it is URL-encoded before being placed in
   * the `q` query parameter so characters such as `&`, `#` or `+` cannot
   * corrupt the query string.
   * @returns the response body, typed as {@link Works}.
   * @throws {Error} if the response status is not 200.
   * @example
   * const res = await openAlex.autoCompleteWorks('education');
   * @see {@link https://docs.openalex.org/how-to-use-the-api/get-lists-of-entities/autocomplete-entities OpenAlex API Documentation }
   * for more information about the autocomplete endpoint.
   */
  async autoCompleteWorks(search: string): Promise<Works> {
    const response: AxiosResponse<Works> = await GET(
      `${this.url}/autocomplete/works?q=${encodeURIComponent(search)}`,
    );
    if (response.status !== 200) {
      throw new Error(`Error ${response.status}: ${response.statusText}`);
    }
    return response.data;
  }

  /**
   * Requests `/works/<id>/ngram` for a work.
   *
   * @param id - identifier of the work.
   * @returns the response body.
   * NOTE(review): the body is typed as `Work`, which looks inaccurate for an
   * n-gram payload — confirm and introduce a dedicated type.
   * @throws {Error} if the response status is not 200.
   * @example
   * const res = await openAlex.ngram('work_id');
   * @see {@link https://docs.openalex.org/api-entities/works/get-n-grams OpenAlex API Documentation }
   * for more information about the ngram endpoint.
   */
  async ngram(id: string) {
    const response: AxiosResponse<Work> = await GET(
      `${this.url}/works/${id}/ngram`,
    );
    if (response.status !== 200) {
      throw new Error(`Error ${response.status}: ${response.statusText}`);
    }
    return response.data;
  }

  /**
   * Retrieves a single author.
   *
   * @param id - identifier of the author.
   * @param externalIds - optional identifier namespace; when given the request
   * path becomes `/authors/<externalIds>:<id>`.
   * @returns the author.
   * @throws {Error} if the response status is not 200.
   * @example
   * const res = await openAlex.author('author_id');
   * @see {@link https://docs.openalex.org/api-entities/authors/get-authors OpenAlex API Documentation }
   * for more information about the author endpoint.
   */
  async author(id: string, externalIds?: ExternalIdsAuthor): Promise<Author> {
    const url = externalIds
      ? `${this.url}/authors/${externalIds}:${id}`
      : `${this.url}/authors/${id}`;
    const response: AxiosResponse<Author> = await GET(url);
    if (response.status !== 200) {
      throw new Error(`Error ${response.status}: ${response.statusText}`);
    }
    return response.data;
  }

  /**
   * Retrieves a list of authors.
   *
   * Fields read from `searchParameters`:
   * - `search`, `searchField`, `filter`, `groupBy`, `sortBy` — forwarded to `buildUrl`.
   * - `perPage`, `page` — size and number of the page to fetch.
   * - `retriveAllPages` — forces `perPage` to 200, starts from cursor `*` and
   *   hands the first response to `handleAllAuthorsPages`.
   * - `startPage`, `endPage` — when both are truthy the request starts at
   *   `startPage` and the first response is handed to `handleMultipleAuthorsPages`.
   * - `toJson` — base file name; the single-page result is written to `<toJson>.json`.
   * - `toCsv` — name passed to `convertToCSV` together with the results.
   *
   * The defaults `perPage=25`, `page=1`, `retriveAllPages=false` apply only
   * when the method is called without an argument.
   *
   * @remarks Don't use `startPage`/`endPage` together with `retriveAllPages`.
   *
   * @param searchParameters - see {@link AuthorsSearchParameters}.
   * @returns the single-page {@link Authors} body, or whatever the multi-page
   * helper returns.
   * @throws {Error} if the response status is not 200.
   *
   * @example
   * const res = await openAlex.authors({
   *   search: 'einstein',
   *   perPage: 1,
   *   toCsv: 'test100',
   *   startPage: 1,
   *   endPage: 2,
   * });
   * @example
   * const res = await openAlex.authors({
   *   search: 'einstein',
   *   perPage: 50,
   *   toJson: 'test100',
   *   page: 20,
   * });
   * @see {@link https://docs.openalex.org/api-entities/authors/search-authors OpenAlex API Documentation }
   * for more information about the authors endpoint.
   */
  async authors(
    searchParameters: AuthorsSearchParameters = {
      perPage: 25,
      page: 1,
      retriveAllPages: false,
    },
  ) {
    const {
      retriveAllPages,
      searchField,
      search,
      toJson,
      toCsv,
      startPage,
      endPage,
      filter,
      groupBy,
      sortBy,
    } = searchParameters;
    let { perPage } = searchParameters;
    let { page } = searchParameters;

    validateAuthorParameters(retriveAllPages, startPage, endPage, searchField);
    let url = buildUrl(
      this.url,
      'authors',
      search,
      searchField,
      filter,
      groupBy,
      sortBy,
    );

    if (retriveAllPages) perPage = 200;

    // Compute the cursor only in the branch that uses it.
    let cursor;
    if (startPage && endPage) {
      page = startPage;
      cursor = await getCursorByPage(url, startPage, perPage);
    } else if (retriveAllPages) {
      cursor = '*';
    } else {
      cursor = await getCursorByPage(url, page, perPage);
    }

    url = appendCursorToUrl(url, perPage, cursor, retriveAllPages);
    const response: AxiosResponse<Authors> = await GET(url);
    if (response.status !== 200) {
      throw new Error(`Error ${response.status}: ${response.statusText}`);
    }

    response.data.meta.page = page ?? 1;

    if (startPage && endPage) {
      return handleMultipleAuthorsPages(
        startPage,
        endPage,
        url,
        response,
        toJson,
        toCsv,
      );
    }
    if (retriveAllPages) {
      return handleAllAuthorsPages(url, response, toJson, toCsv);
    }

    if (toJson)
      fs.writeFileSync(
        `${toJson}.json`,
        JSON.stringify(response.data, null, 2),
      );
    if (toCsv) {
      convertToCSV(response.data.results, toCsv);
    }
    return response.data;
  }

  /**
   * Retrieves a single source.
   *
   * @param id - identifier of the source.
   * @param externalIds - optional identifier namespace; when given the request
   * path becomes `/sources/<externalIds>:<id>`.
   * @returns the response body.
   * NOTE(review): the body is typed as `Author`, which looks like a copy/paste
   * slip — a source type is probably intended. Left as is so the declared
   * return type seen by callers does not change.
   * @throws {Error} if the response status is not 200.
   */
  async source(id: string, externalIds?: ExternalIdsSource) {
    const url = externalIds
      ? `${this.url}/sources/${externalIds}:${id}`
      : `${this.url}/sources/${id}`;
    const response: AxiosResponse<Author> = await GET(url);
    if (response.status !== 200) {
      throw new Error(`Error ${response.status}: ${response.statusText}`);
    }
    return response.data;
  }

  /**
   * Retrieves a list of sources.
   *
   * Reads the same fields as {@link OpenAlex.authors}: `search`, `searchField`,
   * `filter`, `groupBy`, `sortBy`, `perPage`, `page`, `retriveAllPages`,
   * `startPage`, `endPage`, `toJson`, `toCsv`. Page ranges are delegated to
   * `handleMultipleSourcesPages`, full retrieval to `handleAllSourcesPages`.
   *
   * The defaults `perPage=25`, `page=1`, `retriveAllPages=false` apply only
   * when the method is called without an argument.
   *
   * @param searchParameters - see {@link SearchParametersSource}.
   * @returns the single-page {@link Sources} body, or whatever the multi-page
   * helper returns.
   * @throws {Error} if the response status is not 200.
   */
  async sources(
    searchParameters: SearchParametersSource = {
      perPage: 25,
      page: 1,
      retriveAllPages: false,
    },
  ) {
    const {
      retriveAllPages,
      searchField,
      search,
      toJson,
      toCsv,
      startPage,
      endPage,
      filter,
      groupBy,
      sortBy,
    } = searchParameters;
    let { perPage } = searchParameters;
    let { page } = searchParameters;

    validateSourcesParameters(retriveAllPages, startPage, endPage, searchField);
    let url = buildUrl(
      this.url,
      'sources',
      search,
      searchField,
      filter,
      groupBy,
      sortBy,
    );

    if (retriveAllPages) perPage = 200;

    // Compute the cursor only in the branch that uses it.
    let cursor;
    if (startPage && endPage) {
      page = startPage;
      cursor = await getCursorByPage(url, startPage, perPage);
    } else if (retriveAllPages) {
      cursor = '*';
    } else {
      cursor = await getCursorByPage(url, page, perPage);
    }

    url = appendCursorToUrl(url, perPage, cursor, retriveAllPages);
    const response: AxiosResponse<Sources> = await GET(url);
    if (response.status !== 200) {
      throw new Error(`Error ${response.status}: ${response.statusText}`);
    }

    response.data.meta.page = page ?? 1;

    if (startPage && endPage) {
      return handleMultipleSourcesPages(
        startPage,
        endPage,
        url,
        response,
        toJson,
        toCsv,
      );
    }
    if (retriveAllPages) {
      return handleAllSourcesPages(url, response, toJson, toCsv);
    }

    if (toJson)
      fs.writeFileSync(
        `${toJson}.json`,
        JSON.stringify(response.data, null, 2),
      );
    if (toCsv) {
      convertToCSV(response.data.results, toCsv);
    }
    return response.data;
  }

  /**
   * Retrieves a single institution.
   *
   * @param id - identifier of the institution.
   * @param externalIds - optional identifier namespace; when given the request
   * path becomes `/institutions/<externalIds>:<id>`.
   * @returns the response body.
   * NOTE(review): the body is typed as `Author`, which looks like a copy/paste
   * slip — an institution type is probably intended. Left as is so the
   * declared return type seen by callers does not change.
   * @throws {Error} if the response status is not 200.
   */
  async institution(id: string, externalIds?: ExternalIdsInstitution) {
    const url = externalIds
      ? `${this.url}/institutions/${externalIds}:${id}`
      : `${this.url}/institutions/${id}`;
    const response: AxiosResponse<Author> = await GET(url);
    if (response.status !== 200) {
      throw new Error(`Error ${response.status}: ${response.statusText}`);
    }
    return response.data;
  }

  /**
   * Retrieves a list of institutions.
   *
   * Reads the same fields as {@link OpenAlex.authors}: `search`, `searchField`,
   * `filter`, `groupBy`, `sortBy`, `perPage`, `page`, `retriveAllPages`,
   * `startPage`, `endPage`, `toJson`, `toCsv`. Page ranges are delegated to
   * `handleMultipleInstitutionsPages`, full retrieval to
   * `handleAllInstitutionsPages`.
   *
   * The defaults `perPage=25`, `page=1`, `retriveAllPages=false` apply only
   * when the method is called without an argument.
   *
   * @param searchParameters - see {@link SearchParametersInstitution}.
   * @returns the single-page {@link Institutions} body, or whatever the
   * multi-page helper returns.
   * @throws {Error} if the response status is not 200.
   */
  async institutions(
    searchParameters: SearchParametersInstitution = {
      perPage: 25,
      page: 1,
      retriveAllPages: false,
    },
  ) {
    const {
      retriveAllPages,
      searchField,
      search,
      toJson,
      toCsv,
      startPage,
      endPage,
      filter,
      groupBy,
      sortBy,
    } = searchParameters;
    let { perPage } = searchParameters;
    let { page } = searchParameters;

    validateInstitutionsParameters(
      retriveAllPages,
      startPage,
      endPage,
      searchField,
    );
    let url = buildUrl(
      this.url,
      'institutions',
      search,
      searchField,
      filter,
      groupBy,
      sortBy,
    );

    if (retriveAllPages) perPage = 200;

    // Compute the cursor only in the branch that uses it.
    let cursor;
    if (startPage && endPage) {
      page = startPage;
      cursor = await getCursorByPage(url, startPage, perPage);
    } else if (retriveAllPages) {
      cursor = '*';
    } else {
      cursor = await getCursorByPage(url, page, perPage);
    }

    url = appendCursorToUrl(url, perPage, cursor, retriveAllPages);
    const response: AxiosResponse<Institutions> = await GET(url);
    if (response.status !== 200) {
      throw new Error(`Error ${response.status}: ${response.statusText}`);
    }

    response.data.meta.page = page ?? 1;

    if (startPage && endPage) {
      return handleMultipleInstitutionsPages(
        startPage,
        endPage,
        url,
        response,
        toJson,
        toCsv,
      );
    }
    if (retriveAllPages) {
      return handleAllInstitutionsPages(url, response, toJson, toCsv);
    }

    if (toJson)
      fs.writeFileSync(
        `${toJson}.json`,
        JSON.stringify(response.data, null, 2),
      );
    if (toCsv) {
      convertToCSV(response.data.results, toCsv);
    }
    return response.data;
  }

  /**
   * Retrieves a single topic.
   *
   * @param id - identifier of the topic; must be non-empty.
   * @returns the topic.
   * @throws {Error} `Topic id is required` when `id` is falsy, or an
   * `Error <status>: <statusText>` when the response status is not 200.
   */
  async topic(id: string): Promise<Topic> {
    if (!id) throw new Error('Topic id is required');
    const response: AxiosResponse<Topic> = await GET(
      `${this.url}/topics/${id}`,
    );
    if (response.status !== 200) {
      throw new Error(`Error ${response.status}: ${response.statusText}`);
    }
    return response.data;
  }

  /**
   * Retrieves a list of topics.
   *
   * Reads the same fields as {@link OpenAlex.authors}: `search`, `searchField`,
   * `filter`, `groupBy`, `sortBy`, `perPage`, `page`, `retriveAllPages`,
   * `startPage`, `endPage`, `toJson`, `toCsv`. Page ranges are delegated to
   * `handleMultipleTopicsPages`, full retrieval to `handleAllTopicsPages`.
   *
   * The defaults `perPage=25`, `page=1`, `retriveAllPages=false` apply only
   * when the method is called without an argument.
   *
   * @param searchParameters - see {@link SearchParametersTopics}.
   * @returns the single-page {@link Topics} body, or whatever the multi-page
   * helper returns.
   * @throws {Error} if the response status is not 200.
   */
  async topics(
    searchParameters: SearchParametersTopics = {
      perPage: 25,
      page: 1,
      retriveAllPages: false,
    },
  ) {
    const {
      retriveAllPages,
      searchField,
      search,
      toJson,
      toCsv,
      startPage,
      endPage,
      filter,
      groupBy,
      sortBy,
    } = searchParameters;
    let { perPage } = searchParameters;
    let { page } = searchParameters;

    validateTopicsParameters(retriveAllPages, startPage, endPage, searchField);
    let url = buildUrl(
      this.url,
      'topics',
      search,
      searchField,
      filter,
      groupBy,
      sortBy,
    );

    if (retriveAllPages) perPage = 200;

    // Compute the cursor only in the branch that uses it.
    let cursor;
    if (startPage && endPage) {
      page = startPage;
      cursor = await getCursorByPage(url, startPage, perPage);
    } else if (retriveAllPages) {
      cursor = '*';
    } else {
      cursor = await getCursorByPage(url, page, perPage);
    }

    url = appendCursorToUrl(url, perPage, cursor, retriveAllPages);
    const response: AxiosResponse<Topics> = await GET(url);
    if (response.status !== 200) {
      throw new Error(`Error ${response.status}: ${response.statusText}`);
    }

    response.data.meta.page = page ?? 1;

    if (startPage && endPage) {
      return handleMultipleTopicsPages(
        startPage,
        endPage,
        url,
        response,
        toJson,
        toCsv,
      );
    }
    if (retriveAllPages) {
      return handleAllTopicsPages(url, response, toJson, toCsv);
    }

    if (toJson)
      fs.writeFileSync(
        `${toJson}.json`,
        JSON.stringify(response.data, null, 2),
      );
    if (toCsv) {
      convertToCSV(response.data.results, toCsv);
    }
    return response.data;
  }
}