UNPKG

audible-api

Version:

A Node.js API for searching the audible website

351 lines (301 loc) 10.2 kB
import { load as loadPage } from "cheerio";
// @ts-ignore
import dJSON from "dirty-json";
import fetch from "node-fetch";
import {
  AggregateOffer,
  AggregateRating,
  Audiobook,
  BreadcrumbList,
  ListItem,
  MusicGroup,
  Person,
  Product,
  Thing,
} from "schema-dts";
import { URL } from "url";
import { siteCountries } from "./data/audible-search-fields";
import { Book, Creator, Genre, Series } from "./types";
import { getLanguageByName } from "./utils/language";
import {
  cleanDescription,
  cleanNarratorUrl,
  cleanTitle,
  cleanUrl,
  getCopyrightYear,
} from "./utils/string";
import { SECONDS_IN_HOUR, SECONDS_IN_MINUTE } from "./utils/time";

/** A parsed HTML document, as returned by cheerio's `load`. */
type Page = ReturnType<typeof loadPage>;

/** Selector for the JSON-LD metadata blocks embedded in a page. */
const LD_JSON_SELECTOR = 'script[type="application/ld+json"]';

/**
 * Collect every JSON-LD object embedded in the page.
 *
 * Each script block is parsed with `JSON.parse` first and with dirty-json as a
 * fallback, because some blocks contain raw line breaks inside string values.
 * Blocks that cannot be parsed at all are skipped rather than aborting the
 * whole lookup, and non-object entries are dropped so callers can safely read
 * `"@type"` from every returned item.
 *
 * @param page - The loaded page to read the script blocks from
 * @returns A flat list of the JSON-LD objects found on the page
 */
function collectLdJson(page: Page): Thing[] {
  const parsed: unknown[] = [];
  page(LD_JSON_SELECTOR).each((i, elSel) => {
    const raw = page(elSel).text();
    try {
      parsed.push(JSON.parse(raw));
    } catch (strictErr) {
      try {
        parsed.push(dJSON.parse(raw));
      } catch (lenientErr) {
        // Unparseable block: ignore it and keep whatever the other blocks provide
      }
    }
  });
  // A single script block may hold an array of objects, hence the flatten
  return parsed
    .flat()
    .filter((entry) => typeof entry === "object" && entry !== null) as Thing[];
}

/**
 * Find the first JSON-LD object with the given `"@type"`.
 *
 * @param entries - The JSON-LD objects collected from a page
 * @param type - The `"@type"` value to look for
 * @returns The first matching object, or `undefined` when there is none
 */
function findLdJsonByType(entries: Thing[], type: string): Thing | undefined {
  return entries.find((entry) => entry["@type"] === type);
}

/**
 * Convert a duration string with `H`, `M` and `S` components (e.g. `PT11H35M`)
 * into a number of seconds. Missing components count as zero.
 *
 * @param duration - The duration string to convert
 * @returns The duration in seconds
 */
function parseDurationSeconds(duration: string): number {
  const hours = Number(duration.match(/\d+(?=H)/)?.[0] || 0);
  const minutes = Number(duration.match(/\d+(?=M)/)?.[0] || 0);
  const seconds = Math.round(
    Number(duration.match(/\d+(?:\.\d+)?(?=S)/)?.[0] || 0)
  );
  return hours * SECONDS_IN_HOUR + minutes * SECONDS_IN_MINUTE + seconds;
}

/**
 * Read a list of creators (authors or narrators) from the links matching a
 * selector.
 *
 * @param page - The loaded book page
 * @param selector - Selector matching the creator links
 * @param context - Selector of the container the links are searched in
 * @param baseUrl - Site base URL used to resolve relative link targets
 * @param clean - Function applied to the resolved absolute URL
 * @returns One creator per matching link, in document order
 */
function parseCreatorLinks(
  page: Page,
  selector: string,
  context: string,
  baseUrl: string,
  clean: (url: string) => string
): Creator[] {
  const creators: Creator[] = [];
  page(selector, context).each((i, elSel) => {
    const el = page(elSel);
    const creator: Creator = {
      name: el.text().trim(),
    };
    const urlPath = el.attr("href");
    if (urlPath) {
      creator.url = clean(new URL(urlPath, baseUrl).href);
    }
    creators.push(creator);
  });
  return creators;
}

/**
 * Get extended information about the author from their Audible URL
 *
 * This is best effort: if the page cannot be fetched or parsed, the author is
 * returned unchanged.
 *
 * @param author - An initial object of the creator to extend (required url)
 * @returns An extended object of information about the author
 */
async function parseAuthorInfo(author: Creator): Promise<Creator> {
  try {
    if (!author.url) {
      return author;
    }
    const res = await fetch(author.url);
    const body = await res.text();
    const page = loadPage(body);

    const ldJsonList = collectLdJson(page);

    const newAuthor: Creator = { ...author };

    // Get Author's Images
    const main = ".adbl-main";
    newAuthor.thumbnailImageUrl = page("img.author-image-outline", main).attr(
      "src"
    );

    // parse useful ld+json from the dom of the author's page
    const authorJson = findLdJsonByType(ldJsonList, "MusicGroup") as
      | MusicGroup
      | undefined;
    if (authorJson && typeof authorJson !== "string") {
      // Get author's name
      newAuthor.name = (authorJson.name as string) || newAuthor.name;
      // Get author's bio
      newAuthor.bio = (authorJson.description as string) || newAuthor.bio;

      // Only derive the id and clean url when the metadata actually has a url;
      // otherwise the name and bio gathered above would be lost to the catch
      const authorUrl = authorJson.url;
      if (typeof authorUrl === "string" && authorUrl) {
        // Get author's amazon ID (last path segment of the url)
        const authorId = authorUrl.split("/").pop();
        if (authorId) {
          newAuthor.id = authorId;
        }
        // Get clean author url
        newAuthor.url = authorUrl;
      }
    }

    const personJson = findLdJsonByType(ldJsonList, "Person") as
      | Person
      | undefined;
    if (personJson && typeof personJson !== "string") {
      newAuthor.imageUrl = personJson.image as string;
    }

    return newAuthor;
  } catch (err) {
    // Extended author details are optional, so fall back to what we were given
    return author;
  }
}

interface Options {
  /**
   * The audible site locality to get the book data from — Default: `us` | Options: `us`, `ca`, `gb`, `au`, `fr`, `de`, `it`
   */
  site?: string;
  /**
   * Whether or not to get the full author information for each author. This will add the author's bio and photo urls but it will take more time as their page must be pulled and parsed
   */
  getAuthors?: boolean;
}

/**
 * Get all Audible details about an Audiobook from its ASIN
 *
 * @param asin - Amazon Standard Identification Number, Amazon's unique ID that they assign to all of their products
 * @param opts - The optional arguments
 * @returns The parsed book data
 * @throws Rethrows any error raised while fetching or parsing the book page, after logging the ASIN
 */
export default async function getAudibleBook(
  asin: string,
  opts: Options = {}
): Promise<Book> {
  try {
    const site = opts.site || "us";
    const getAuthors = opts.getAuthors || false;
    // Unknown site keys fall back to the US site
    const { url: baseUrl } = siteCountries[site] || siteCountries.us;
    const bookUrl = `${baseUrl}/pd/${asin}?ipRedirectOverride=true`;
    const res = await fetch(bookUrl);
    const body = await res.text();
    const page = loadPage(body);

    const main = 'div[role="main"]';

    // Get URL
    const book: Book = {
      url: page('link[rel="canonical"]').attr("href"),
      authors: [],
      narrators: [],
    };

    // Get Authors
    book.authors = parseCreatorLinks(
      page,
      ".authorLabel a",
      main,
      baseUrl,
      cleanUrl
    );

    // Get extended author information
    if (getAuthors) {
      book.authors = await Promise.all(
        book.authors.map((author) => parseAuthorInfo(author))
      );
    }

    // Get narrators
    book.narrators = parseCreatorLinks(
      page,
      ".narratorLabel a",
      main,
      baseUrl,
      cleanNarratorUrl
    );

    // Get series and series part
    try {
      const series: Series[] = [];
      page(".seriesLabel a", main).each((i, elSel) => {
        const el = page(elSel);
        const newSeries: Series = {
          name: el.text().trim(),
        };
        const seriesUrlPath = el.attr("href");
        if (seriesUrlPath) {
          newSeries.url = cleanUrl(new URL(seriesUrlPath, baseUrl).href);
        }
        series.push(newSeries);
      });

      // The label text reads like "Series: Name, Book 1, Other Name, Book 2",
      // so a "Book N" item describes the series named just before it
      const seriesArr = page(".seriesLabel")
        .text()
        .replace(/\n/g, "")
        .trim()
        // remove the beginning text from the series
        .replace("Series: ", "")
        .split(", ")
        .map((item) => item.trim());

      seriesArr.forEach((seriesStr, i) => {
        if (!seriesStr.includes("Book")) {
          return;
        }
        const partText = seriesStr.replace("Book ", "").trim();
        const seriesPart = Number(partText);
        // Compare against NaN rather than truthiness so "Book 0" is kept
        if (partText === "" || Number.isNaN(seriesPart)) {
          return;
        }
        const matchingBookIndex = series.findIndex(
          (item) => item.name === seriesArr[i - 1]
        );
        if (matchingBookIndex > -1) {
          series[matchingBookIndex].part = seriesPart;
        }
      });

      book.series = series;
    } catch (err) {
      // Series data is optional; leave book.series unset if it cannot be parsed
    }

    // Get Copyright Info string and year
    try {
      const copyright = page(
        ".productPublisherSummary .bc-section > .bc-box:last-child"
      )
        .eq(0)
        .text()
        .trim();
      if (copyright && copyright.includes("©")) {
        book.copyright = copyright.replace(/\s+/g, " ");
        book.copyrightYear = getCopyrightYear(copyright);
      }
    } catch (err) {
      console.warn("Error parsing copyright", err);
    }

    // parse useful ld+json from the dom of the page
    const ldJsonList = collectLdJson(page);

    // "@type": "BreadcrumbList"
    const breadcrumbJson = findLdJsonByType(ldJsonList, "BreadcrumbList") as
      | BreadcrumbList
      | undefined;
    // Get Genres
    if (breadcrumbJson) {
      const itemListElement = breadcrumbJson.itemListElement as ListItem[];
      const newGenres: Genre[] = [];
      // The first breadcrumb is skipped; the remaining ones are used as genres
      itemListElement.slice(1).forEach((breadcrumb: ListItem) => {
        if (breadcrumb) {
          const item = breadcrumb.item as Thing;
          if (typeof item !== "string") {
            newGenres.push({
              name: item.name as string,
              url: `${baseUrl}${item["@id"]}`,
            });
          }
        }
      });
      book.genres = newGenres;
    }

    // "@type": "Product"
    const productJson = findLdJsonByType(ldJsonList, "Product") as
      | Product
      | undefined;
    if (productJson) {
      // Get ASIN
      book.asin = productJson.productID as string;
      // Get SKU
      book.sku = productJson.sku as string;
    }

    // "@type": "Audiobook"
    const bookJson = findLdJsonByType(ldJsonList, "Audiobook") as
      | Audiobook
      | undefined;
    if (bookJson) {
      // Get title
      book.title = bookJson.name as string;
      // Get clean title, without series part ("Book N") or "(Unabridged)"
      book.cleanTitle = cleanTitle(bookJson.name as string);
      // Get Publisher
      book.publisher = bookJson.publisher as string;

      // Get language and language codes
      const language = getLanguageByName(bookJson.inLanguage as string);
      if (language) {
        book.language = language;
      }

      // Get full description (without any html)
      book.description = cleanDescription(bookJson.description as string);
      // Get the date the book was published
      book.datePublished = new Date(bookJson.datePublished as string);

      // Get user rating
      if (bookJson.aggregateRating) {
        const { ratingValue, ratingCount } =
          bookJson.aggregateRating as AggregateRating;
        book.rating = {
          value: parseFloat(ratingValue as string),
          count: Number(ratingCount),
        };
      }

      // Get pricing
      if (bookJson.offers) {
        const { lowPrice, highPrice, priceCurrency } =
          bookJson.offers as AggregateOffer;
        book.price = {
          low: Number(lowPrice),
          high: Number(highPrice),
          currency: priceCurrency as string,
        };
      }

      // Get Abridgement
      book.isAbridged = (bookJson.abridged as string) === "true";

      // Get duration in seconds
      const durationStr = bookJson.duration as string;
      if (durationStr) {
        book.duration = parseDurationSeconds(durationStr);
      }

      // Get cover image URL
      book.coverUrl = bookJson.image as string;
    }

    return book;
  } catch (err) {
    console.error(`ERROR PARSING AUDIBLE BOOK FROM ASIN: ${asin}`);
    throw err;
  }
}