UNPKG

@extractus/feed-extractor

Version:

To read and normalize RSS/ATOM/JSON feed data

75 lines (60 loc) 1.97 kB
// main.js import { isValid as isValidUrl } from './utils/linker.js' import retrieve from './utils/retrieve.js' import { validate, xml2obj, isRSS, isAtom, isRdf } from './utils/xmlparser.js' import parseJsonFeed from './utils/parseJsonFeed.js' import parseRssFeed from './utils/parseRssFeed.js' import parseAtomFeed from './utils/parseAtomFeed.js' import parseRdfFeed from './utils/parseRdfFeed.js' const getopt = (options = {}) => { const { normalization = true, descriptionMaxLen = 250, useISODateFormat = true, xmlParserOptions = {}, baseUrl = '', getExtraFeedFields = () => ({}), getExtraEntryFields = () => ({}), } = options return { normalization, descriptionMaxLen, useISODateFormat, xmlParserOptions, baseUrl, getExtraFeedFields, getExtraEntryFields, } } export const extractFromJson = (json, options = {}) => { return parseJsonFeed(json, getopt(options)) } export const extractFromXml = (xml, options = {}) => { if (!validate(xml)) { throw new Error('The XML document is not well-formed') } const opts = getopt(options) const data = xml2obj(xml, opts.xmlParserOptions) return isRSS(data) ? parseRssFeed(data, opts) : isAtom(data) ? parseAtomFeed(data, opts) : isRdf(data) ? parseRdfFeed(data, opts) : null } export const extract = async (url, options = {}, fetchOptions = {}) => { if (!isValidUrl(url)) { throw new Error('Input param must be a valid URL') } const data = await retrieve(url, fetchOptions) if (!data.text && !data.json) { throw new Error(`Failed to load content from "${url}"`) } const { type, json, text } = data return type === 'json' ? extractFromJson(json, options) : extractFromXml(text, options) } export const read = async (url, options, fetchOptions) => { console.warn('WARNING: read() is deprecated. Please use extract() instead!') return extract(url, options, fetchOptions) }