metascraper
Version:
A library to easily scrape metadata from an article on the web using Open Graph, JSON+LD, regular HTML metadata, and a series of fallbacks.
39 lines (33 loc) • 890 B
JavaScript
const { isUrl } = require('@metascraper/helpers')
const { load } = require('cheerio')
const whoops = require('whoops')
const { loadRules, mergeRules } = require('./rules')
const getData = require('./get-data')
// Error constructor obtained from `whoops`, labelled 'MetascraperError'.
// It is instantiated below with `new` and a `{ message, code }` object when
// URL validation fails.
const MetascraperError = whoops('MetascraperError')
module.exports = rules => {
const loadedRules = loadRules(rules)
return async ({
url,
html = '',
htmlDom,
rules: inlineRules,
validateUrl = true,
omitPropNames = new Set(),
pickPropNames,
...props
} = {}) => {
if (validateUrl && !isUrl(url)) {
throw new MetascraperError({
message: 'Need to provide a valid URL.',
code: 'INVALID_URL'
})
}
return getData({
url,
htmlDom: htmlDom ?? load(html, { baseURI: url }),
rules: mergeRules(inlineRules, loadedRules, omitPropNames, pickPropNames),
...props
})
}
}