metascraper-publisher
Version:
Get publisher property from HTML markup
59 lines (50 loc) • 1.98 kB
JavaScript
'use strict'
const { $filter, $jsonld, publisher, toRule } = require('@metascraper/helpers')
// Splits a title at a `-` or `|` separator that is followed by whitespace.
// The lazy `.*?` stops at the first such separator and capture group 1 holds
// everything after it, e.g. "Post - Site" -> "Site".
const REGEX_TITLE = /^.*?[-|]\s+(.*)$/
// Turns a value getter into a publisher rule through the helpers' `toRule`.
// NOTE(review): presumably `publisher` validates/normalizes the extracted
// value — confirm against @metascraper/helpers.
const toPublisher = toRule(publisher)
/**
 * Extract the trailing segment of a separator-delimited title, e.g.
 * "Article - Section | Site" -> "Site".
 *
 * The regex is applied repeatedly: capture group 1 of each match becomes the
 * input of the next pass, until the regex no longer matches.
 *
 * @param {string} text - Title text to inspect.
 * @param {RegExp} [regex=REGEX_TITLE] - Pattern whose first capture group is
 *   the remainder to keep.
 * @returns {string|false} The last captured segment, or `false` when the
 *   regex does not match `text` at all.
 */
const getFromTitle = (text, regex = REGEX_TITLE) => {
  let result = false
  let input = text
  for (;;) {
    // Regexes with the `g` or `y` flag resume matching from `lastIndex`, so
    // rewind before every pass; otherwise state left by the previous match
    // makes later passes fail. This is a no-op for flagless regexes.
    regex.lastIndex = 0
    const matches = regex.exec(input)
    if (!matches) return result
    result = matches[1]
    // A capture equal to its own input can never shrink further; stop here
    // instead of looping forever.
    if (result === input) return result
    input = result
  }
}
module.exports = () => {
const rules = {
publisher: [
toPublisher($jsonld('publisher.name')),
toPublisher($ => $('meta[property="og:site_name"]').attr('content')),
toPublisher($ => $('meta[name*="application-name" i]').attr('content')),
toPublisher($ => $('meta[name*="app-title" i]').attr('content')),
toPublisher($ => $('meta[property*="app_name" i]').attr('content')),
toPublisher($ => $('meta[name="publisher" i]').attr('content')),
toPublisher($ =>
$('meta[name="twitter:app:name:iphone"]').attr('content')
),
toPublisher($ =>
$('meta[property="twitter:app:name:iphone"]').attr('content')
),
toPublisher($ => $('meta[name="twitter:app:name:ipad"]').attr('content')),
toPublisher($ =>
$('meta[property="twitter:app:name:ipad"]').attr('content')
),
toPublisher($ =>
$('meta[name="twitter:app:name:googleplay"]').attr('content')
),
toPublisher($ =>
$('meta[property="twitter:app:name:googleplay"]').attr('content')
),
toPublisher($ => $filter($, $('#logo'))),
toPublisher($ => $filter($, $('.logo'))),
toPublisher($ => $filter($, $('a[class*="brand" i]'))),
toPublisher($ => $('[class*="logo" i] a img[alt]').attr('alt')),
toPublisher($ => $('[class*="logo" i] img[alt]').attr('alt')),
toPublisher($ =>
$filter($, $('title'), el => getFromTitle($filter.fn(el)))
)
]
}
rules.pkgName = 'metascraper-publisher'
return rules
}
module.exports.getFromTitle = getFromTitle