UNPKG

html-metadata-parser

Version:

HTML metadata scraper and parser for Node.js

80 lines (57 loc) 1.94 kB
import axios, { AxiosRequestConfig } from "axios";
import { parse as HTML, HTMLElement } from "node-html-parser";

/** Metadata fields collected from `<title>`, `<link rel=canonical>` and `<meta>` tags. */
interface Meta {
  title?: string;
  description?: string;
  image?: string;
  url?: string;
  type?: string;
  site_name?: string;
}

/** One `<img>` found in the document; `src` is resolved against the page URL. */
interface Image {
  src: string;
}

/** Shape of the value produced by {@link parse}. */
export class Metadata {
  /** Values from `<title>`, the canonical link and plain `<meta>` tags. */
  meta!: Meta;
  /** Values from `og:*` `<meta>` tags, keyed without the `og:` prefix. */
  og!: Meta;
  /** Every `<img>` with a usable `src`, as `{ src }` objects (not bare strings). */
  images?: Image[];
}

/** Plain `<meta>` names that are copied into `Metadata.meta`. */
const META_KEYS = ["title", "description", "image"] as const;

/** Open Graph fields (looked up as `og:<key>`) that are copied into `Metadata.og`. */
const OG_KEYS = ["title", "description", "image", "url", "site_name", "type"] as const;

/** Only http(s) URLs are fetched; anything else makes `parse` resolve to `null`. */
const URL_PATTERN = /(^http(s?):\/\/[^\s$.?#].[^\s]*)/i;

/**
 * Reads the `content` of a `<meta>` element when it is tagged with `name`.
 *
 * Both the `name` and the `property` attribute are compared, so an element
 * such as `<meta name="twitter:title" property="og:title" ...>` still matches
 * `og:title`.
 *
 * @param el - The `<meta>` element to inspect.
 * @param name - The exact attribute value to look for (e.g. `og:title`).
 * @returns The element's `content` attribute if it matches, otherwise `null`.
 */
const readMT = (el: HTMLElement, name: string): string | null | undefined => {
  const matches =
    el.getAttribute("name") === name || el.getAttribute("property") === name;
  return matches ? el.getAttribute("content") : null;
};

/**
 * Fetches `url` with axios and extracts page metadata from the returned HTML.
 *
 * @param url - Absolute http(s) URL of the page to scrape.
 * @param config - Optional axios request configuration passed through as-is.
 * @returns The collected metadata, or `null` when `url` does not look like an
 *   http(s) URL (no request is made in that case).
 * @throws Whatever `axios` rejects with when the request fails.
 */
const parse = async (
  url: string,
  config?: AxiosRequestConfig
): Promise<Metadata | null> => {
  if (!URL_PATTERN.test(url)) return null;

  const { data } = await axios(url, config);
  const $ = HTML(data);

  const og: Meta = {};
  const meta: Meta = {};
  const images: Image[] = [];

  const title = $.querySelector("title");
  if (title) meta.title = title.text;

  const canonical = $.querySelector("link[rel=canonical]");
  if (canonical) {
    meta.url = canonical.getAttribute("href");
  }

  // A later <meta> with the same key overwrites an earlier one, and a
  // <meta name="title"> overwrites the text taken from <title> above.
  for (const el of $.querySelectorAll("meta")) {
    for (const key of META_KEYS) {
      const val = readMT(el, key);
      if (val) meta[key] = val;
    }
    for (const key of OG_KEYS) {
      const val = readMT(el, `og:${key}`);
      if (val) og[key] = val;
    }
  }

  // images
  for (const el of $.querySelectorAll("img")) {
    const src = el.getAttribute("src");
    if (!src) continue;
    try {
      // Resolve relative sources against the page URL.
      images.push({ src: new URL(src, url).href });
    } catch {
      // `new URL` throws on a malformed src; skip that image rather than
      // failing the whole scrape.
    }
  }

  return { meta, og, images };
};

const parser = parse;

export default parser;
export { parse, parser };