UNPKG

url-metadata-parser

Version:

Request an http(s) url and scrape its metadata

58 lines 2.38 kB
"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
const axios_1 = require("axios");
const from_1 = require("rxjs/internal/observable/from");
const operators_1 = require("rxjs/operators");
const of_1 = require("rxjs/internal/observable/of");
const metatag_1 = require("./metatag");
const meta_entity_1 = require("./meta.entity");
const iconvLte = require("iconv-lite");

/**
 * Error messages thrown by UrlMetadataParser.
 * Exposed as `exports.Errors` so callers can compare against them.
 */
const Errors = exports.Errors || (exports.Errors = {});
Errors["ContentsDoesNotExists"] = "Contents Does not exists.";

/**
 * Byte-order-mark signatures, as [charset, Buffer] pairs, checked in order.
 *
 * This is deliberately a list and not a Map keyed by charset: several
 * signatures share the 'utf-7' name, and a Map would keep only the last one,
 * silently dropping the others. Built once at module load rather than on
 * every getCharsetByBom() call.
 */
const BOM_SIGNATURES = [
    ['utf-1', [0xF7, 0x64, 0x4C]],
    ['utf-7', [0x2B, 0x2F, 0x76, 0x38]],
    ['utf-7', [0x2B, 0x2F, 0x76, 0x39]],
    ['utf-7', [0x2B, 0x2F, 0x76, 0x2B]],
    ['utf-7', [0x2B, 0x2F, 0x76, 0x3F]],
    ['utf-7', [0x2B, 0x2F, 0x76, 0x38, 0x2D]],
    ['utf-8', [0xEF, 0xBB, 0xBF]],
    ['utf-16be', [0xFE, 0xFF]],
    ['utf-16le', [0xFF, 0xFE]],
    ['utf-ebcdic', [0xDD, 0x73, 0x66, 0x73]],
    ['scsu', [0x0E, 0xFE, 0xFF]],
    ['bocu-1', [0xFB, 0xEE, 0x28]],
    ['gb-18030', [0x84, 0x31, 0x95, 0x33]],
].map(([charset, bytes]) => [charset, Buffer.from(bytes)]);

/** Matches every `<meta ...>` tag in a decoded document. */
const META_TAG_PATTERN = /<meta[^>]+>/g;

class UrlMetadataParser {
    /**
     * Detects a charset from the byte-order mark at the start of a buffer.
     *
     * @param {Buffer} buf - Raw response bytes.
     * @returns {Observable<string|null>} Emits the upper-cased charset name of
     *   the first matching BOM signature, or `null` when none matches.
     */
    static getCharsetByBom(buf) {
        for (const [charset, bom] of BOM_SIGNATURES) {
            // A buffer shorter than the BOM yields a shorter slice, which never equals it.
            if (buf.slice(0, bom.length).equals(bom)) {
                return of_1.of(charset.toUpperCase());
            }
        }
        return of_1.of(null);
    }

    /**
     * Requests `url`, decodes the body and collects its `<meta>` tags.
     *
     * The body is decoded with the charset detected from its BOM, or as
     * UTF-8 when no BOM is found.
     *
     * @param {string} url - The http(s) URL to fetch.
     * @returns {Observable<MetaEntity>} Emits one MetaEntity built from a
     *   Metatag per `<meta ...>` tag found. A document without any `<meta>`
     *   tag produces a MetaEntity built from an empty list.
     * @throws {Error} (through the Observable) `Errors.ContentsDoesNotExists`
     *   when the decoded body is empty.
     */
    static parse(url) {
        // Keep subclass dispatch when called as a method, but still work when
        // the static method is detached from the class (`this` is undefined).
        const owner = this && typeof this.getCharsetByBom === 'function'
            ? this
            : UrlMetadataParser;
        const request = axios_1.default.get(url, {
            responseType: 'arraybuffer',
        });
        return from_1.from(request).pipe(
            operators_1.concatMap((res) => {
                return owner.getCharsetByBom(res.data).pipe(operators_1.map((charset) => {
                    const body = iconvLte.decode(res.data, charset || 'UTF-8');
                    if (body.length <= 0) {
                        throw new Error(Errors.ContentsDoesNotExists);
                    }
                    // String.prototype.match returns null (not []) when nothing matches.
                    const rawTags = body.match(META_TAG_PATTERN) || [];
                    return rawTags.map((val) => new metatag_1.Metatag(val));
                }));
            }),
            operators_1.map((tags) => new meta_entity_1.MetaEntity(tags))
        );
    }
}
exports.UrlMetadataParser = UrlMetadataParser;
//# sourceMappingURL=url-metadata-parser.js.map