UNPKG

mf-obj

Version:
580 lines 21.1 kB
"use strict"; var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) { return new (P || (P = Promise))(function (resolve, reject) { function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } } function rejected(value) { try { step(generator.throw(value)); } catch (e) { reject(e); } } function step(result) { result.done ? resolve(result.value) : new P(function (resolve) { resolve(result.value); }).then(fulfilled, rejected); } step((generator = generator.apply(thisArg, _arguments)).next()); }); }; var parser = require('microformat-node'); const Request = require('request'); const cheerio = require('cheerio'); const url = require('url'); var debug = require('debug')('mf-obj'); exports.request = function (url) { return new Promise((resolve, reject) => { Request.get({ url: url, headers: { 'User-Agent': 'mf-obj' } }, (err, result) => err !== null ? reject(err) : resolve(result)); }); }; function getOembed(html) { return __awaiter(this, void 0, void 0, function* () { var $ = cheerio.load(html); var link = $('link[rel=\'alternate\'][type=\'application/json+oembed\'],' + 'link[rel=\'alternate\'][type=\'text/json+oembed\']').attr('href'); if (link == null) throw new Error('No oembed link found'); debug('Fetching ' + link); var res = yield exports.request(link); if (res.statusCode !== 200) throw new Error('Server returned status ' + res.statusCode); var embed = JSON.parse(res.body); return embed; }); } function getOpengraph(html) { var $ = cheerio.load(html); var res = { title: $('meta[property=\'og:title\']').attr('content'), image: $('meta[property=\'og:image\']').attr('content'), url: $('meta[property=\'og:url\']').attr('content'), description: $('meta[property=\'og:description\']').attr('content') }; if (res.title == null || res.url == null) throw new Error('No opengraph data found'); return res; } function escapeHtml(str) { return str.replace(/&/g, '&amp;'). replace(/</g, '&lt;'). replace(/>/g, '&gt;'); } exports.escapeHtml = escapeHtml; function getLinks(html) { var $ = cheerio.load(html); return $('a').toArray().map(a => a.attribs['href']); } var entryStrategies = { 'entry': function (html, url) { return __awaiter(this, void 0, void 0, function* () { var entry = yield getEntryFromHtml(html, url); if (entry.author !== null && entry.author.url !== null && entry.author.name === null) { try { var author = yield getCard(entry.author.url); if (author !== null) entry.author = author; } catch (err) { debug('Failed to fetch author page: ' + err.message); } } return entry; }); }, 'event': function (html, url) { return __awaiter(this, void 0, void 0, function* () { var event = yield getEventFromHtml(html, url); var entry = new Entry(url); entry.name = event.name; entry.content = { html: escapeHtml(event.name), value: event.name }; return entry; }); }, 'oembed': function (html, url) { return __awaiter(this, void 0, void 0, function* () { let entry = new Entry(url); var oembed = yield getOembed(html); if (oembed.title != null) entry.name = oembed.title; if (oembed.html != null) { let $ = cheerio.load(oembed.html); entry.content = { html: oembed.html, value: $(':root').text() }; } if (oembed.author_url != null && oembed.author_name != null) { entry.author = new Card(oembed.author_url); entry.author.name = oembed.author_name; } return entry; }); }, 'opengraph': function (html, url) { return __awaiter(this, void 0, void 0, function* () { let entry = new Entry(url); let og = getOpengraph(html); if (og.description != null) { entry.name = og.title; entry.content = { html: escapeHtml(og.description), value: og.description }; } else { entry.content = { html: escapeHtml(og.title), value: og.title }; } return entry; }); }, 'html': function (html, url) { return __awaiter(this, void 0, void 0, function* () { let entry = new Entry(url); let $ = cheerio.load(html); entry.name = $('title').text(); entry.content = { html: html, value: $('body').text() }; return entry; }); } }; function getEntry(url, strategies) { return __awaiter(this, void 0, Promise, function* () { if (strategies == null) strategies = ['entry']; var errs = []; debug('Fetching ' + url); var res = yield exports.request(url); if (res.statusCode != 200) throw new Error('Server returned status ' + res.statusCode); for (let s of strategies) { try { return yield entryStrategies[s](res.body, url); } catch (err) { errs.push(err); } } throw new Error('All strategies failed: ' + errs.reduce((p, c) => p + ',' + c.message)); }); } exports.getEntry = getEntry; function getEvent(url) { return __awaiter(this, void 0, Promise, function* () { debug('Fetching ' + url); var res = yield exports.request(url); if (res.statusCode != 200) throw new Error('Server returned status ' + res.statusCode); return getEventFromHtml(res.body, url); }); } exports.getEvent = getEvent; function getCard(url) { return __awaiter(this, void 0, Promise, function* () { debug('Fetching ' + url); var res = yield exports.request(url); if (res.statusCode != 200) throw new Error('Server returned status ' + res.statusCode); var mf = yield parser.getAsync({ html: res.body, baseUrl: url }); var cards = mf.items. filter(i => i.type.some(t => t == 'h-card')). map(h => buildCard(h)); // 1. uid and url match author-page url var match = cards.filter(c => c.url != null && c.uid != null && urlsEqual(c.url, url) && urlsEqual(c.uid, url)); if (match.length > 0) return match[0]; // 2. url matches rel=me if (mf.rels.me != null) { var match = cards.filter(c => mf.rels.me.some(r => c.url != null && urlsEqual(c.url, r))); if (match.length > 0) return match[0]; } // 3. url matches author-page url var match = cards.filter(c => c.url != null && urlsEqual(c.url, url)); if (match.length > 0) return match[0]; return null; }); } exports.getCard = getCard; function getFeed(url) { return __awaiter(this, void 0, Promise, function* () { debug('Fetching ' + url); var res = yield exports.request(url); if (res.statusCode != 200) throw new Error('Server returned status ' + res.statusCode); return getFeedFromHtml(res.body, url); }); } exports.getFeed = getFeed; function getEntryFromHtml(html, url) { return __awaiter(this, void 0, Promise, function* () { var mf = yield parser.getAsync({ html: html, baseUrl: url }); var entries = mf.items.filter(i => i.type.some(t => t == 'h-entry')); if (entries.length == 0) throw new Error('No h-entry found'); else if (entries.length > 1) throw new Error('Multiple h-entries found'); var relAuthor = mf.rels.author != null && mf.rels.author.length > 0 ? new Card(mf.rels.author[0]) : null; let entry = buildEntry(entries[0], relAuthor); return entry; }); } exports.getEntryFromHtml = getEntryFromHtml; function getEventFromHtml(html, url) { return __awaiter(this, void 0, Promise, function* () { var mf = yield parser.getAsync({ html: html, baseUrl: url }); var events = mf.items.filter(i => i.type.some(t => t === 'h-event')); if (events.length == 0) throw new Error('No h-event found'); else if (events.length > 1) throw new Error('Multiple h-events found'); var event = buildEvent(events[0]); if (event.url == null) event.url = url; return event; }); } var feedStrategies = { 'hfeed': function (html, url) { return __awaiter(this, void 0, void 0, function* () { var mf = yield parser.getAsync({ html: html, baseUrl: url }); var feeds = mf.items.filter(i => i.type.some(t => t === 'h-feed')); if (feeds.length == 0) throw new Error('No h-feed found'); else if (feeds.length > 1) throw new Error('Multiple h-feeds found'); var feed = yield buildFeed(feeds[0]); if (feed.url == null) feed.url = url; if (mf.rels.prev != null && mf.rels.prev.length > 0) feed.prev = mf.rels.prev[0]; else if (mf.rels.previous != null && mf.rels.previous.length > 0) feed.prev = mf.rels.previous[0]; if (mf.rels.next != null && mf.rels.next.length > 0) feed.next = mf.rels.next[0]; return feed; }); }, 'implied': function (html, url) { return __awaiter(this, void 0, void 0, function* () { var mf = yield parser.getAsync({ html: html, baseUrl: url }); var entries = mf.items.filter(i => i.type.some(t => t === 'h-entry')); if (entries.length == 0) throw new Error('No h-entries found'); var feed = new Feed(url); var $ = cheerio.load(html); feed.name = $('title').text(); feed.author = yield getCard(url); for (let entry of entries) { feed.addChild(buildEntry(entry, feed.author)); } if (mf.rels.prev != null && mf.rels.prev.length > 0) feed.prev = mf.rels.prev[0]; else if (mf.rels.previous != null && mf.rels.previous.length > 0) feed.prev = mf.rels.previous[0]; if (mf.rels.next != null && mf.rels.next.length > 0) feed.next = mf.rels.next[0]; return feed; }); } }; function getFeedFromHtml(html, url) { return __awaiter(this, void 0, Promise, function* () { var strategies = ['hfeed', 'implied']; var errs = []; for (let s of strategies) { try { return yield feedStrategies[s](html, url); } catch (err) { errs.push(err); } } throw new Error('All strategies failed: ' + errs.reduce((p, c) => p + ',' + c.message)); }); } function prop(mf, name, f) { if (mf.properties[name] != null) { if (f != null) return mf.properties[name].filter(e => e !== '').map(f); return mf.properties[name].filter(e => e !== ''); } return []; } function firstProp(mf, name, f) { if (mf.properties[name] != null) { if (f != null) return f(mf.properties[name][0]); return mf.properties[name][0]; } return null; } function buildCard(mf) { if (typeof (mf) === 'string') return new Card(mf); var card = new Card(); if (!mf.type.some(t => t === 'h-card')) throw new Error('Attempt to parse ' + mf.type + ' as Card'); card.name = firstProp(mf, 'name'); card.photo = firstProp(mf, 'photo'); card.url = firstProp(mf, 'url'); card.uid = firstProp(mf, 'uid'); return card; } function buildEvent(mf) { if (typeof (mf) === 'string') return new Event(mf); var event = new Event(); if (!mf.type.some(t => t === 'h-event')) throw new Error('Attempt to parse ' + mf.type + ' as Event'); event.name = firstProp(mf, 'name'); event.url = firstProp(mf, 'url'); event.start = firstProp(mf, 'start', s => new Date(s)); event.end = firstProp(mf, 'end', e => new Date(e)); event.location = firstProp(mf, 'location', l => buildCard(l)); return event; } function buildFeed(mf) { return __awaiter(this, void 0, void 0, function* () { if (typeof (mf) === 'string') return new Feed(mf); var feed = new Feed(); if (!mf.type.some(t => t === 'h-feed')) throw new Error('Attempt to parse ' + mf.type + ' as Feed'); feed.name = firstProp(mf, 'name'); feed.url = firstProp(mf, 'url'); feed.author = firstProp(mf, 'author', a => buildCard(a)); if (feed.author !== null && feed.author.url !== null && feed.author.name === null) { try { var author = yield getCard(feed.author.url); if (author !== null) feed.author = author; } catch (err) { debug('Failed to fetch author page: ' + err.message); } } (mf.children || []) .filter(i => i.type.some(t => t === 'h-cite' || t === 'h-entry')) .map(e => buildEntry(e, feed.author)) .filter(e => e.url != null) .map(e => feed.addChild(e)); return feed; }); } function buildEntry(mf, defaultAuthor) { if (typeof (mf) === 'string') return new Entry(mf); var entry = new Entry(); if (!mf.type.some(t => t === 'h-entry' || t === 'h-cite')) throw new Error('Attempt to parse ' + mf.type + ' as Entry'); entry.name = firstProp(mf, 'name'); entry.published = firstProp(mf, 'published', p => new Date(p)); entry.content = firstProp(mf, 'content'); entry.summary = firstProp(mf, 'summary'); entry.url = firstProp(mf, 'url'); entry.author = firstProp(mf, 'author', a => buildCard(a)); if (entry.author === null && defaultAuthor) entry.author = defaultAuthor; entry.category = prop(mf, 'category'); entry.syndication = prop(mf, 'syndication'); entry.syndicateTo = prop(mf, 'syndicate-to'); entry.photo = prop(mf, 'photo'); entry.audio = prop(mf, 'audio'); entry.video = prop(mf, 'video'); entry.replyTo = prop(mf, 'in-reply-to', r => buildEntry(r)); entry.likeOf = prop(mf, 'like-of', r => buildEntry(r)); entry.repostOf = prop(mf, 'repost-of', r => buildEntry(r)); entry.embed = firstProp(mf, 'x-embed'); (mf.children || []) .concat(mf.properties['comment'] || []) .filter(i => i.type.some(t => t === 'h-cite' || t === 'h-entry')) .map(e => buildEntry(e)) .filter(e => e.url != null) .map(e => entry.addChild(e)); return entry; } function urlsEqual(u1, u2) { var p1 = url.parse(u1); var p2 = url.parse(u2); return p1.protocol === p2.protocol && p1.host === p2.host && p1.path === p2.path; } class Entry { constructor(url) { this.name = null; this.published = null; this.content = null; this.summary = null; this.url = null; this.author = null; this.category = []; this.syndication = []; this.syndicateTo = []; this.photo = []; this.audio = []; this.video = []; this.replyTo = []; this.likeOf = []; this.repostOf = []; this.embed = null; this.children = new Map(); if (typeof (url) === 'string') { this.url = url; } } _getTime() { if (this.published != null) return this.published.getTime(); return -1; } _getType() { if (this.isLike() || this.isRepost()) return 1; return 0; } getDomain() { var p = url.parse(this.url); return p.protocol + '//' + p.host; } getPath() { return url.parse(this.url).path; } getReferences() { return this.replyTo.concat(this.likeOf).concat(this.repostOf).map(r => r.url); } getMentions() { var allLinks = this.getReferences(); if (this.content != null) allLinks = allLinks.concat(getLinks(this.content.html)); return allLinks; } getChildren(sortFunc) { var values = Array.from(this.children.values()); if (sortFunc != null) values.sort(sortFunc); return values; } addChild(entry) { if (entry.url == null) throw new Error('Url must be set'); this.children.set(entry.url, entry); } deleteChild(url) { return this.children.delete(url); } isReply() { return this.replyTo.length > 0; } isRepost() { return this.repostOf.length > 0; } isLike() { return this.likeOf.length > 0; } isArticle() { return !this.isReply() && !this.isRepost() && !this.isLike() && this.name != null && this.content != null && this.content.value != '' && this.name !== this.content.value; } serialize() { return JSON.stringify(this, (key, val) => { if (key === 'replyTo' || key === 'repostOf' || key === 'likeOf') return val.map(e => e.url); if (key === 'children') return Array.from(val.values()).map(r => r.url); return val; }); } static deserialize(json) { return JSON.parse(json, (key, val) => { if (val != null && key === 'author') { var author = new Card(); author.name = val.name; author.photo = val.photo; author.uid = val.uid; author.url = val.url; return author; } if (key === 'replyTo' || key === 'repostOf' || key === 'likeOf') return val.map(e => new Entry(e)); if (key === 'children') return new Map(val.map(url => [url, new Entry(url)])); if (key === '') { var entry = new Entry(); entry.name = val.name; entry.published = val.published ? new Date(val.published) : null; entry.content = val.content; entry.summary = val.summary; entry.url = val.url; entry.author = val.author; entry.category = val.category; entry.syndication = val.syndication; entry.syndicateTo = val.syndicateTo; entry.replyTo = val.replyTo; entry.likeOf = val.likeOf; entry.repostOf = val.repostOf; entry.embed = val.embed; entry.children = val.children; return entry; } return val; }); } } Entry.byDate = (a, b) => a._getTime() - b._getTime(); Entry.byDateDesc = (a, b) => b._getTime() - a._getTime(); Entry.byType = (a, b) => a._getType() - b._getType(); Entry.byTypeDesc = (a, b) => b._getType() - a._getType(); exports.Entry = Entry; class Card { constructor(urlOrName) { this.name = null; this.photo = null; this.url = null; this.uid = null; if (typeof (urlOrName) === 'string') { if (urlOrName.startsWith('http://') || urlOrName.startsWith('https://')) this.url = urlOrName; else this.name = urlOrName; } } } exports.Card = Card; class Event { constructor(url) { this.name = null; this.url = null; this.start = null; this.end = null; this.location = null; if (typeof (url) === 'string') { this.url = url; } } } exports.Event = Event; class Feed { constructor(url) { this.name = null; this.url = null; this.author = null; this.prev = null; this.next = null; this.children = new Map(); if (typeof (url) === 'string') { this.url = url; } } getChildren(sortFunc) { var values = Array.from(this.children.values()); if (sortFunc != null) values.sort(sortFunc); return values; } addChild(entry) { if (entry.url == null) throw new Error('Url must be set'); this.children.set(entry.url, entry); } deleteChild(url) { return this.children.delete(url); } } exports.Feed = Feed; //# sourceMappingURL=index.js.map