UNPKG

unfluff

Version:
141 lines (140 loc) 5.24 kB
// Generated by CoffeeScript 2.0.0-beta7 void function () { var cheerio, cleaner, extractor, getCleanedDoc, getParsedDoc, getTopNode, unfluff; cheerio = require('cheerio'); extractor = require('./extractor'); cleaner = require('./cleaner'); module.exports = unfluff = function (html, language) { var doc, lng, pageData, topNode; doc = cheerio.load(html); lng = language || extractor.lang(doc); pageData = { title: extractor.title(doc), softTitle: extractor.softTitle(doc), date: extractor.date(doc), author: extractor.author(doc), publisher: extractor.publisher(doc), copyright: extractor.copyright(doc), favicon: extractor.favicon(doc), description: extractor.description(doc), keywords: extractor.keywords(doc), lang: lng, canonicalLink: extractor.canonicalLink(doc), tags: extractor.tags(doc), image: extractor.image(doc) }; cleaner(doc); topNode = extractor.calculateBestNode(doc, lng); pageData.videos = extractor.videos(doc, topNode); pageData.links = extractor.links(doc, topNode, lng); pageData.text = extractor.text(doc, topNode, lng); return pageData; }; unfluff.lazy = function (html, language) { return { title: function () { var doc; doc = getParsedDoc.call(this, html); return null != this.title_ ? this.title_ : this.title_ = extractor.title(doc); }, softTitle: function () { var doc; doc = getParsedDoc.call(this, html); return null != this.softTitle_ ? this.softTitle_ : this.softTitle_ = extractor.softTitle(doc); }, date: function () { var doc; doc = getParsedDoc.call(this, html); return null != this.date_ ? this.date_ : this.date_ = extractor.date(doc); }, copyright: function () { var doc; doc = getParsedDoc.call(this, html); return null != this.copyright_ ? this.copyright_ : this.copyright_ = extractor.copyright(doc); }, author: function () { var doc; doc = getParsedDoc.call(this, html); return null != this.author_ ? this.author_ : this.author_ = extractor.author(doc); }, publisher: function () { var doc; doc = getParsedDoc.call(this, html); return null != this.publisher_ ? this.publisher_ : this.publisher_ = extractor.publisher(doc); }, favicon: function () { var doc; doc = getParsedDoc.call(this, html); return null != this.favicon_ ? this.favicon_ : this.favicon_ = extractor.favicon(doc); }, description: function () { var doc; doc = getParsedDoc.call(this, html); return null != this.description_ ? this.description_ : this.description_ = extractor.description(doc); }, keywords: function () { var doc; doc = getParsedDoc.call(this, html); return null != this.keywords_ ? this.keywords_ : this.keywords_ = extractor.keywords(doc); }, lang: function () { var doc; doc = getParsedDoc.call(this, html); return null != this.language_ ? this.language_ : this.language_ = language || extractor.lang(doc); }, canonicalLink: function () { var doc; doc = getParsedDoc.call(this, html); return null != this.canonicalLink_ ? this.canonicalLink_ : this.canonicalLink_ = extractor.canonicalLink(doc); }, tags: function () { var doc; doc = getParsedDoc.call(this, html); return null != this.tags_ ? this.tags_ : this.tags_ = extractor.tags(doc); }, image: function () { var doc; doc = getParsedDoc.call(this, html); return null != this.image_ ? this.image_ : this.image_ = extractor.image(doc); }, videos: function () { var doc, topNode; if (null != this.videos_) return this.videos_; doc = getCleanedDoc.call(this, html); topNode = getTopNode.call(this, doc, this.lang()); return this.videos_ = extractor.videos(doc, topNode); }, text: function () { var doc, topNode; if (null != this.text_) return this.text_; doc = getCleanedDoc.call(this, html); topNode = getTopNode.call(this, doc, this.lang()); return this.text_ = extractor.text(doc, topNode, this.lang()); }, links: function () { var doc, topNode; if (null != this.links_) return this.links_; doc = getCleanedDoc.call(this, html); topNode = getTopNode.call(this, doc, this.lang()); return this.links_ = extractor.links(doc, topNode, this.lang()); } }; }; getParsedDoc = function (html) { return null != this.doc_ ? this.doc_ : this.doc_ = cheerio.load(html); }; getTopNode = function (doc, lng) { return null != this.topNode_ ? this.topNode_ : this.topNode_ = extractor.calculateBestNode(doc, lng); }; getCleanedDoc = function (html) { var doc; if (null != this.cleanedDoc_) return this.cleanedDoc_; doc = getParsedDoc.call(this, html); this.cleanedDoc_ = cleaner(doc); return this.cleanedDoc_; }; }.call(this);