UNPKG

apostrophe

Version:

Apostrophe is a user-friendly content management system. You'll need more than this core module. See apostrophenow.org to get started.

664 lines (626 loc) • 22.2 kB
var async = require('async'); var request = require('request'); var oembetter = require('oembetter')(); var cheerio = require('cheerio'); /** * videos * @augments Augments the apos object with resources supporting video storage and playback */ module.exports = function(self) { // Retrieve videos. Query parameters are `skip`, `limit` and `q`. // // `q` searches the metadata of videos. // `skip` and `limit` are used to implement pagination. `limit` defaults to 10 and cannot // exceed 100. // // The response is a JSON object with `total` and // `videos` properties. The `total` property indicates how many total videos could be returned if // pagination were not taking place. `videos` contains an array of video objects with // metadata fields `title`, `thumbnail`, `tags`, `video` (the URL of the original video on // YouTube or a similar service), `width`, `height`, `credit`, `createdAt`, and `description` // among others. // // In case of error an appropriate HTTP status code is returned. // // The edit-file permission also applies to videos although they aren't // actually stored in local files. 
// GET /apos/browse-videos
//
// Responds with `{ total, videos }` for the video reuse library, newest
// first. Requires the `edit-file` permission; otherwise responds 404.
// Responds 500 if either database query fails.
self.app.get('/apos/browse-videos', function(req, res) {
  if (!self.permissions.can(req, 'edit-file', null)) {
    res.statusCode = 404;
    return res.send('not found');
  }
  var criteria = {};
  // Assumes sanitizeInteger(value, default, min, max), as its other uses in
  // this file suggest. The page size defaults to 10 and is clamped to
  // 1..100: a limit of 0 would be treated by MongoDB as "no limit", which
  // contradicts the documented maximum of 100.
  var skip = self.sanitizeInteger(req.query.skip, 0, 0);
  var limit = self.sanitizeInteger(req.query.limit, 10, 1, 100);
  if (req.query.q) {
    criteria.searchText = self.searchify(req.query.q);
  }
  if (req.query.type) {
    criteria.type = self.sanitizeString(req.query.type);
  }
  if (req.query.notType) {
    // Takes precedence over `type` when both are supplied
    criteria.type = { $ne: self.sanitizeString(req.query.notType) };
  }
  var result = {};
  async.series([
    // Total number of matches, ignoring pagination
    function(callback) {
      return self.videos.count(criteria, function(err, count) {
        result.total = count;
        return callback(err);
      });
    },
    // The requested page of matches
    function(callback) {
      return self.videos.find(criteria).sort({ createdAt: -1 }).skip(skip).limit(limit).toArray(function(err, videos) {
        result.videos = videos;
        return callback(err);
      });
    }
  ], function(err) {
    if (err) {
      res.statusCode = 500;
      return res.send('error');
    }
    return res.send(result);
  });
});

// Don't permit oembed of untrusted sites, which could
// lead to XSS attacks
oembetter.whitelist(oembetter.suggestedWhitelist.concat(self.options.oembedWhitelist || [], [ 'wufoo.com', 'infogr.am' ]));

// Make YouTube thumbnails bigger, and embeds opaque to
// fix z-index problems
oembetter.addAfter(function(url, options, response, callback) {
  if (!url.match(/youtube/)) {
    return setImmediate(callback);
  }
  // Fix YouTube iframes to use wmode=opaque so they don't
  // ignore z-index in Windows Chrome. Guarded because a response
  // without markup would otherwise throw here.
  if (response.html) {
    response.html = response.html.replace('feature=oembed', 'feature=oembed&wmode=opaque');
  }
  // Fix thumbnail to be largest available if it exists
  if (!response.thumbnail_url) {
    return setImmediate(callback);
  }
  var maxResImage = response.thumbnail_url.replace('hqdefault.jpg', 'maxresdefault.jpg');
  return request.head(maxResImage, function(err, httpResponse) {
    // Inspect the HTTP response of the HEAD request, not the oembed
    // response object, which has no statusCode. A failed probe is not
    // an error: we simply keep the original thumbnail.
    if (!err && httpResponse && (httpResponse.statusCode < 400)) {
      response.thumbnail_url = maxResImage;
    }
    return callback(null);
  });
});
//
// Fake oembed for YouTube playlists, they don't have
// it for playlists for some crazy reason
//
// Example:
// https://www.youtube.com/playlist?list=PL8E30EA58E2FDB48B
oembetter.addBefore(function(url, options, response, callback) {
  if (!url.match(/youtube.*?playlist/)) {
    return setImmediate(callback);
  }
  var matches = url.match(/list=([^&]+)/);
  if (!matches) {
    return setImmediate(callback);
  }
  var id = matches[1];
  return request(url, function(err, response, body) {
    if (err) {
      return callback(err);
    }
    var $ = cheerio.load(body);
    var $title = $('title');
    var title = $title.text();
    if (title) {
      title = title.trim();
    }
    // This is a terrible hack but it's effective for now
    // and means every single A2 developer doesn't need
    // their own API key
    var firstVideoId = $('#pl-video-list [data-video-id]').attr('data-video-id');
    if (!firstVideoId) {
      // Unable to continue without a thumbnail
      return callback(null);
    }
    return callback(null, url, options, {
      type: 'video',
      // The list id comes straight from the requested URL, so escape it
      // before placing it in an attribute
      html: '<iframe width="560" height="315" src="//www.youtube.com/embed/videoseries?list=' + self.escapeHtml(id) + '" frameborder="0" allowfullscreen></iframe>',
      title: title || 'YouTube Playlist',
      thumbnail_url: 'https://i.ytimg.com/vi/' + firstVideoId + '/hqdefault.jpg'
    });
  });
});

// Make vimeo thumbnails bigger
oembetter.addAfter(function(url, options, response, callback) {
  if (!url.match(/vimeo/)) {
    return setImmediate(callback);
  }
  // Fix vimeo thumbnails to be larger. Some responses carry no
  // thumbnail at all; leave those untouched rather than throwing.
  if (response.thumbnail_url) {
    response.thumbnail_url = response.thumbnail_url.replace('640.jpg', '1000.jpg');
  }
  return callback(null);
});

// Fake oembed for wufoo
oembetter.addBefore(function(url, options, response, mainCallback) {
  var who, what, title;
  return async.series({
    // If they used a pretty wufoo URL, we have to
    // fetch it and find the canonical URL in it first.
    canonicalize: function(callback) {
      var matches = url.match(/(\w+)\.wufoo\.com\/forms\/[\w]+\-[\w\-]+/);
      if (!matches) {
        return setImmediate(callback);
      }
      return request(url, function(err, response, body) {
        if (err) {
          return callback(err);
        }
        var matches = body.match(/\"(https?\:\/\/\w+\.wufoo\.com\/forms\/\w+)\/\"/);
        if (matches) {
          url = matches[1];
        }
        return callback(null);
      });
    },
    canonical: function(callback) {
      // Is it a canonical Wufoo URL?
      var matches = url.match(/(\w+)\.wufoo\.com\/forms\/([\w]+)/);
      if (!matches) {
        // None of our beeswax. Note that this deliberately bypasses
        // the rest of the series.
        return mainCallback(null);
      }
      who = matches[1];
      what = matches[2];
      return callback(null);
    },
    title: function(callback) {
      return request(url, function(err, response, body) {
        if (err) {
          return callback(err);
        }
        var $ = cheerio.load(body);
        var $title = $('title');
        title = $title.text();
        if (title) {
          title = title.trim();
        }
        return callback(null);
      });
    }
  }, function(err) {
    // If we failed before the account and form hash were determined
    // there is nothing valid to embed, so report the error. A failure
    // that only cost us the title is tolerated: the default title is used.
    if (err && !what) {
      return mainCallback(err);
    }
    // wufoo embed code as of 2014-07-16. -Tom
    return mainCallback(null, url, options, {
      type: 'rich',
      html: '<div id="wufoo-' + what + '"></div>' +
        afterScriptLoads('//wufoo.com/scripts/embed/form.js', false, false,
          'var s = d.createElement(t), options = {' +
          "'userName':'" + who + "'," +
          "'formHash':'" + what + "'," +
          "'autoResize':true," +
          "'height':'363'," +
          "'async':true," +
          "'host':'wufoo.com'," +
          "'header':'show'," +
          "'ssl':true};" +
          "try { " + what + " = new WufooForm();" + what + ".initialize(options);" + what + ".display(); } catch (e) {};"),
      title: title || 'Wufoo Form',
      thumbnail_url: 'https://www.wufoo.com/images/v3/home/banner.jpg'
    });
  });
});

// Fake oembed for infogr.am
oembetter.addBefore(function(url, options, response, callback) {
  var parse = require('url').parse;
  var parsed = parse(url);
  var title;
  if (!oembetter.inDomain('infogr.am', parsed.hostname)) {
    return setImmediate(callback);
  }
  var matches = url.match(/infogr\.am\/([^\?]+)/);
  if (!matches) {
    return setImmediate(callback);
  }
  var slug = matches[1];
  var anchorId = 'apos_infogram_anchor_0_' + slug;
  return request(url, function(err, response, body) {
    if (err) {
      return callback(err);
    }
    var $ = cheerio.load(body);
    var $title = $('title');
    title = $title.text();
    if (title) {
      title = title.trim();
    }
    return callback(null, url, options, {
      thumbnail_url: 'https://infogr.am/infogram.png',
      title: title || 'Infogram',
      type: 'rich',
      // The slug is taken from the requested URL; escape it for the
      // attribute. The browser decodes the entities, so the element id
      // still equals anchorId as passed to the loader script.
      html: '<div id="' + self.escapeHtml(anchorId) + '"></div>' + afterScriptLoads("//e.infogr.am/js/embed.js", anchorId, 'infogram_0_' + slug, ';')
    });
  });
});

// Given a URL, return a nice oembed response for it
// based on its Open Graph tags, or the best we can
// fake, based on the HTML markup of the page.
//
// The callback receives (err) if the page cannot be fetched, otherwise
// (null, { thumbnail_url, title, type: 'rich', html }).
self.openGraphEmbed = function(url, callback) {
  return request(url, function(err, response, body) {
    if (err) {
      return callback(err);
    }
    var $ = cheerio.load(body);
    var title = $('meta[property="og:title"]').attr('content') ||
      $('title').text();
    if (!title) {
      // A common goof these days
      title = $('h1').text();
      if (!title) {
        // Oh c'mon
        title = url;
      }
    }
    var type = $('meta[property="og:type"]').attr('content') ||
      'website';
    var image = $('meta[property="og:image"]').attr('content');
    if (!image) {
      // Looks like cheerio doesn't do :first yet?
      var $img = $('img');
      if ($img.length) {
        image = $img.attr('src');
      }
    }
    if (image) {
      if (image.match(/^\w+:/)) {
        if (!image.match(/^https?:/)) {
          // No dangerous schemes
          image = undefined;
        }
      } else {
        // Relative URL
        image = require('url').resolve(url, image);
      }
    }
    if (!image) {
      image = undefined;
    }
    var description = $('meta[property="og:description"]').attr('content') ||
      $('meta[name="description"]').attr('content');
    if (!description) {
      // Remove text that isn't text. The element is named `style`;
      // selecting `styles` matched nothing and let CSS leak into
      // the description.
      $('script').remove();
      $('style').remove();
      description = $('body').text();
    }
    description = self.truncatePlaintext(description, 300);
    url = $('meta[property="og:url"]').attr('content') ||
      url;
    var markup = self.partial('openGraphEmbed.html', {
      title: title,
      type: type,
      image: image,
      description: description,
      url: url
    });
    return callback(null, {
      thumbnail_url: image,
      title: title,
      type: 'rich',
      html: markup
    });
  });
};

// Given a URL, return an oembed response for it
// which just iframes the URL given. The response
// does have a title property, so we do have to
// fetch the URL
//
// If options.iframeHeight is set, use that # of
// pixels, otherwise do not specify & let CSS do it
self.iframeEmbed = function(url, options, callback) {
  return request(url, function(err, response, body) {
    if (err) {
      return callback(err);
    }
    var $ = cheerio.load(body);
    var title = $('meta[property="og:title"]').attr('content') ||
      $('title').text();
    if (!title) {
      // A common goof these days
      title = $('h1').text();
      if (!title) {
        // Oh c'mon
        title = url;
      }
    }
    var style = '';
    // STYLE is a placeholder, swapped below for the optional style
    // attribute. It precedes the URL, so the single replace cannot
    // hit the URL text.
    var html = '<iframe STYLE src="' + self.escapeHtml(url) + '" class="apos-always-iframe"></iframe>';
    if (options.iframeHeight) {
      style = 'style="height:' + options.iframeHeight + 'px"';
    }
    html = html.replace('STYLE', style);
    return callback(null, {
      title: title,
      type: 'rich',
      html: html
    });
  });
};

// Returns browser-side javascript to load a given
// cross-domain js file dynamically and then run
// the javascript code in the `then` string argument.
// `script` should be a URL pointing to the third-party
// js file and may start with // to autoselect
// http or https depending on how the page was loaded.
//
// You may supply an id attribute for the script tag.
// Some services rely on these (infogr.am).
//
// You may also supply the ID of an element that the
// script should be inserted immediately before. Some
// services try to infer how they should behave from the
// context the script tag is in (infogr.am).
//
// This code was inspired by the wufoo embed code and
// is used to dynamically load wufoo and other services
// that use js-based embed codes.
//
// `script`, `beforeId` and `scriptId` are escaped before being placed
// in the generated code, so values derived from user-supplied URLs
// cannot terminate the string literal or the script element. `then`
// is inserted verbatim: it is code, and must come from a trusted source.
//
// Returns the markup of a complete script element, as a string.
function afterScriptLoads(script, beforeId, scriptId, then) {
  // Make a value safe inside a quoted JS string literal within an
  // inline script element. Backslashes, both quote styles, angle
  // brackets and line terminators become \uXXXX escapes; every other
  // character is passed through unchanged.
  function escapeForScript(value) {
    return String(value).replace(/[\\'"<>\u2028\u2029\r\n]/g, function(c) {
      return '\\u' + ('0000' + c.charCodeAt(0).toString(16)).slice(-4);
    });
  }

  var src;
  if (script.match(/^\/\//)) {
    // Protocol-relative: pick the protocol at runtime in the browser
    src = "('https:' == d.location.protocol ? 'https:' : 'http:') + '" + escapeForScript(script) + "'";
  } else {
    src = "'" + escapeForScript(script) + "'";
  }

  var assignId = '';
  if (scriptId) {
    assignId = 's.id = "' + escapeForScript(scriptId) + '"; ';
  }

  // Insert before the named element, or before the first script tag
  var before;
  if (beforeId) {
    before = 'd.getElementById("' + escapeForScript(beforeId) + '")';
  } else {
    before = 'd.getElementsByTagName(t)[0]';
  }

  return '<script type="text/javascript">' +
    '(function(d, t) {' +
    'var s = d.createElement(t);' +
    "s.src = " + src + ";" +
    assignId +
    "s.onload = s.onreadystatechange = function() {" +
    "var rs = this.readyState; if (rs) if (rs != 'complete') if (rs != 'loaded') return;" +
    then +
    "};" +
    "var scr = " + before + ", par = scr.parentNode; par.insertBefore(s, scr);" +
    "})(document, 'script');" +
    "</script>";
}

// This method fetches the specified URL, determines its best embedded
// representation via oembed, and on success invokes its callback with null
// and an object containing the oembed API response from the service provider.
//
// Responses are automatically cached.
//
// If options.alwaysIframe is true, the result is a simple
// iframe of the URL.
//
// IF YOU WANT TO ADJUST THE RESPONSE: look ^^^ at the
// "oembetter.addAfter" calls above. Also see the oembetter
// documentation. Do NOT special case them in the function below.
//
// Thanks! -Tom
//
// `options` may be omitted, in which case the second argument is the
// callback. The callback receives (err) on failure or (null, response).
self.oembed = function(url, options, mainCallback) {
  if (!mainCallback) {
    mainCallback = options;
    options = {};
  }
  if (!self._oembedCache) {
    self._oembedCache = self.getCache('oembed');
  }
  var response;
  // The same URL yields different markup for different options, so the
  // options are part of the cache key. The key must be used for both
  // the read and the write or the cache can never hit.
  var key = url + ':' + JSON.stringify(options);
  return async.series({
    checkCache: function(callback) {
      return self._oembedCache.get(key, function(err, _response) {
        if (err) {
          return callback(err);
        }
        if (_response !== undefined) {
          // Cache hit: answer immediately and abandon the series
          return mainCallback(err, _response);
        }
        return callback(null);
      });
    },
    fetch: function(callback) {
      if (options.alwaysIframe) {
        return self.iframeEmbed(url, options, function(err, _response) {
          if (err) {
            return callback(err);
          }
          response = _response;
          return callback(null);
        });
      }
      return oembetter.fetch(url, function(err, _response) {
        if (err) {
          // Try open graph as a fallback
          return self.openGraphEmbed(url, function(err, _response) {
            if (err) {
              return callback(err);
            }
            response = _response;
            return callback(null);
          });
        }
        response = _response;
        return callback(null);
      });
    },
    forceSsl: function(callback) {
      // Make non-secure URLs protocol relative and
      // let the browser upgrade them to https if needed
      function makeProtocolRelative(s) {
        s = s.replace(/^http\:\/\//, '//');
        return s.replace(/(["'])http\:\/\//g, '$1//');
      }
      if (response.thumbnail_url) {
        response.thumbnail_url = makeProtocolRelative(response.thumbnail_url);
      }
      if (response.html) {
        response.html = makeProtocolRelative(response.html);
      }
      return setImmediate(callback);
    },
    setCache: function(callback) {
      // cache oembed responses for one hour, under the same key
      // that checkCache looks up
      return self._oembedCache.set(key, response, 60 * 60, callback);
    }
  }, function(err) {
    if (err) {
      return mainCallback(err);
    }
    return mainCallback(err, response);
  });
};

// Simple REST API to apos.oembed. Accepts url and
// alwaysIframe parameters; alwaysIframe is assumed false
// if not provided. The response is a JSON object as returned
// by apos.oembed.
// You may use GET or POST
//
// An `iframeHeight` parameter is also read and passed along as an option.
// Any failure is logged and reported to the client as a 404.
self.app.all('/apos/oembed', function(req, res) {
  var params;
  if (req.method === 'POST') {
    params = req.body;
  } else {
    params = req.query;
  }
  var target = self.sanitizeString(params.url);
  var embedOptions = {
    alwaysIframe: self.sanitizeBoolean(params.alwaysIframe),
    iframeHeight: self.sanitizeInteger(params.iframeHeight)
  };
  return self.oembed(target, embedOptions, function(err, result) {
    if (!err) {
      return res.send(result);
    }
    console.error(err);
    res.statusCode = 404;
    return res.send('not found');
  });
});

// Store a video or other oembed object for potential reuse.
// Saves metadata such as the title, width, height, video URL,
// thumbnail URL and search text. The URL should be sent in
// the `video` POST parameter. The response is
// a JSON object with the video information if successful,
// otherwise an appropriate HTTP status code.
self.app.post('/apos/remember-video', function(req, res) {
  // The request body is handed to acceptVideo as-is; that method
  // does its own sanitization of the fields it reads.
  return self.acceptVideo(req, req.body, function(err, video) {
    if (!err) {
      return res.send(video);
    }
    console.log(err);
    res.statusCode = 404;
    return res.send('not found');
  });
});

// Insert or update a video in Apostrophe's video reuse library.
// req is passed for permissions purposes. info should
// be an object with a `url` property. The video is examined
// via apos.oembed and, if successful, added to the video
// reuse collection. The callback is invoked (err, video),
// where video (if any) is the video object recorded
// in the reuse library.
//
// Note that this method does not give you back an embed code.
// For that, see apos.oembed.
//
// If the optional `alwaysIframe` parameter is true, then
// an iframe is always used to present the given `url`,
// at the height specified by the `iframeHeight` parameter.
self.acceptVideo = function(req, info, callback) { // for bc we also accept info.video var url = self.sanitizeString(info.url || info.video); var alwaysIframe = self.sanitizeBoolean(info.alwaysIframe); var iframeHeight = self.sanitizeInteger(info.iframeHeight); return self.oembed(url, { alwaysIframe: alwaysIframe }, function(err, result) { if (err) { return callback(err); } var width = result.width; var height = result.height; var video = { title: result.title, width: width, height: height, video: url, type: result.type, alwaysIframe: alwaysIframe, thumbnail: result.thumbnail_url, landscape: width > height, portrait: height > width, searchText: self.sortify(result.title), createdAt: new Date(), iframeHeight: iframeHeight }; var doc; return async.series({ find: function(callback) { return self.videos.findOne({ video: url }, function(err, _doc) { if (err) { return callback(err); } doc = _doc; return callback(null); }); }, update: function(callback) { if (!doc) { return setImmediate(callback); } video._id = doc._id; return self.videos.update({ video: url }, video, callback); }, insert: function(callback) { if (doc) { return setImmediate(callback); } return self.videos.insert(video, function(err, _doc) { if (err) { return callback(err); } doc = _doc; return callback(null); }); } }, function(err) { if (err) { return callback(err); } return callback(null, doc || video); }); }); }; self.tasks.oembed = function(callback) { console.log('Refreshing all oembed data for videos'); // iterator receives page object, area name, area object, item offset, item object. 
var oembedCache = {}; var n = 0; return self.forEachItem(function(page, name, area, offset, item, callback) { function go(result) { n++; console.log('examining video ' + n); item.thumbnail = result.thumbnail_url; item.title = result.title; return self.pages.update({ _id: page._id }, page, function(err, count) { return callback(err); }); } if (item.type !== 'video') { return callback(null); } if (oembedCache[item.video]) { go(oembedCache[item.video]); } else { // 1/10th second pause between oembed hits to avoid being rate limited // (I don't know what their rate limit is, but...) setTimeout(function() { return oembetter.fetch(item.video, {}, function (err, result) { if (!err) { oembedCache[item.video] = result; go(result); } else { // A few oembed errors are normal and not cause for panic. // Videos go away, for one thing. If you get a zillion of these // it's possible you have hit a rate limit console.log('Warning: oembed error for ' + item.video + '\n'); console.log(err); return callback(null); } }); }, 100); } }, callback); }; };