UNPKG

open-graph-scraper

Version:

Node.js scraper service for Open Graph info

522 lines (501 loc) 19.4 kB
/**
 * Mocha test suite for the scraper entry point exported by '../app'.
 *
 * Every case calls app(options, callback) and checks the arguments handed to
 * the callback: `err` (a boolean flag), `result` (with `success`, and either
 * `data` or `err`), and, in the first case, the raw `response`.
 *
 * NOTE(review): the fixtures point at public sites, so the expected values
 * presumably depend on what those pages serve at run time - confirm against
 * '../app' before treating a failure as a regression.
 */
var app = require('../app'),
  expect = require('expect.js');

// test url - this has a lot of OG info
var options1 = { 'url': 'http://ogp.me/' };

// test url formats
var options2 = { 'url': 'http://www.wikipedia.org/' },
  options3 = { 'url': 'https://www.wikipedia.org/' },
  options4 = { 'url': 'www.wikipedia.org/' },
  options5 = { 'url': 'wikipedia.org/' },
  options6 = { 'url': 'http://wikipedia.org/' };

// invalid url
var options7 = { 'url': 'http://testtesttest4564568.com' };

// empty value
var optionsNoUrl = { 'url': '' };

// no url
var optionsEmpty = { };

// test timeout
var options8 = { 'url': 'http://www.wikipedia.org/', 'timeout': 2000 },
  options9 = { 'url': 'http://www.wikipedia.org/', 'timeout': '' },
  options10 = { 'url': 'http://www.wikipedia.org/', 'timeout': '2000' },
  options11 = { 'url': 'http://www.wikipedia.org/', 'timeout': 'sdsdds' };

// some bad urls
var options12 = { 'url': 23233 },
  options13 = { 'url': '2323233' },
  options14 = { 'url': 'this is a testt' },
  options15 = { 'url': 'https://github.com/jshemas/notOpenGraphScraper' };

// test getting only open graph tags
var options16 = { 'url': 'http://www.wikipedia.org/', 'onlyGetOpenGraphInfo': true };

// test getting the description from meta tags
var options17 = { 'url': 'https://twitter.com/' };

// testing 304 page
var options18 = { 'url': 'http://www.wemeanbusinesslondon.com/blog/2016/5/10/the-entrepreneur-spiration-series-going-nuts-for-pip-nut' };

// testing all media
var options19 = { 'url': 'http://ogp.me', 'allMedia': true };

// test videos
var optionsYoutube = { 'url': 'https://www.youtube.com/watch?v=dQw4w9WgXcQ' },
  optionsTwitch = { 'url': 'https://www.twitch.tv/warcraft/v/78039967' };

// test image
var optionsFlickr = { 'url': 'https://www.flickr.com/photos/travelgraph/18791678505/in/gallery-flickr-72157663638192642/' };

// test twitter tags
var optionTwitter = { 'url': 'https://dev.twitter.com/' };

// test OG and twitter tags
var optionsGithub = { 'url': 'https://github.com' },
  optionsAtom = { 'url': 'https://atom.io' };

// test charset utf-8
var optionCharset1 = { 'url': 'http://ogp.me/', 'withCharset': true };

// test charset windows-1251
var optionCharset2 = { 'url': 'http://www.gazeta.ru/', 'encoding': null, 'withCharset': true };

// test for legacy with no charset
var optionCharset3 = { 'url': 'http://www.f2.dion.ne.jp/~initialt/errdiffusion.html', 'encoding': null };

// test not an html page
var optionNotHTML = { 'url': 'https://upload.wikimedia.org/wikipedia/commons/a/a2/Overlook_Hong_Kong_Island_north_coast,_Victoria_Harbour_and_Kowloon_from_middle_section_of_Lugard_Road_at_daytime_(enlarged_version_and_better_contrast,_revised).jpg' };

/**
 * Logs the callback arguments, then asserts the shared "successful scrape"
 * contract: `err` is false and `result.success` is true.
 *
 * @param {boolean} err - error flag passed to the app callback
 * @param {Object} result - result object passed to the app callback
 */
function expectSuccess(err, result) {
  console.log('err:', err);
  console.log('result:', result);
  expect(err).to.be(false);
  expect(result.success).to.be(true);
}

/**
 * Logs the callback arguments, then asserts the shared "failed scrape"
 * contract: `err` is true, `result.success` is false and `result.err`
 * equals the expected message.
 *
 * @param {boolean} err - error flag passed to the app callback
 * @param {Object} result - result object passed to the app callback
 * @param {string} message - exact value expected in `result.err`
 */
function expectFailure(err, result, message) {
  console.log('err:', err);
  console.log('result:', result);
  expect(err).to.be(true);
  expect(result.success).to.be(false);
  expect(result.err).to.be(message);
}

/**
 * Registers a test that expects `options` to scrape successfully and to
 * yield the og title 'Wikipedia'.
 *
 * @param {string} title - test title passed to `it`
 * @param {Object} options - options object passed to `app`
 */
function itScrapesWikipedia(title, options) {
  it(title, function (done) {
    app(options, function (err, result) {
      expectSuccess(err, result);
      expect(result.data.ogTitle).to.be('Wikipedia');
      done();
    });
  });
}

/**
 * Registers a test that expects `options` to fail with `message`.
 *
 * @param {string} title - test title passed to `it`
 * @param {Object} options - options object passed to `app`
 * @param {string} message - exact value expected in `result.err`
 */
function itFailsWith(title, options, message) {
  it(title, function (done) {
    app(options, function (err, result) {
      expectFailure(err, result, message);
      done();
    });
  });
}

describe('GET OG', function () {
  this.timeout(10000); // should wait at least ten seconds before failing

  it('Valid Call - ogp.me should return open graph data', function (done) {
    app(options1, function (err, result, response) {
      expectSuccess(err, result);
      expect(result.data.ogTitle).to.be('Open Graph protocol');
      expect(result.data.ogType).to.be('website');
      expect(result.data.ogUrl).to.be('http://ogp.me/');
      expect(result.data.ogDescription).to.be('The Open Graph protocol enables any web page to become a rich object in a social graph.');
      expect(result.data.ogImage.url).to.be('http://ogp.me/logo.png');
      expect(result.data.ogImage.width).to.be('300');
      expect(result.data.ogImage.height).to.be('300');
      expect(result.data.ogImage.type).to.be('image/png');
      expect(response).to.be.an('object');
      done();
    });
  });

  // url format variations
  itScrapesWikipedia('Valid Call - http', options2);
  itScrapesWikipedia('Valid Call - https', options3);
  itScrapesWikipedia('Valid Call - no protocol', options4);
  itScrapesWikipedia('Valid Call - no protocol and no wwww', options5);
  itScrapesWikipedia('Valid Call - protocol with no wwww', options6);

  // unreachable or missing urls
  itFailsWith('Invalid Call - fake page', options7, 'Page Not Found');
  itFailsWith('Invalid Call - empty url', optionsNoUrl, 'Invalid URL');
  itFailsWith('Invalid Call - empty options', optionsEmpty, 'Invalid URL');

  // timeout option variations
  itScrapesWikipedia('Valid Call - timeout set to 2000', options8);
  itScrapesWikipedia('Valid Call - timeout set to empty string', options9);
  itScrapesWikipedia('Valid Call - timeout number is a string', options10);
  itScrapesWikipedia('Valid Call - time is just a string of chars', options11);

  // malformed urls and a 404 page
  itFailsWith('Invalid Call - url is just a number', options12, 'Page Not Found');
  itFailsWith('Invalid Call - url is a string of numbers', options13, 'Page Not Found');
  itFailsWith('Invalid Call - url is a string of words', options14, 'Page Not Found');
  itFailsWith('Invalid Call - response code is 404', options15, 'Page Not Found');

  it('Valid Call - only get open graph info', function (done) {
    app(options16, function (err, result) {
      expectSuccess(err, result);
      expect(result.data).to.be.empty();
      done();
    });
  });

  it('Valid Call - test getting the description from meta tags', function (done) {
    app(options17, function (err, result) {
      expectSuccess(err, result);
      expect(result.data.ogTitle.length > 0).to.be(true);
      expect(result.data.ogDescription.length > 0).to.be(true);
      done();
    });
  });

  it('Valid Call - testing 304 page', function (done) {
    app(options18, function (err, result) {
      expectSuccess(err, result);
      expect(result.data.ogTitle).to.be('The Entrepreneur-spiration Series: Going nuts for Pip & Nut');
      done();
    });
  });

  it('Valid Call - should contain array of images', function (done) {
    app(options19, function (err, result) {
      expectSuccess(err, result);
      // with 'allMedia' set, ogImage is indexed as an array
      expect(result.data.ogImage[0].url).to.be('http://ogp.me/logo.png');
      expect(result.data.ogImage[0].width).to.be('300');
      expect(result.data.ogImage[0].height).to.be('300');
      expect(result.data.ogImage[0].type).to.be('image/png');
      done();
    });
  });

  it('Valid Call - Test Youtube Video - Should Return correct Open Graph Info', function (done) {
    app(optionsYoutube, function (err, result) {
      expectSuccess(err, result);
      expect(result.data.ogSiteName).to.be('YouTube');
      expect(result.data.ogTitle).to.be('Rick Astley - Never Gonna Give You Up');
      expect(result.data.ogUrl).to.be('https://www.youtube.com/watch?v=dQw4w9WgXcQ');
      expect(result.data.ogDescription).to.be('Rick Astley - Never Gonna Give You Up (Official Music Video) - Listen On Spotify: http://smarturl.it/AstleySpotify Download Rick\'s Number 1 album "50" - http...');
      expect(result.data.ogType).to.be('video');
      expect(result.data.ogImage.url).to.be('https://i.ytimg.com/vi/dQw4w9WgXcQ/maxresdefault.jpg');
      expect(result.data.ogVideo.url).to.be('https://www.youtube.com/embed/dQw4w9WgXcQ');
      expect(result.data.ogVideo.type).to.be('text/html');
      expect(result.data.ogVideo.width).to.be('1280');
      expect(result.data.ogVideo.height).to.be('720');
      done();
    });
  });

  it('Valid Call - Test Twitch.tv Video - Should Return correct Open Graph Info', function (done) {
    app(optionsTwitch, function (err, result) {
      expectSuccess(err, result);
      expect(result.data.ogSiteName).to.be('Twitch');
      expect(result.data.ogType).to.be('video');
      expect(result.data.ogImage.url).to.be('https://static-cdn.jtvnw.net/s3_vods/294d4c5c42_warcraft_22339636096_485121236/thumb/thumb0-480x320.jpg');
      expect(result.data.ogVideo.url).to.be('http://player.twitch.tv/?video=v78039967&player=facebook&autoplay=true');
      expect(result.data.ogVideo.type).to.be('text/html');
      expect(result.data.ogVideo.width).to.be('620');
      expect(result.data.ogVideo.height).to.be('378');
      done();
    });
  });

  it('Valid Call - Test Flickr Image - Should Return correct Open Graph Info', function (done) {
    app(optionsFlickr, function (err, result) {
      expectSuccess(err, result);
      expect(result.data.ogSiteName).to.be('Flickr');
      expect(result.data.ogTitle).to.be('Heimgarten');
      expect(result.data.ogUrl).to.be('https://www.flickr.com/photos/travelgraph/18791678505/');
      expect(result.data.ogType).to.be('flickr_photos:photo');
      expect(result.data.ogImage.url).to.be('https://c1.staticflickr.com/1/499/18791678505_5886fefcf7_b.jpg');
      expect(result.data.ogImage.width).to.be('1024');
      expect(result.data.ogImage.height).to.be('375');
      done();
    });
  });

  it('Valid Call - Test Twitter Tags - Should Return correct Open Graph Info + Some Twitter Info - Twitter Site', function (done) {
    app(optionTwitter, function (err, result) {
      expectSuccess(err, result);
      expect(result.data.twitterTitle).to.be('Twitter Developers');
      expect(result.data.twitterCard).to.be('summary');
      expect(result.data.twitterDescription).to.be('The Twitter platform connects your website or application with the worldwide conversation happening on Twitter.');
      // only the path suffix is checked for the image urls
      expect(result.data.twitterImage.url).to.contain('_static/imgs/twitterdev_gear.png');
      expect(result.data.ogSiteName).to.be('Twitter Developers');
      expect(result.data.ogTitle).to.be('Twitter Developers');
      expect(result.data.ogUrl).to.be('https://dev.twitter.com/');
      expect(result.data.ogType).to.be('website');
      expect(result.data.ogImage.url).to.contain('_static/imgs/twitterdev_gear.png');
      done();
    });
  });

  it('Valid Call - Test Twitter Tags - Should Return correct Open Graph Info + Some Twitter Info - Github Site', function (done) {
    app(optionsGithub, function (err, result) {
      expectSuccess(err, result);
      expect(result.data.ogUrl).to.be('https://github.com');
      expect(result.data.ogSiteName).to.be('GitHub');
      expect(result.data.ogTitle).to.be('Build software better, together');
      expect(result.data.ogDescription).to.be.a('string');
      expect(result.data.ogImage.url).to.be('https://assets-cdn.github.com/images/modules/open_graph/github-logo.png');
      expect(result.data.ogImage.width).to.be('1200');
      expect(result.data.ogImage.height).to.be('1200');
      expect(result.data.ogImage.type).to.be('image/png');
      done();
    });
  });

  it('Valid Call - Test Twitter Tags - Should Return correct Open Graph Info + Some Twitter Info - Atom Site', function (done) {
    app(optionsAtom, function (err, result) {
      expectSuccess(err, result);
      expect(result.data.ogUrl).to.be('https://atom.io/');
      expect(result.data.ogSiteName).to.be('Atom');
      expect(result.data.ogTitle).to.be('A hackable text editor for the 21st Century');
      expect(result.data.ogDescription).to.be('At GitHub, we’re building the text editor we’ve always wanted: hackable to the core, but approachable on the first day without ever touching a config file. We can’t wait to see what you build with it.');
      expect(result.data.ogType).to.be('website');
      expect(result.data.twitterCard).to.be('summary_large_image');
      expect(result.data.twitterSite).to.be('@AtomEditor');
      expect(result.data.twitterCreator).to.be('@github');
      expect(result.data.twitterTitle).to.be('Atom');
      expect(result.data.twitterDescription).to.be('A hackable text editor for the 21st Century');
      expect(result.data.ogImage.url).to.be('http://og.github.com/atom-mark/atom-mark@1200x630.png');
      expect(result.data.ogImage.width).to.be('1200');
      expect(result.data.ogImage.height).to.be('630');
      expect(result.data.ogImage.type).to.be(null);
      expect(result.data.twitterImage.url).to.be('http://og.github.com/atom-logo/atom-logo@1200x630.png');
      expect(result.data.twitterImage.width).to.be('1200');
      expect(result.data.twitterImage.height).to.be('630');
      expect(result.data.twitterImage.alt).to.be(null);
      done();
    });
  });

  it('Valid Call - Utf-8 charset - Should Return correct Open Graph Info + charset info', function (done) {
    app(optionCharset1, function (err, result) {
      expectSuccess(err, result);
      expect(result.data.charset).to.be('utf8');
      done();
    });
  });

  it('Valid Call - windows-1251 charset - Should Return correct Open Graph Info + charset info', function (done) {
    app(optionCharset2, function (err, result) {
      expectSuccess(err, result);
      expect(result.data.charset).to.be('windows-1251');
      expect(result.data.ogTitle).to.be('Главные новости - Газета.Ru');
      done();
    });
  });

  it('Valid Call - legacy no charset - Should Return correct Open Graph Info + charset info', function (done) {
    app(optionCharset3, function (err, result) {
      expectSuccess(err, result);
      expect(result.data.ogTitle).to.be('誤差拡散とは');
      done();
    });
  });

  itFailsWith('Invalid Call - Not a HTML page', optionNotHTML, 'Must scrape an HTML page');
});