UNPKG

google-carousel-scraper

Version:
106 lines (79 loc) 3.1 kB
'use strict'; Object.defineProperty(exports, "__esModule", { value: true }); var _regenerator = require('babel-runtime/regenerator'); var _regenerator2 = _interopRequireDefault(_regenerator); var _promise = require('babel-runtime/core-js/promise'); var _promise2 = _interopRequireDefault(_promise); var _asyncToGenerator2 = require('babel-runtime/helpers/asyncToGenerator'); var _asyncToGenerator3 = _interopRequireDefault(_asyncToGenerator2); var _zombie = require('zombie'); var _zombie2 = _interopRequireDefault(_zombie); var _fuzzyDateParseNaive = require('@quarterto/fuzzy-date-parse-naive'); var _fuzzyDateParseNaive2 = _interopRequireDefault(_fuzzyDateParseNaive); function _interopRequireDefault(obj) { return obj && obj.__esModule ? obj : { default: obj }; } var IPHONE_AGENT = 'Mozilla/5.0 (iPhone; CPU iPhone OS 9_3_2 like Mac OS X) AppleWebKit/601.1.46 (KHTML, like Gecko) Version/9.0 Mobile/13F69 Safari/601.1'; function getSearchUrl(searchTerm) { return 'https://www.google.co.uk/search?q=' + encodeURIComponent(searchTerm); } exports.default = function () { var ref = (0, _asyncToGenerator3.default)(_regenerator2.default.mark(function _callee(searchTerm) { var options = arguments.length <= 1 || arguments[1] === undefined ? { timeout: 15000 } : arguments[1]; var url, browser, timeout, timeoutPromise, links; return _regenerator2.default.wrap(function _callee$(_context) { while (1) { switch (_context.prev = _context.next) { case 0: url = getSearchUrl(searchTerm); _zombie2.default.waitDuration = 999999; browser = new _zombie2.default({ userAgent: IPHONE_AGENT }); timeout = void 0; timeoutPromise = new _promise2.default(function (resolve, reject) { var err = new Error('Search \'' + searchTerm + '\' timed out'); err.timeout = true; timeout = setTimeout(reject, options.timeout, err); }); _context.prev = 5; _context.prev = 6; _context.next = 9; return _promise2.default.race([browser.visit(url), timeoutPromise]); case 9: _context.next = 15; break; case 11: _context.prev = 11; _context.t0 = _context['catch'](6); if (!_context.t0.timeout) { _context.next = 15; break; } throw _context.t0; case 15: // 🙈 links = browser.queryAll('[data-ampgroup=true] a[data-amp]').map(function (link) { return { link: link.getAttribute('data-amp'), title: link.lastElementChild.textContent, date: (0, _fuzzyDateParseNaive2.default)(link.nextSibling.textContent), publisher: link.getAttribute('data-amp-title') }; }); return _context.abrupt('return', links); case 17: _context.prev = 17; clearTimeout(timeout); browser.destroy(); return _context.finish(17); case 21: case 'end': return _context.stop(); } } }, _callee, this, [[5,, 17, 21], [6, 11]]); })); return function (_x, _x2) { return ref.apply(this, arguments); }; }(); module.exports = exports['default'];