UNPKG

job-scraper

Version:

An API that returns job prospect data scraped from Indeed and Monster

105 lines (90 loc) 4.62 kB
'use strict'; Object.defineProperty(exports, "__esModule", { value: true }); var _createClass = function () { function defineProperties(target, props) { for (var i = 0; i < props.length; i++) { var descriptor = props[i]; descriptor.enumerable = descriptor.enumerable || false; descriptor.configurable = true; if ("value" in descriptor) descriptor.writable = true; Object.defineProperty(target, descriptor.key, descriptor); } } return function (Constructor, protoProps, staticProps) { if (protoProps) defineProperties(Constructor.prototype, protoProps); if (staticProps) defineProperties(Constructor, staticProps); return Constructor; }; }(); var _jquery = require('jquery'); var _jquery2 = _interopRequireDefault(_jquery); function _interopRequireDefault(obj) { return obj && obj.__esModule ? obj : { default: obj }; } function _asyncToGenerator(fn) { return function () { var gen = fn.apply(this, arguments); return new Promise(function (resolve, reject) { function step(key, arg) { try { var info = gen[key](arg); var value = info.value; } catch (error) { reject(error); return; } if (info.done) { resolve(value); } else { return Promise.resolve(value).then(function (value) { step("next", value); }, function (err) { step("throw", err); }); } } return step("next"); }); }; } function _classCallCheck(instance, Constructor) { if (!(instance instanceof Constructor)) { throw new TypeError("Cannot call a class as a function"); } } var IndeedParser = function () { function IndeedParser() { _classCallCheck(this, IndeedParser); } _createClass(IndeedParser, null, [{ key: 'getCompany', value: function getCompany(data) { var capture = data.match(/<span.+?class=\"company.+?<\/span>/gm); return capture == null ? '' : capture[0].replace(/<.+?>/gm, '').trim(); } }, { key: 'getLocation', value: function getLocation(data) { var capture = data.match(/location.+?</gm); return capture == null ? 
'' : capture[0].replace(/.*>/g, '').replace('<', '').trim(); } }, { key: 'getDate', value: function getDate(data) { var capture = data.match(/date.+?</gm); return capture == null ? '' : capture[0].replace(/.*>/g, '').replace('<', '').trim(); } }, { key: 'getTitle', value: function getTitle(data) { var capture = data.match(/jobTitle.+?<\/a>/gm); return capture == null ? '' : capture[0].replace(/<b>/g, '').replace(/<\/b>/g, '').replace(/<\/a>/g, '').replace(/jobTitle\">/g, '').replace('&#039;', "'").trim(); } }, { key: 'resolveDescription', value: function resolveDescription(url, prospect) { return _jquery2.default.getJSON('http://allorigins.me/get?url=' + encodeURIComponent(url) + '&callback=?').then(function (el) { var capture = el.contents.replace(/\n/g, '').match(/class=\"{0,1}summary.+?<\/span>/gm); prospect.descriptionHTML = capture == null ? '<p style="text-align:center;">DESCRIPTION UNAVAILABLE</p>' : '<span ' + capture[0].trim(); }).catch(function (err) { console.log(err); }); } }, { key: 'getTruncation', value: function getTruncation(data) { var capture = data.match(/<span class=\"{0,1}summary.+?<\/span>/gm); return capture == null ? 
'' : capture[0].replace(/<span.+?>/g, '').replace(/<\/span>/g, '').replace(/<b>/g, '').replace(/<\/b>/g, '').replace(/\&amp\;/g, '&').trim(); } }, { key: 'setDescription', value: function () { var _ref = _asyncToGenerator( /*#__PURE__*/regeneratorRuntime.mark(function _callee(data, query, prospect) { var url; return regeneratorRuntime.wrap(function _callee$(_context) { while (1) { switch (_context.prev = _context.next) { case 0: url = this.getUrl(data, query); _context.next = 3; return this.resolveDescription(url, prospect); case 3: return _context.abrupt('return', _context.sent); case 4: case 'end': return _context.stop(); } } }, _callee, this); })); function setDescription(_x, _x2, _x3) { return _ref.apply(this, arguments); } return setDescription; }() }, { key: 'getUrl', value: function getUrl(data, query) { var capture = data.match(/href=\".+?\"/gm); return capture == null ? '' : 'https://www.indeed.com/viewjob' + capture[0].replace('href="', '').replace('"', '').replace('/rc/clk', '').trim(); } }]); return IndeedParser; }(); exports.default = IndeedParser;