UNPKG

job-scraper

Version:

An API that returns job prospect data scraped from Indeed and Monster

222 lines (197 loc) 9.52 kB
<!DOCTYPE html>
<html>
<head>
  <meta charset="UTF-8">
  <meta http-equiv="X-UA-Compatible" content="IE=edge,chrome=1">
  <meta name="viewport" content="width=device-width,initial-scale=1">
  <title>title</title>
  <meta name="author" content="name">
  <meta name="description" content="description here">
  <meta name="keywords" content="keywords,here">
  <link rel="stylesheet" type="text/css" href="css/job-scraper-modal.css">
  <link rel="stylesheet" type="text/css" href="css/job-scraper-display.css">
  <!-- NOTE(review): "bootsrap" looks like a typo for "bootstrap"; confirm against the directory layout before changing. -->
  <link rel="stylesheet" type="text/css" href="bootsrap/dist/css/bootstrap-grid.css">
</head>
<body>
  <h1>Hello World</h1>
  <h1>Hello World</h1>
  <h1>Hello World</h1>
  <h1>Hello World</h1>
  <h1>Hello World</h1>
  <h1>Hello World</h1>
  <h1>Hello World</h1>
  <h1>Hello World</h1>
  <!-- Placeholder element: renderCards() inserts the modal and the cards before it, then removes it. -->
  <div id="job-scraper-box"></div>
  <h1>hello world</h1>
  <script src="https://code.jquery.com/jquery-3.3.1.min.js" integrity="sha256-FgpCb/KJQlLNfOu91ta32o/NMZxltwRo8QtmkMRdAu8=" crossorigin="anonymous"></script>
  <script type="text/javascript">
    /**
     * Page entry point: scrapes an Indeed search-result page through the
     * allorigins proxy, renders one card per job, and wires up the detail modal.
     * Registered as a DOM-ready handler at the bottom of this script.
     */
    function autorun() {
      // Markup shown in the modal when no description could be extracted, or when
      // the description request has not completed (or failed) at click time.
      const DESCRIPTION_UNAVAILABLE_HTML = '<p style="text-align:center;">DESCRIPTION UNAVAILABLE</p>';

      /**
       * Regex-based extractors for one chunk of Indeed search-result HTML.
       * Every getter returns '' when its pattern does not match.
       */
      class IndeedParser {
        /**
         * @param {string} data - HTML chunk for one job.
         * @returns {string} Text of the first span whose class starts with "company", tags stripped.
         */
        static getCompany(data) {
          const capture = data.match(/<span.+?class=\"company.+?<\/span>/gm);
          return capture == null ? '' : capture[0].replace(/<.+?>/gm, '').trim();
        }

        /**
         * @param {string} data - HTML chunk for one job.
         * @returns {string} Text that follows the first "location" marker up to the next tag.
         */
        static getLocation(data) {
          const capture = data.match(/location.+?</gm);
          return capture == null ? '' : capture[0].replace(/.*>/g, '').replace('<', '').trim();
        }

        /**
         * @param {string} data - HTML chunk for one job.
         * @returns {string} Text that follows the first "date" marker up to the next tag.
         */
        static getDate(data) {
          const capture = data.match(/date.+?</gm);
          return capture == null ? '' : capture[0].replace(/.*>/g, '').replace('<', '').trim();
        }

        /**
         * @param {string} data - HTML chunk for one job.
         * @returns {string} Job title with bold/anchor markup removed and apostrophe entities decoded.
         */
        static getTitle(data) {
          const capture = data.match(/jobTitle.+?<\/a>/gm);
          if (capture == null) {
            return '';
          }
          return capture[0]
            .replace(/<b>/g, '')
            .replace(/<\/b>/g, '')
            .replace(/<\/a>/g, '')
            .replace(/jobTitle\">/g, '')
            // Global flag: a title can contain more than one encoded apostrophe.
            .replace(/&#039;/g, "'")
            .trim();
        }

        /**
         * Fetches the job page through the allorigins JSONP proxy and stores the
         * extracted summary markup on `prospect.descriptionHTML`.
         * Request failures are logged and leave `descriptionHTML` untouched.
         *
         * @param {string} url - Job page URL.
         * @param {JobProspect} prospect - Prospect to update in place.
         * @returns {Promise} Settles once the description has been stored or the error logged.
         */
        static resolveDescription(url, prospect) {
          return $.getJSON('http://allorigins.me/get?url=' + encodeURIComponent(url) + '&callback=?')
            .then((el) => {
              const capture = el.contents.replace(/\n/g, '').match(/class=\"{0,1}summary.+?<\/span>/gm);
              // The match starts at the class attribute, so the opening tag is re-added.
              prospect.descriptionHTML = capture == null
                ? DESCRIPTION_UNAVAILABLE_HTML
                : '<span ' + capture[0].trim();
            })
            .catch((err) => {
              console.log(err);
            });
        }

        /**
         * @param {string} data - HTML chunk for one job.
         * @returns {string} Plain-text summary snippet with span/bold tags removed and "&amp;" decoded.
         */
        static getTruncation(data) {
          const capture = data.match(/<span class=\"{0,1}summary.+?<\/span>/gm);
          if (capture == null) {
            return '';
          }
          return capture[0]
            .replace(/<span.+?>/g, '')
            .replace(/<\/span>/g, '')
            .replace(/<b>/g, '')
            .replace(/<\/b>/g, '')
            .replace(/\&amp\;/g, '&')
            .trim();
        }

        /**
         * Resolves the job URL from the chunk and loads its description.
         *
         * @param {string} data - HTML chunk for one job.
         * @param {string} query - Search query URL (forwarded to getUrl, which ignores it).
         * @param {JobProspect} prospect - Prospect to update in place.
         * @returns {Promise} Settles when resolveDescription settles.
         */
        static async setDescription(data, query, prospect) {
          const url = this.getUrl(data, query);
          return this.resolveDescription(url, prospect);
        }

        /**
         * @param {string} data - HTML chunk for one job.
         * @param {string} [query] - Unused; kept so existing callers keep working.
         * @returns {string} Absolute "viewjob" URL built from the first href in the chunk, or ''.
         */
        static getUrl(data, query) {
          const capture = data.match(/href=\".+?\"/gm);
          if (capture == null) {
            return '';
          }
          const path = capture[0].replace('href="', '').replace('"', '').replace('/rc/clk', '').trim();
          return 'https://www.indeed.com/viewjob' + path;
        }
      }

      /**
       * DOM helpers for the cards and the detail modal.
       * `HtmlUtility.cache` is populated by Scraper#renderCards before any of
       * the modal helpers run.
       */
      class HtmlUtility {
        /**
         * Fills the modal with the prospect referenced by the clicked button and opens it.
         *
         * @param {HTMLElement} el - Element carrying a `data-lead-number` attribute.
         */
        static triggerModal(el) {
          const cache = HtmlUtility.cache;
          const prospect = cache.data[el.dataset['leadNumber']];
          cache.company.innerHTML = prospect.company;
          cache.job.innerHTML = prospect.job;
          cache.date.innerHTML = prospect.date;
          cache.location.innerHTML = prospect.location;
          // The description is fetched asynchronously and may still be null here.
          // NOTE(review): descriptionHTML is markup scraped from a third-party page
          // and is inserted unsanitized; consider sanitizing before assignment.
          cache.content.innerHTML = prospect.descriptionHTML == null
            ? DESCRIPTION_UNAVAILABLE_HTML
            : prospect.descriptionHTML;
          cache.modal.classList.add('open');
          cache.apply.setAttribute('href', prospect.url);
        }

        /**
         * Array#map callback producing the card markup for one prospect.
         * Index 0 yields '' because the first piece of the split page is the
         * markup that precedes the first job marker, not a job.
         *
         * @param {JobProspect} prospect
         * @param {number} index - Position in the prospects array; stored as the lead number.
         * @param {JobProspect[]} arr - Unused.
         * @returns {string} Card HTML, or '' for index 0.
         */
        static produceCard(prospect, index, arr) {
          if (index <= 0) {
            return '';
          }
          return `<div class="job-scraper__item-wrapper">
            <div class="job-scraper__item">
              <div class="job-scraper__item-header">
                <p class=""><strong>${prospect.company.toUpperCase()}</strong></p>
                <p class=""><strong>${prospect.job}</strong></p>
                <p class=""><em>From</em> ${prospect.date} | ${prospect.location}</p>
              </div>
              <div class="job-scraper__item-content">
                <p class="">${prospect.truncatedDescription}</p>
              </div>
              <div class="job-scraper__item-footer">
                <p class="job-scraper__item-footer-content"><a data-lead-number="${index}" class="job-scraper__item-button">View Job</a></p>
              </div>
            </div>
          </div>`;
        }

        /** Hides the modal. */
        static closeModal() {
          HtmlUtility.cache.modal.classList.remove('open');
        }
      }

      /** Plain record describing one scraped job. */
      class JobProspect {
        /**
         * @param {string} company
         * @param {string} location
         * @param {string} date
         * @param {string} job - Job title.
         * @param {string} url - Link used by the modal's apply button.
         * @param {?string} description - Full description HTML; null until loaded.
         * @param {string} truncated - Short plain-text summary shown on the card.
         * @param {string} _data - Raw HTML chunk the fields were extracted from.
         */
        constructor(company, location, date, job, url, description, truncated, _data) {
          this.company = company;
          this.location = location;
          this.date = date;
          this.job = job;
          this.url = url;
          this.truncatedDescription = truncated;
          this.descriptionHTML = description;
          this._data = _data;
        }
      }

      /** Builds the proxied Indeed search query and renders its results. */
      class Scraper {
        /**
         * @param {string} jobTitle
         * @param {string} city
         * @param {string} state
         * @param {*} [type=null] - Stored on the instance; not otherwise used in this script.
         */
        constructor(jobTitle, city, state, type=null) {
          // Spaces become '+' and commas '%2C' so the values can sit inside the proxied URL.
          this.jobTitle = jobTitle.replace(/ /g, '+').replace(/,/g, '%2C');
          this.city = city.replace(/ /g, '+').replace(/,/g, '%2C');
          this.state = state.replace(/ /g, '+').replace(/,/g, '%2C');
          this.type = type;
          this.query = 'http://allorigins.me/get?url=https%3A//www.indeed.com/jobs%3Fq%3D' + this.jobTitle + '%26l%3D' + this.city + '%252C+' + this.state + '&callback=?';
        }

        /**
         * Requests the search page and converts it into prospects.
         *
         * @returns {Promise<JobProspect[]>} One entry per piece of the page split on the
         *   organic-job marker; entry 0 is the markup before the first job.
         */
        init() {
          return $.getJSON(this.query)
            .then((data) => data.contents.split('data-tn-component="organicJob"')
              .map((el) => el.replace(/\n/g, ''))
              .map((el) => {
                const prospect = new JobProspect(
                  IndeedParser.getCompany(el),
                  IndeedParser.getLocation(el),
                  IndeedParser.getDate(el),
                  IndeedParser.getTitle(el),
                  IndeedParser.getUrl(el),
                  null,
                  IndeedParser.getTruncation(el),
                  el
                );
                // Deliberately not awaited: cards render immediately and the
                // description is filled in on the prospect when its request completes.
                IndeedParser.setDescription(el, this.query, prospect);
                return prospect;
              }));
        }

        /**
         * Replaces the #job-scraper-box placeholder with the modal skeleton and
         * one card per prospect, then attaches the click handlers.
         *
         * @param {JobProspect[]} arr - Prospects as returned by init().
         */
        renderCards(arr) {
          const el = document.getElementById('job-scraper-box');
          // rel="noopener noreferrer": the link opens a scraped URL in a new tab
          // and must not give that page access to window.opener.
          const preHtml = `<div class="job-scraper__modal-veil">
            <div class="job-scraper__modal-wrapper">
              <div class="job-scraper__modal-header">
                <p><span class="job-scraper__close-box">X</span></p>
                <p class="job-scraper__company"></p>
                <hr />
                <p class="job-scraper__title"></p>
                <p class="job-scraper__location"></p>
                <p class="job-scraper__date"></p>
                <p>. . .</p>
              </div>
              <div class="job-scraper__modal-container"> </div>
              <div class="job-scraper__modal-footer">
                <a class="job-scraper__modal-apply" target="_blank" rel="noopener noreferrer">Apply to Job</a>
              </div>
            </div>
          </div>`;
          el.insertAdjacentHTML('beforebegin', preHtml);

          HtmlUtility.cache = {
            modal: document.getElementsByClassName('job-scraper__modal-veil')[0],
            company: document.getElementsByClassName('job-scraper__company')[0],
            job: document.getElementsByClassName('job-scraper__title')[0],
            location: document.getElementsByClassName('job-scraper__location')[0],
            date: document.getElementsByClassName('job-scraper__date')[0],
            content: document.getElementsByClassName('job-scraper__modal-container')[0],
            data: arr,
            apply: document.getElementsByClassName('job-scraper__modal-apply')[0]
          };

          const html = `<div class="job-scraper__items-container">` + arr.map(HtmlUtility.produceCard).join('') + `</div>`;
          el.insertAdjacentHTML('beforebegin', html);
          el.parentElement.removeChild(el);

          const cardButtons = document.getElementsByClassName('job-scraper__item-button');
          const closeModalButton = document.getElementsByClassName('job-scraper__close-box')[0];
          closeModalButton.addEventListener('click', () => {
            HtmlUtility.closeModal();
          });
          for (let i = 0; i < cardButtons.length; i++) {
            cardButtons[i].addEventListener('click', (e) => {
              // currentTarget is always the button the listener is bound to.
              HtmlUtility.triggerModal(e.currentTarget);
            });
          }
        }
      }

      const Scrapey = new Scraper('Career Services Advisor', 'New York', 'New York');
      const Scrape = Scrapey.init();
      Scrape
        .then((arr) => Scrapey.renderCards(arr))
        .catch((err) => {
          console.error(err);
        });

      // Exploratory request: dumps a Monster search page, split on the job-title
      // marker, to the console. Nothing on the page consumes the result.
      $.getJSON('http://allorigins.me/get?url=https%3A//www.monster.com/jobs/search/%3Fq%3Dcareer-services-advisor%26where%3DTampa__2C-FL%26jobid%3D193877545&callback=?')
        .then((data) => {
          console.log(data.contents.split('clickJobTitle').map((el) => el.split('\n')));
        })
        .catch((err) => {
          console.log(err);
        });
    }

    // Run autorun once the DOM is ready, with fallbacks for older event models.
    if (document.addEventListener) document.addEventListener("DOMContentLoaded", autorun, false);
    else if (document.attachEvent) document.attachEvent("onreadystatechange", autorun);
    else window.onload = autorun;
  </script>
</body>
</html>