job-scraper
Version:
An API that returns job prospect data scraped from Indeed and Monster.
222 lines (197 loc) • 9.52 kB
HTML
<html>
<head>
<meta charset="UTF-8">
<meta http-equiv="X-UA-Compatible" content="IE=edge,chrome=1">
<meta name="viewport" content="width=device-width,initial-scale=1">
<title>title</title>
<meta name="author" content="name">
<meta name="description" content="description here">
<meta name="keywords" content="keywords,here">
<link rel="stylesheet" type="text/css" href="css/job-scraper-modal.css">
<link rel="stylesheet" type="text/css" href="css/job-scraper-display.css">
<link rel="stylesheet" type="text/css" href="bootsrap/dist/css/bootstrap-grid.css">
</head>
<body>
<h1>Hello World</h1>
<h1>Hello World</h1>
<h1>Hello World</h1>
<h1>Hello World</h1>
<h1>Hello World</h1>
<h1>Hello World</h1>
<h1>Hello World</h1>
<h1>Hello World</h1>
<div id="job-scraper-box"></div>
<h1>hello world</h1>
<script
src="https://code.jquery.com/jquery-3.3.1.min.js"
integrity="sha256-FgpCb/KJQlLNfOu91ta32o/NMZxltwRo8QtmkMRdAu8="
crossorigin="anonymous"></script>
<script type="text/javascript">
function autorun()
{
/**
 * Regex-based extractors for one Indeed search-result HTML fragment.
 *
 * Every getter takes the raw HTML of a single result (newlines already
 * removed by the caller) and returns a trimmed string, or '' when the
 * expected markup is not present. Only the first match in the fragment is used.
 */
class IndeedParser {
    /**
     * Returns the text that follows the last `>` of the first `pattern` match,
     * with the trailing `<` removed.
     * @param {string} data - HTML fragment to search.
     * @param {RegExp} pattern - Pattern whose match ends on the `<` closing the text.
     * @returns {string} Trimmed text, or '' when nothing matches.
     */
    static extractTrailingText(data, pattern) {
        const capture = data.match(pattern);
        if (capture == null) {
            return '';
        }
        return capture[0].replace(/.*>/g, '').replace('<', '').trim();
    }

    /**
     * Extracts the company name from the first `<span ... class="company...">`.
     * @param {string} data - HTML fragment for one result.
     * @returns {string} Company text with all tags stripped, or ''.
     */
    static getCompany(data) {
        const capture = data.match(/<span.+?class="company.+?<\/span>/gm);
        return capture == null ? '' : capture[0].replace(/<.+?>/gm, '').trim();
    }

    /**
     * Extracts the text following the first occurrence of "location".
     * @param {string} data - HTML fragment for one result.
     * @returns {string} Location text, or ''.
     */
    static getLocation(data) {
        return IndeedParser.extractTrailingText(data, /location.+?</gm);
    }

    /**
     * Extracts the text following the first occurrence of "date".
     * @param {string} data - HTML fragment for one result.
     * @returns {string} Date text, or ''.
     */
    static getDate(data) {
        return IndeedParser.extractTrailingText(data, /date.+?</gm);
    }

    /**
     * Extracts the job title from the first `jobTitle">...</a>` anchor.
     * Bold tags are removed and every `&#39;` entity becomes an apostrophe.
     * @param {string} data - HTML fragment for one result.
     * @returns {string} Title text, or ''.
     */
    static getTitle(data) {
        const capture = data.match(/jobTitle.+?<\/a>/gm);
        if (capture == null) {
            return '';
        }
        return capture[0]
            .replace(/<\/?b>/g, '')
            .replace(/<\/a>/g, '')
            .replace(/jobTitle">/g, '')
            .replace(/&#39;/g, "'")
            .trim();
    }

    /**
     * Fetches the job page through the allorigins proxy and stores its
     * summary `<span>` on `prospect.descriptionHTML`.
     *
     * When the summary cannot be found, or the request/parsing fails, a
     * "DESCRIPTION UNAVAILABLE" paragraph is stored instead so the field never
     * stays unset after the returned promise settles.
     * @param {string} url - Absolute URL of the job page.
     * @param {{descriptionHTML: ?string}} prospect - Object that is mutated in place.
     * @returns {PromiseLike<void>} Settles once `prospect` has been updated; never rejects.
     */
    static resolveDescription(url, prospect) {
        const unavailable = '<p style="text-align:center;">DESCRIPTION UNAVAILABLE</p>';
        return $.getJSON('http://allorigins.me/get?url=' + encodeURIComponent(url) + '&callback=?')
            .then((response) => {
                const capture = response.contents
                    .replace(/\n/g, '')
                    .match(/class="{0,1}summary.+?<\/span>/gm);
                // The match starts at `class=`, so the opening `<span ` is put back.
                prospect.descriptionHTML = capture == null ? unavailable : '<span ' + capture[0].trim();
            })
            .catch((err) => {
                prospect.descriptionHTML = unavailable;
                console.error(err);
            });
    }

    /**
     * Extracts the short summary shown on the result card, with `<span>` and
     * `<b>` tags removed and `&amp;` decoded to `&`.
     * @param {string} data - HTML fragment for one result.
     * @returns {string} Plain summary text, or ''.
     */
    static getTruncation(data) {
        const capture = data.match(/<span class="{0,1}summary.+?<\/span>/gm);
        if (capture == null) {
            return '';
        }
        return capture[0]
            .replace(/<span.+?>/g, '')
            .replace(/<\/span>/g, '')
            .replace(/<\/?b>/g, '')
            .replace(/&amp;/g, '&')
            .trim();
    }

    /**
     * Resolves the job URL from `data` and loads its full description into `prospect`.
     * @param {string} data - HTML fragment for one result.
     * @param {string} query - Search query URL; forwarded to getUrl, which ignores it.
     * @param {{descriptionHTML: ?string}} prospect - Object that receives the description.
     * @returns {Promise<void>} Settles when the description has been stored.
     */
    static async setDescription(data, query, prospect) {
        const url = IndeedParser.getUrl(data, query);
        return IndeedParser.resolveDescription(url, prospect);
    }

    /**
     * Builds the job page URL from the first `href="..."` in the fragment by
     * dropping the `/rc/clk` prefix and prepending the Indeed viewjob base.
     * @param {string} data - HTML fragment for one result.
     * @param {string} [query] - Unused; kept for call compatibility.
     * @returns {string} Absolute URL, or '' when the fragment has no href.
     */
    static getUrl(data, query) {
        const capture = data.match(/href=".+?"/gm);
        if (capture == null) {
            return '';
        }
        const path = capture[0].replace('href="', '').replace('"', '').replace('/rc/clk', '').trim();
        return 'https://www.indeed.com/viewjob' + path;
    }
}
/**
 * DOM helpers for the job list: card markup plus opening and closing the
 * detail modal.
 *
 * The modal methods read `HtmlUtility.cache`, which is not defined here; it
 * must be assigned before they run. It is expected to hold the `modal`,
 * `company`, `job`, `date`, `location`, `content` and `apply` elements and a
 * `data` array of prospects.
 *
 * None of the methods use `this`, so they can be passed around as plain
 * callbacks (e.g. to `Array.prototype.map`).
 */
class HtmlUtility {
    /**
     * Fills the modal with the prospect referenced by the clicked element and
     * opens it. Does nothing when the element does not reference a known prospect.
     *
     * NOTE(review): the values are written with `innerHTML` and come from
     * scraped markup; the description is intentionally HTML, but the other
     * fields are not sanitized here.
     * @param {HTMLElement} el - Element carrying a `data-lead-number` attribute.
     * @returns {void}
     */
    static triggerModal(el) {
        const cache = HtmlUtility.cache;
        const prospect = cache.data[el.dataset.leadNumber];
        if (prospect == null) {
            return;
        }
        cache.company.innerHTML = prospect.company;
        cache.job.innerHTML = prospect.job;
        cache.date.innerHTML = prospect.date;
        cache.location.innerHTML = prospect.location;
        cache.content.innerHTML = prospect.descriptionHTML;
        cache.modal.classList.add('open');
        cache.apply.setAttribute('href', prospect.url);
    }

    /**
     * Builds the card markup for one prospect. Signature matches an
     * `Array.prototype.map` callback.
     * @param {Object} prospect - Record with company, job, date, location and truncatedDescription.
     * @param {number} index - Position in the prospects array; written to `data-lead-number`.
     * @param {Array} [arr] - Unused; present because `map` supplies it.
     * @returns {string} Card HTML, or '' for index 0 (and any non-positive index).
     */
    static produceCard(prospect, index, arr) {
        if (!(index > 0)) {
            return '';
        }
        return `<div class="job-scraper__item-wrapper">
    <div class="job-scraper__item">
        <div class="job-scraper__item-header">
            <p class=""><strong>${prospect.company.toUpperCase()}</strong></p>
            <p class=""><strong>${prospect.job}</strong></p>
            <p class=""><em>From</em> ${prospect.date} | ${prospect.location}</p>
        </div>
        <div class="job-scraper__item-content">
            <p class="">${prospect.truncatedDescription}</p>
        </div>
        <div class="job-scraper__item-footer">
            <p class="job-scraper__item-footer-content"><a data-lead-number="${index}" class="job-scraper__item-button">View Job</a></p>
        </div>
    </div>
</div>`;
    }

    /**
     * Hides the modal by removing its `open` class.
     * @returns {void}
     */
    static closeModal() {
        HtmlUtility.cache.modal.classList.remove('open');
    }
}
/**
 * Plain record holding the fields extracted for one job posting.
 */
class JobProspect {
    /**
     * @param {string} company - Stored as `company`.
     * @param {string} location - Stored as `location`.
     * @param {string} date - Stored as `date`.
     * @param {string} job - Stored as `job`.
     * @param {string} url - Stored as `url`.
     * @param {?string} description - Stored as `descriptionHTML`.
     * @param {string} truncated - Stored as `truncatedDescription`.
     * @param {*} _data - Stored unchanged as `_data`.
     */
    constructor(company, location, date, job, url, description, truncated, _data) {
        // Key order mirrors the order in which the fields have always been set.
        Object.assign(this, {
            company,
            location,
            date,
            job,
            url,
            truncatedDescription: truncated,
            descriptionHTML: description,
            _data,
        });
    }
}
/**
 * Fetches an Indeed search page through the allorigins JSONP proxy, turns it
 * into JobProspect records and renders them as cards with a detail modal.
 */
class Scraper {
    /**
     * @param {string} jobTitle - Search keywords.
     * @param {string} city - City part of the location.
     * @param {string} state - State part of the location.
     * @param {*} [type=null] - Stored on the instance; not read anywhere in this class.
     */
    constructor(jobTitle, city, state, type = null) {
        // Spaces become '+' and commas '%2C'; no other characters are escaped.
        const encode = (text) => text.replace(/ /g, '+').replace(/,/g, '%2C');
        this.jobTitle = encode(jobTitle);
        this.city = encode(city);
        this.state = encode(state);
        this.type = type;
        this.query = `http://allorigins.me/get?url=https%3A//www.indeed.com/jobs%3Fq%3D${this.jobTitle}%26l%3D${this.city}%252C+${this.state}&callback=?`;
    }

    /**
     * Requests the search page and splits it on the organic-job marker.
     *
     * One JobProspect is produced per chunk, including chunk 0 (the markup
     * that precedes the first marker), so array indexes stay aligned with the
     * `data-lead-number` values written by HtmlUtility.produceCard.
     * Full descriptions are loaded in the background and are NOT awaited;
     * `descriptionHTML` is filled in on each prospect when its request settles.
     * @returns {PromiseLike<JobProspect[]>} Resolves with the prospects; rejects if the search request fails.
     */
    init() {
        return $.getJSON(this.query).then((data) =>
            data.contents
                .split('data-tn-component="organicJob"')
                .map((chunk, index) => {
                    const html = chunk.replace(/\n/g, '');
                    const prospect = new JobProspect(
                        IndeedParser.getCompany(html),
                        IndeedParser.getLocation(html),
                        IndeedParser.getDate(html),
                        IndeedParser.getTitle(html),
                        IndeedParser.getUrl(html),
                        null,
                        IndeedParser.getTruncation(html),
                        html
                    );
                    // Chunk 0 is never rendered as a card, so fetching a
                    // description for it would only waste a request.
                    if (index > 0) {
                        // Deliberately not awaited: cards render immediately.
                        IndeedParser.setDescription(html, this.query, prospect)
                            .catch((err) => { console.error(err); });
                    }
                    return prospect;
                })
        );
    }

    /**
     * Replaces the `#job-scraper-box` placeholder with the modal skeleton and
     * one card per prospect, then wires up the open/close click handlers.
     *
     * Does not use `this`, so it may be passed as a bare callback.
     * @param {JobProspect[]} arr - Prospects to render; also stored as `HtmlUtility.cache.data`.
     * @returns {void}
     * @throws {Error} When the placeholder element is not in the document.
     */
    renderCards(arr) {
        const placeholder = document.getElementById('job-scraper-box');
        if (placeholder == null) {
            throw new Error('Scraper.renderCards: element #job-scraper-box not found');
        }

        const modalHtml = `<div class="job-scraper__modal-veil">
    <div class="job-scraper__modal-wrapper">
        <div class="job-scraper__modal-header">
            <p><span class="job-scraper__close-box">X</span></p>
            <p class="job-scraper__company"></p>
            <hr />
            <p class="job-scraper__title"></p>
            <p class="job-scraper__location"></p>
            <p class="job-scraper__date"></p>
            <p>. . .</p>
        </div>
        <div class="job-scraper__modal-container">
        </div>
        <div class="job-scraper__modal-footer">
            <a class="job-scraper__modal-apply" target="_blank">Apply to Job</a>
        </div>
    </div>
</div>`;
        placeholder.insertAdjacentHTML('beforebegin', modalHtml);

        const firstByClass = (className) => document.getElementsByClassName(className)[0];
        HtmlUtility.cache = {
            modal: firstByClass('job-scraper__modal-veil'),
            company: firstByClass('job-scraper__company'),
            job: firstByClass('job-scraper__title'),
            location: firstByClass('job-scraper__location'),
            date: firstByClass('job-scraper__date'),
            content: firstByClass('job-scraper__modal-container'),
            data: arr,
            apply: firstByClass('job-scraper__modal-apply'),
        };

        const cardsHtml = arr
            .map((prospect, index) => HtmlUtility.produceCard(prospect, index, arr))
            .join('');
        placeholder.insertAdjacentHTML(
            'beforebegin',
            `<div class="job-scraper__items-container">${cardsHtml}</div>`
        );
        placeholder.parentElement.removeChild(placeholder);

        firstByClass('job-scraper__close-box').addEventListener('click', () => {
            HtmlUtility.closeModal();
        });

        const cardButtons = document.getElementsByClassName('job-scraper__item-button');
        for (let i = 0; i < cardButtons.length; i++) {
            cardButtons[i].addEventListener('click', (event) => {
                // currentTarget is always the button the listener was bound to
                // (the element carrying data-lead-number); the legacy
                // `srcElement` alias is deprecated.
                HtmlUtility.triggerModal(event.currentTarget);
            });
        }
    }
}
// Run a fixed demo search against Indeed and render the results into the page.
const Scrapey = new Scraper('Career Services Advisor', 'New York', 'New York');
const Scrape = Scrapey.init();
Scrape
    // Called as a method so renderCards keeps working even if it starts using `this`.
    .then((prospects) => Scrapey.renderCards(prospects))
    // Report the failure; a handler that only rethrows would leave it unhandled.
    .catch((err) => { console.error(err); });

// Exploratory request: dumps the Monster search page, split on 'clickJobTitle'
// and then into lines, to the console. Nothing is rendered from it.
$.getJSON('http://allorigins.me/get?url=https%3A//www.monster.com/jobs/search/%3Fq%3Dcareer-services-advisor%26where%3DTampa__2C-FL%26jobid%3D193877545&callback=?')
    .then((data) => {
        console.log(data.contents.split('clickJobTitle').map((el) => el.split('\n')));
    })
    .catch((err) => { console.log(err); });
}
// Schedule autorun for when the document is ready, using whichever
// registration mechanism this browser exposes (checked in this order).
if (document.addEventListener) {
    document.addEventListener("DOMContentLoaded", autorun, false);
} else if (document.attachEvent) {
    document.attachEvent("onreadystatechange", autorun);
} else {
    window.onload = autorun;
}
</script>
</body>
</html>