@jomari-wp/linkedin-jobs-scraper
Version:
Scrape publicly available jobs on LinkedIn using a headless browser
109 lines (108 loc) • 4.53 kB
JavaScript
;
/**
 * Runs a generator function as an asynchronous routine: every yielded value is
 * awaited, its settled result is fed back into the generator, and the returned
 * promise settles with the generator's return value (or the first uncaught error).
 *
 * An already-present `this.__awaiter` is reused when available.
 *
 * @param {*} thisArg - `this` binding used when invoking `generator`.
 * @param {ArrayLike<*>} [_arguments] - Arguments passed to `generator` (defaults to none).
 * @param {PromiseConstructor} [P] - Promise implementation to use (defaults to the global `Promise`).
 * @param {GeneratorFunction} generator - Generator function whose `yield`s act as await points.
 * @returns {Promise<*>} Promise for the generator's return value.
 */
var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
    // Fall back to the global Promise when no constructor was supplied.
    const PromiseCtor = P || Promise;
    // Leave instances of the chosen promise type untouched; wrap anything else.
    const toPromise = (value) => {
        if (value instanceof PromiseCtor) {
            return value;
        }
        return new PromiseCtor((resolve) => { resolve(value); });
    };
    return new PromiseCtor((resolve, reject) => {
        // Created inside the executor so a throw here becomes a rejection.
        const iterator = generator.apply(thisArg, _arguments || []);
        const advance = (result) => {
            if (result.done) {
                resolve(result.value);
                return;
            }
            toPromise(result.value).then(onFulfilled, onRejected);
        };
        const onFulfilled = (value) => {
            try {
                advance(iterator.next(value));
            }
            catch (error) {
                reject(error);
            }
        };
        const onRejected = (reason) => {
            try {
                // Re-raise the rejection inside the generator so its own try/catch can handle it.
                advance(iterator["throw"](reason));
            }
            catch (error) {
                reject(error);
            }
        };
        advance(iterator.next());
    });
};
Object.defineProperty(exports, "__esModule", { value: true });
const browser_1 = require("../utils/browser");
const utils_1 = require("../utils/utils");
const index_1 = require("../index");
describe('[TEST]', () => {
jest.setTimeout(240000);
/**
 * Listener for scraper data events (registered in the test below): asserts
 * that a scraped job record has the expected shape.
 *
 * Checks run in this order: every required field is defined, the text fields
 * are non-empty, `link` parses as a URL, and each optional link parses as a
 * URL when it is truthy.
 *
 * @param {object} data - Job record emitted by the scraper.
 */
const onDataFn = (data) => {
    const requiredFields = [
        'query',
        'location',
        'jobId',
        'title',
        'company',
        'place',
        'date',
        'description',
        'descriptionHTML',
        'link',
    ];
    const nonEmptyFields = [
        'location',
        'jobId',
        'title',
        'place',
        'description',
        'descriptionHTML',
    ];
    const optionalUrlFields = ['applyLink', 'companyLink', 'companyImgLink'];

    for (const field of requiredFields) {
        expect(data[field]).toBeDefined();
    }
    for (const field of nonEmptyFields) {
        expect(data[field].length).toBeGreaterThan(0);
    }
    expect(() => new URL(data.link)).not.toThrow();
    for (const field of optionalUrlFields) {
        // Optional links are only validated when present (truthy).
        if (data[field]) {
            expect(() => new URL(data[field])).not.toThrow();
        }
    }
};
/**
 * Custom description extractor, supplied as `options.descriptionFn` in one of
 * the queries below. Reads the text of the ".jobs-description" element from
 * the global `document`, collapses every whitespace run into a single space
 * and trims the result.
 *
 * Throws if the element is not found (no null check is performed).
 *
 * @returns {string} Normalised description text.
 */
const descriptionFn = () => {
    const container = document.querySelector(".jobs-description");
    const rawText = container.innerText;
    const singleSpaced = rawText.replace(/[\s\n\r]+/g, " ");
    return singleSpaced.trim();
};
// Scraper instance used by the test in this suite.
const scraper = new index_1.LinkedinScraper({
    headless: true,
    // NOTE(review): these flags look like they open the browser's remote
    // debugging endpoint on port 9222 bound to every network interface
    // (0.0.0.0) — confirm that this exposure is intended outside a sandbox.
    args: [
        "--remote-debugging-address=0.0.0.0",
        "--remote-debugging-port=9222",
    ],
    // Presumably a per-operation slow-down in milliseconds — TODO confirm
    // against the LinkedinScraper options.
    slowMo: 200,
});
// After every test, wait a fixed delay before letting Jest continue.
afterEach(() => {
    // Necessary to avoid Jest error: `ReferenceError: You are trying to `import` a file after the Jest environment has been torn down.`
    const teardownDelayMs = 2000;
    return __awaiter(void 0, void 0, void 0, function* () {
        yield (0, utils_1.sleep)(teardownDelayMs);
    });
});
// Queries handed to `scraper.run` as its first argument in the test below.
const queriesSerial1 = [
    // 1) Empty query string with no per-query options.
    { query: '' },

    // 2) Query with its own locations, limit, time/experience filters and the
    //    custom description extractor defined above.
    {
        query: "c#",
        options: {
            locations: ['Finland'],
            optimize: true,
            limit: 33,
            descriptionFn,
            filters: {
                time: index_1.timeFilter.WEEK,
                experience: index_1.experienceLevelFilter.MID_SENIOR,
            },
        },
    },

    // 3) Query that filters through a company jobs URL.
    //    NOTE(review): `locations` says 'United States' while the URL carries
    //    location=United%20Kingdom — confirm the mismatch is intentional.
    {
        query: 'Engineer',
        options: {
            locations: ['United States'],
            limit: 27,
            filters: {
                companyJobsUrl: "https://www.linkedin.com/jobs/search/?f_C=1441%2C10667&geoId=101165590&keywords=engineer&location=United%20Kingdom",
            },
        },
    },
];
// Options handed to `scraper.run` as its second argument in the test below.
// Presumably these act as defaults for queries that do not set their own
// options — TODO confirm against the scraper implementation.
const globalOptions = {
    limit: 10,
    locations: ['United Kingdom'],
    filters: {
        time: index_1.timeFilter.MONTH,
        relevance: index_1.relevanceFilter.RECENT,
    },
};
// End-to-end run: registers the event listeners, runs every query in
// `queriesSerial1` with `globalOptions`, then shuts the scraper and the
// Chromium process down.
//
// Cleanup sits in nested `finally` blocks so that the scraper is closed and
// Chromium is killed even when `scraper.run` (or `scraper.close`) rejects;
// previously a failed run skipped both steps and left the browser running.
//
// NOTE(review): the assertions in `onDataFn` execute inside an event listener.
// Whether a failing assertion there fails this test depends on how the scraper
// propagates listener exceptions — confirm.
it('Authenticated strategy', () => __awaiter(void 0, void 0, void 0, function* () {
    // expect(process.env.LI_AT_COOKIE).toBeDefined();
    // expect(process.env.LI_AT_COOKIE!.length).toBeGreaterThan(0);
    scraper.on(index_1.events.scraper.data, onDataFn);
    scraper.on(index_1.events.scraper.invalidSession, () => { console.error("Invalid session!"); });
    scraper.on(index_1.events.scraper.error, (err) => { console.error(err); });
    scraper.on(index_1.events.scraper.end, () => console.log("\nE N D (ツ)_.\\m/"));
    try {
        yield scraper.run(queriesSerial1, globalOptions);
    }
    finally {
        try {
            yield scraper.close();
        }
        finally {
            // Always attempted, even if closing the scraper failed.
            yield (0, browser_1.killChromium)();
        }
    }
}));
});