UNPKG

adex-linkedin-scrapper

Version:

Flexible linkedin scrapper developed by Adefemigreat

316 lines (315 loc) 15.6 kB
"use strict"; var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) { function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); } return new (P || (P = Promise))(function (resolve, reject) { function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } } function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } } function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); } step((generator = generator.apply(thisArg, _arguments || [])).next()); }); }; var __generator = (this && this.__generator) || function (thisArg, body) { var _ = { label: 0, sent: function() { if (t[0] & 1) throw t[1]; return t[1]; }, trys: [], ops: [] }, f, y, t, g; return g = { next: verb(0), "throw": verb(1), "return": verb(2) }, typeof Symbol === "function" && (g[Symbol.iterator] = function() { return this; }), g; function verb(n) { return function (v) { return step([n, v]); }; } function step(op) { if (f) throw new TypeError("Generator is already executing."); while (g && (g = 0, op[0] && (_ = 0)), _) try { if (f = 1, y && (t = op[0] & 2 ? y["return"] : op[0] ? y["throw"] || ((t = y["return"]) && t.call(y), 0) : y.next) && !(t = t.call(y, op[1])).done) return t; if (y = 0, t) op = [op[0] & 2, t.value]; switch (op[0]) { case 0: case 1: t = op; break; case 4: _.label++; return { value: op[1], done: false }; case 5: _.label++; y = op[1]; op = [0]; continue; case 7: op = _.ops.pop(); _.trys.pop(); continue; default: if (!(t = _.trys, t = t.length > 0 && t[t.length - 1]) && (op[0] === 6 || op[0] === 2)) { _ = 0; continue; } if (op[0] === 3 && (!t || (op[1] > t[0] && op[1] < t[3]))) { _.label = op[1]; break; } if (op[0] === 6 && _.label < t[1]) { _.label = t[1]; t = op; break; } if (t && _.label < t[2]) { _.label = t[2]; _.ops.push(op); break; } if (t[2]) _.ops.pop(); _.trys.pop(); continue; } op = body.call(thisArg, _); } catch (e) { op = [6, e]; y = 0; } finally { f = t = 0; } if (op[0] & 5) throw op[1]; return { value: op[0] ? op[1] : void 0, done: true }; } }; var __importDefault = (this && this.__importDefault) || function (mod) { return (mod && mod.__esModule) ? mod : { "default": mod }; }; Object.defineProperty(exports, "__esModule", { value: true }); exports.delay = void 0; var puppeteer_1 = __importDefault(require("puppeteer")); var cheerio_1 = require("cheerio"); var delay = function (ms) { return new Promise(function (res) { return setTimeout(res, ms); }); }; exports.delay = delay; var LinkedInScrapper = /** @class */ (function () { function LinkedInScrapper(cookie, profileLink) { this.cookie = cookie; this.profileLink = profileLink; } LinkedInScrapper.prototype.getPageInfo = function () { return __awaiter(this, void 0, void 0, function () { var browser, page; return __generator(this, function (_a) { switch (_a.label) { case 0: return [4 /*yield*/, puppeteer_1.default.launch({ headless: "new", args: [ '--ignore-certificate-errors', '--no-sandbox', '--disable-setuid-sandbox', '--disable-accelerated-2d-canvas', '--disable-gpu' ] })]; case 1: browser = _a.sent(); return [4 /*yield*/, browser.newPage()]; case 2: page = _a.sent(); page.setDefaultNavigationTimeout(0); return [4 /*yield*/, page.setUserAgent("Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36")]; case 3: _a.sent(); return [4 /*yield*/, page.setCookie({ name: "li_at", value: this.cookie, domain: ".linkedin.com", })]; case 4: _a.sent(); return [2 /*return*/, { browser: browser, page: page, }]; } }); }); }; LinkedInScrapper.prototype.getProfileInfo = function () { return __awaiter(this, void 0, void 0, function () { var _a, page, browser, pageContent, $, name_1, heading, address, about, err_1; return __generator(this, function (_b) { switch (_b.label) { case 0: _b.trys.push([0, 6, , 7]); return [4 /*yield*/, this.getPageInfo()]; case 1: _a = _b.sent(), page = _a.page, browser = _a.browser; return [4 /*yield*/, page.goto(this.profileLink)]; case 2: _b.sent(); return [4 /*yield*/, page.waitForSelector("img")]; case 3: _b.sent(); return [4 /*yield*/, page.content()]; case 4: pageContent = _b.sent(); return [4 /*yield*/, browser.close()]; case 5: _b.sent(); $ = (0, cheerio_1.load)(pageContent); name_1 = $("#ember27").text().trim(); heading = $("#ember28").text().trim(); address = $(".pv-text-details__left-panel.mt2 span.text-body-small") .text() .trim(); about = $('.pv-shared-text-with-see-more div.inline-show-more-text span[aria-hidden="true"]') .first() .text() .trim(); return [2 /*return*/, { name: name_1, heading: heading, address: address, about: about, }]; case 6: err_1 = _b.sent(); throw new Error(err_1); case 7: return [2 /*return*/]; } }); }); }; LinkedInScrapper.prototype.getExperiences = function () { return __awaiter(this, void 0, void 0, function () { var _a, page, browser, pageContent, $_1, experiences_1, err_2; return __generator(this, function (_b) { switch (_b.label) { case 0: return [4 /*yield*/, (0, exports.delay)(1000)]; case 1: _b.sent(); _b.label = 2; case 2: _b.trys.push([2, 8, , 9]); return [4 /*yield*/, this.getPageInfo()]; case 3: _a = _b.sent(), page = _a.page, browser = _a.browser; return [4 /*yield*/, page.goto(this.profileLink + "/details/experience")]; case 4: _b.sent(); return [4 /*yield*/, page.waitForSelector("h2.t-20.t-bold.ph3.pt3.pb2")]; case 5: _b.sent(); return [4 /*yield*/, page.content()]; case 6: pageContent = _b.sent(); return [4 /*yield*/, browser.close()]; case 7: _b.sent(); $_1 = (0, cheerio_1.load)(pageContent); experiences_1 = []; $_1(".pvs-entity").each(function (_, elem) { var position = $_1(elem) .find("div.display-flex.align-items-center.mr1.t-bold span[aria-hidden='true']") .first() .text() .trim(); // For Company var company = $_1(elem) .find("span.t-14.t-normal span[aria-hidden='true']") .first() .text() .trim(); // For Duration var duration = $_1(elem) .find("span.t-14.t-normal.t-black--light span[aria-hidden='true']") .first() .text() .trim(); // For About var about = $_1(elem) .find("div.display-flex.align-items-center.t-14.t-normal.t-black span[aria-hidden='true']") .first() .text() .trim(); experiences_1.push({ position: position, company: company, duration: duration, about: about, }); }); return [2 /*return*/, experiences_1]; case 8: err_2 = _b.sent(); throw new Error(err_2); case 9: return [2 /*return*/]; } }); }); }; LinkedInScrapper.prototype.getEducations = function () { return __awaiter(this, void 0, void 0, function () { var _a, page, browser, pageContent, $_2, educations_1, err_3; return __generator(this, function (_b) { switch (_b.label) { case 0: return [4 /*yield*/, (0, exports.delay)(2000)]; case 1: _b.sent(); _b.label = 2; case 2: _b.trys.push([2, 8, , 9]); return [4 /*yield*/, this.getPageInfo()]; case 3: _a = _b.sent(), page = _a.page, browser = _a.browser; return [4 /*yield*/, page.goto(this.profileLink + "/details/education")]; case 4: _b.sent(); return [4 /*yield*/, page.waitForSelector("h2.t-20.t-bold.ph3.pt3.pb2")]; case 5: _b.sent(); return [4 /*yield*/, page.content()]; case 6: pageContent = _b.sent(); return [4 /*yield*/, browser.close()]; case 7: _b.sent(); $_2 = (0, cheerio_1.load)(pageContent); educations_1 = []; $_2(".pvs-entity").each(function (_, elem) { var schoolName = $_2(elem) .find("div.display-flex.align-items-center.mr1.t-bold span[aria-hidden='true']") .first() .text() .trim(); // For Degree/Field var degreeField = $_2(elem) .find("span.t-14.t-normal span[aria-hidden='true']") .first() .text() .trim(); // For Duration var duration = $_2(elem) .find("span.t-14.t-normal.t-black--light span[aria-hidden='true']") .first() .text() .trim(); educations_1.push({ schoolName: schoolName, degreeField: degreeField, duration: duration, }); }); return [2 /*return*/, educations_1]; case 8: err_3 = _b.sent(); throw new Error(err_3); case 9: return [2 /*return*/]; } }); }); }; LinkedInScrapper.prototype.getSkills = function () { return __awaiter(this, void 0, void 0, function () { var _a, page, browser, pageContent, $_3, skills_1, err_4; return __generator(this, function (_b) { switch (_b.label) { case 0: return [4 /*yield*/, (0, exports.delay)(3000)]; case 1: _b.sent(); _b.label = 2; case 2: _b.trys.push([2, 8, , 9]); return [4 /*yield*/, this.getPageInfo()]; case 3: _a = _b.sent(), page = _a.page, browser = _a.browser; return [4 /*yield*/, page.goto(this.profileLink + "/details/skills")]; case 4: _b.sent(); return [4 /*yield*/, page.waitForSelector("h2.t-20.t-bold.ph3.pt3.pb2")]; case 5: _b.sent(); return [4 /*yield*/, page.content()]; case 6: pageContent = _b.sent(); return [4 /*yield*/, browser.close()]; case 7: _b.sent(); $_3 = (0, cheerio_1.load)(pageContent); skills_1 = []; $_3(".pvs-entity").each(function (_, elem) { var skill = $_3(elem) .find("div.display-flex.align-items-center.mr1.hoverable-link-text.t-bold span[aria-hidden='true']") .first() .text() .trim(); if (skill) { skills_1.push(skill); } }); return [2 /*return*/, skills_1]; case 8: err_4 = _b.sent(); throw new Error(err_4); case 9: return [2 /*return*/]; } }); }); }; return LinkedInScrapper; }()); exports.default = LinkedInScrapper;