UNPKG

linkedin-jobs-api

Version:

advanced node.js package for getting job listings from LinkedIn

329 lines (286 loc) 9.46 kB
const cheerio = require("cheerio"); const axios = require("axios"); const randomUseragent = require("random-useragent"); // Utility functions const delay = (ms) => new Promise((resolve) => setTimeout(resolve, ms)); // Cache implementation class JobCache { constructor() { this.cache = new Map(); this.TTL = 1000 * 60 * 60; // 1 hour } set(key, value) { this.cache.set(key, { data: value, timestamp: Date.now(), }); } get(key) { const item = this.cache.get(key); if (!item) return null; if (Date.now() - item.timestamp > this.TTL) { this.cache.delete(key); return null; } return item.data; } clear() { const now = Date.now(); for (const [key, value] of this.cache.entries()) { if (now - value.timestamp > this.TTL) { this.cache.delete(key); } } } } const cache = new JobCache(); // Generate a unique cache key based on the query parameters Query.prototype.getCacheKey = function () { return `${this.url(0)}_limit:${this.limit}`; }; // Main query function module.exports.query = (queryObject) => { const query = new Query(queryObject); return query.getJobs(); }; // Query constructor function Query(queryObj) { this.host = queryObj.host || "www.linkedin.com"; this.keyword = queryObj.keyword?.trim().replace(/\s+/g, "+") || ""; this.location = queryObj.location?.trim().replace(/\s+/g, "+") || ""; this.dateSincePosted = queryObj.dateSincePosted || ""; this.jobType = queryObj.jobType || ""; this.remoteFilter = queryObj.remoteFilter || ""; this.salary = queryObj.salary || ""; this.experienceLevel = queryObj.experienceLevel || ""; this.sortBy = queryObj.sortBy || ""; this.limit = Number(queryObj.limit) || 0; this.page = Number(queryObj.page) || 0; this.has_verification = queryObj.has_verification || false; this.under_10_applicants = queryObj.under_10_applicants || false; } // Query prototype methods Query.prototype.getDateSincePosted = function () { const dateRange = { "past month": "r2592000", "past week": "r604800", "24hr": "r86400", }; return dateRange[this.dateSincePosted.toLowerCase()] || ""; }; Query.prototype.getExperienceLevel = function () { const experienceRange = { internship: "1", "entry level": "2", associate: "3", senior: "4", director: "5", executive: "6", }; return experienceRange[this.experienceLevel.toLowerCase()] || ""; }; Query.prototype.getJobType = function () { const jobTypeRange = { "full time": "F", "full-time": "F", "part time": "P", "part-time": "P", contract: "C", temporary: "T", volunteer: "V", internship: "I", }; return jobTypeRange[this.jobType.toLowerCase()] || ""; }; Query.prototype.getRemoteFilter = function () { const remoteFilterRange = { "on-site": "1", "on site": "1", remote: "2", hybrid: "3", }; return remoteFilterRange[this.remoteFilter.toLowerCase()] || ""; }; Query.prototype.getSalary = function () { const salaryRange = { 40000: "1", 60000: "2", 80000: "3", 100000: "4", 120000: "5", }; return salaryRange[this.salary] || ""; }; Query.prototype.getHasVerification = function () { return this.has_verification ? "true" : "false"; }; Query.prototype.getUnder10Applicants = function () { return this.under_10_applicants ? "true" : "false"; }; Query.prototype.getPage = function () { return this.page * 25; }; Query.prototype.url = function (start) { let query = `https://${this.host}/jobs-guest/jobs/api/seeMoreJobPostings/search?`; const params = new URLSearchParams(); if (this.keyword) params.append("keywords", this.keyword); if (this.location) params.append("location", this.location); if (this.getDateSincePosted()) params.append("f_TPR", this.getDateSincePosted()); if (this.getSalary()) params.append("f_SB2", this.getSalary()); if (this.getExperienceLevel()) params.append("f_E", this.getExperienceLevel()); if (this.getRemoteFilter()) params.append("f_WT", this.getRemoteFilter()); if (this.getJobType()) params.append("f_JT", this.getJobType()); if (this.getHasVerification()) params.append("f_VJ", this.getHasVerification()); if (this.getUnder10Applicants()) params.append("f_EA", this.getUnder10Applicants()); params.append("start", start + this.getPage()); if (this.sortBy === "recent") params.append("sortBy", "DD"); else if (this.sortBy === "relevant") params.append("sortBy", "R"); return query + params.toString(); }; Query.prototype.getJobs = async function () { let allJobs = []; let start = 0; const BATCH_SIZE = 25; let hasMore = true; let consecutiveErrors = 0; const MAX_CONSECUTIVE_ERRORS = 3; console.log(this.url()); console.log(this.getCacheKey()); try { // Check cache first const cacheKey = this.getCacheKey(); const cachedJobs = cache.get(cacheKey); if (cachedJobs) { console.log("Returning cached results"); return cachedJobs; } while (hasMore) { try { const jobs = await this.fetchJobBatch(start); if (!jobs || jobs.length === 0) { hasMore = false; break; } allJobs.push(...jobs); console.log(`Fetched ${jobs.length} jobs. Total: ${allJobs.length}`); if (this.limit && allJobs.length >= this.limit) { allJobs = allJobs.slice(0, this.limit); break; } // Reset error counter on successful fetch consecutiveErrors = 0; start += BATCH_SIZE; // Add reasonable delay between requests await delay(2000 + Math.random() * 1000); } catch (error) { consecutiveErrors++; console.error( `Error fetching batch (attempt ${consecutiveErrors}):`, error.message ); if (consecutiveErrors >= MAX_CONSECUTIVE_ERRORS) { console.log("Max consecutive errors reached. Stopping."); break; } // Exponential backoff await delay(Math.pow(2, consecutiveErrors) * 1000); } } // Cache results if we got any if (allJobs.length > 0) { cache.set(this.getCacheKey(), allJobs); } return allJobs; } catch (error) { console.error("Fatal error in job fetching:", error); throw error; } }; Query.prototype.fetchJobBatch = async function (start) { const headers = { "User-Agent": randomUseragent.getRandom(), Accept: "application/json, text/javascript, */*; q=0.01", "Accept-Language": "en-US,en;q=0.9", "Accept-Encoding": "gzip, deflate, br", Referer: "https://www.linkedin.com/jobs", "X-Requested-With": "XMLHttpRequest", Connection: "keep-alive", "Sec-Fetch-Dest": "empty", "Sec-Fetch-Mode": "cors", "Sec-Fetch-Site": "same-origin", "Cache-Control": "no-cache", Pragma: "no-cache", }; try { const response = await axios.get(this.url(start), { headers, validateStatus: function (status) { return status === 200; }, timeout: 10000, }); return parseJobList(response.data); } catch (error) { if (error.response?.status === 429) { throw new Error("Rate limit reached"); } throw error; } }; function parseJobList(jobData) { try { const $ = cheerio.load(jobData); const jobs = $("li"); return jobs .map((index, element) => { try { const job = $(element); const position = job.find(".base-search-card__title").text().trim(); const company = job.find(".base-search-card__subtitle").text().trim(); const location = job.find(".job-search-card__location").text().trim(); const dateElement = job.find("time"); const date = dateElement.attr("datetime"); const salary = job .find(".job-search-card__salary-info") .text() .trim() .replace(/\s+/g, " "); const jobUrl = job.find(".base-card__full-link").attr("href"); const companyLogo = job .find(".artdeco-entity-image") .attr("data-delayed-url"); const agoTime = job.find(".job-search-card__listdate").text().trim(); // Only return job if we have at least position and company if (!position || !company) { return null; } return { position, company, location, date, salary: salary || "Not specified", jobUrl: jobUrl || "", companyLogo: companyLogo || "", agoTime: agoTime || "", }; } catch (err) { console.warn(`Error parsing job at index ${index}:`, err.message); return null; } }) .get() .filter(Boolean); } catch (error) { console.error("Error parsing job list:", error); return []; } } // Export additional utilities for testing and monitoring module.exports.JobCache = JobCache; module.exports.clearCache = () => cache.clear(); module.exports.getCacheSize = () => cache.cache.size;