UNPKG

@microfox/puppeteer-linkedin

Version:

Puppeteer LinkedIn - Run puppeteer on LinkedIn

1 lines 121 kB
{"version":3,"sources":["../node_modules/dotenv/package.json","../node_modules/dotenv/lib/main.js","../src/index.ts","../src/core/public.ts","../src/utils/index.ts","../src/public/articles.ts","../src/auth/posts.ts","../src/auth/jobs.ts","../src/utils/scrolling.ts","../src/auth/users.ts","../src/auth/sales.ts","../src/auth/search.ts","../src/core/authenticated.ts","../src/public/people.ts","../src/public/jobs.ts"],"sourcesContent":["{\n \"name\": \"dotenv\",\n \"version\": \"16.5.0\",\n \"description\": \"Loads environment variables from .env file\",\n \"main\": \"lib/main.js\",\n \"types\": \"lib/main.d.ts\",\n \"exports\": {\n \".\": {\n \"types\": \"./lib/main.d.ts\",\n \"require\": \"./lib/main.js\",\n \"default\": \"./lib/main.js\"\n },\n \"./config\": \"./config.js\",\n \"./config.js\": \"./config.js\",\n \"./lib/env-options\": \"./lib/env-options.js\",\n \"./lib/env-options.js\": \"./lib/env-options.js\",\n \"./lib/cli-options\": \"./lib/cli-options.js\",\n \"./lib/cli-options.js\": \"./lib/cli-options.js\",\n \"./package.json\": \"./package.json\"\n },\n \"scripts\": {\n \"dts-check\": \"tsc --project tests/types/tsconfig.json\",\n \"lint\": \"standard\",\n \"pretest\": \"npm run lint && npm run dts-check\",\n \"test\": \"tap run --allow-empty-coverage --disable-coverage --timeout=60000\",\n \"test:coverage\": \"tap run --show-full-coverage --timeout=60000 --coverage-report=lcov\",\n \"prerelease\": \"npm test\",\n \"release\": \"standard-version\"\n },\n \"repository\": {\n \"type\": \"git\",\n \"url\": \"git://github.com/motdotla/dotenv.git\"\n },\n \"homepage\": \"https://github.com/motdotla/dotenv#readme\",\n \"funding\": \"https://dotenvx.com\",\n \"keywords\": [\n \"dotenv\",\n \"env\",\n \".env\",\n \"environment\",\n \"variables\",\n \"config\",\n \"settings\"\n ],\n \"readmeFilename\": \"README.md\",\n \"license\": \"BSD-2-Clause\",\n \"devDependencies\": {\n \"@types/node\": \"^18.11.3\",\n \"decache\": \"^4.6.2\",\n \"sinon\": \"^14.0.1\",\n \"standard\": \"^17.0.0\",\n \"standard-version\": \"^9.5.0\",\n \"tap\": \"^19.2.0\",\n \"typescript\": \"^4.8.4\"\n },\n \"engines\": {\n \"node\": \">=12\"\n },\n \"browser\": {\n \"fs\": false\n }\n}\n","const fs = require('fs')\nconst path = require('path')\nconst os = require('os')\nconst crypto = require('crypto')\nconst packageJson = require('../package.json')\n\nconst version = packageJson.version\n\nconst LINE = /(?:^|^)\\s*(?:export\\s+)?([\\w.-]+)(?:\\s*=\\s*?|:\\s+?)(\\s*'(?:\\\\'|[^'])*'|\\s*\"(?:\\\\\"|[^\"])*\"|\\s*`(?:\\\\`|[^`])*`|[^#\\r\\n]+)?\\s*(?:#.*)?(?:$|$)/mg\n\n// Parse src into an Object\nfunction parse (src) {\n const obj = {}\n\n // Convert buffer to string\n let lines = src.toString()\n\n // Convert line breaks to same format\n lines = lines.replace(/\\r\\n?/mg, '\\n')\n\n let match\n while ((match = LINE.exec(lines)) != null) {\n const key = match[1]\n\n // Default undefined or null to empty string\n let value = (match[2] || '')\n\n // Remove whitespace\n value = value.trim()\n\n // Check if double quoted\n const maybeQuote = value[0]\n\n // Remove surrounding quotes\n value = value.replace(/^(['\"`])([\\s\\S]*)\\1$/mg, '$2')\n\n // Expand newlines if double quoted\n if (maybeQuote === '\"') {\n value = value.replace(/\\\\n/g, '\\n')\n value = value.replace(/\\\\r/g, '\\r')\n }\n\n // Add to object\n obj[key] = value\n }\n\n return obj\n}\n\nfunction _parseVault (options) {\n const vaultPath = _vaultPath(options)\n\n // Parse .env.vault\n const result = DotenvModule.configDotenv({ path: vaultPath })\n if (!result.parsed) {\n const err = new Error(`MISSING_DATA: Cannot parse ${vaultPath} for an unknown reason`)\n err.code = 'MISSING_DATA'\n throw err\n }\n\n // handle scenario for comma separated keys - for use with key rotation\n // example: DOTENV_KEY=\"dotenv://:key_1234@dotenvx.com/vault/.env.vault?environment=prod,dotenv://:key_7890@dotenvx.com/vault/.env.vault?environment=prod\"\n const keys = _dotenvKey(options).split(',')\n const length = keys.length\n\n let decrypted\n for (let i = 0; i < length; i++) {\n try {\n // Get full key\n const key = keys[i].trim()\n\n // Get instructions for decrypt\n const attrs = _instructions(result, key)\n\n // Decrypt\n decrypted = DotenvModule.decrypt(attrs.ciphertext, attrs.key)\n\n break\n } catch (error) {\n // last key\n if (i + 1 >= length) {\n throw error\n }\n // try next key\n }\n }\n\n // Parse decrypted .env string\n return DotenvModule.parse(decrypted)\n}\n\nfunction _warn (message) {\n console.log(`[dotenv@${version}][WARN] ${message}`)\n}\n\nfunction _debug (message) {\n console.log(`[dotenv@${version}][DEBUG] ${message}`)\n}\n\nfunction _dotenvKey (options) {\n // prioritize developer directly setting options.DOTENV_KEY\n if (options && options.DOTENV_KEY && options.DOTENV_KEY.length > 0) {\n return options.DOTENV_KEY\n }\n\n // secondary infra already contains a DOTENV_KEY environment variable\n if (process.env.DOTENV_KEY && process.env.DOTENV_KEY.length > 0) {\n return process.env.DOTENV_KEY\n }\n\n // fallback to empty string\n return ''\n}\n\nfunction _instructions (result, dotenvKey) {\n // Parse DOTENV_KEY. Format is a URI\n let uri\n try {\n uri = new URL(dotenvKey)\n } catch (error) {\n if (error.code === 'ERR_INVALID_URL') {\n const err = new Error('INVALID_DOTENV_KEY: Wrong format. Must be in valid uri format like dotenv://:key_1234@dotenvx.com/vault/.env.vault?environment=development')\n err.code = 'INVALID_DOTENV_KEY'\n throw err\n }\n\n throw error\n }\n\n // Get decrypt key\n const key = uri.password\n if (!key) {\n const err = new Error('INVALID_DOTENV_KEY: Missing key part')\n err.code = 'INVALID_DOTENV_KEY'\n throw err\n }\n\n // Get environment\n const environment = uri.searchParams.get('environment')\n if (!environment) {\n const err = new Error('INVALID_DOTENV_KEY: Missing environment part')\n err.code = 'INVALID_DOTENV_KEY'\n throw err\n }\n\n // Get ciphertext payload\n const environmentKey = `DOTENV_VAULT_${environment.toUpperCase()}`\n const ciphertext = result.parsed[environmentKey] // DOTENV_VAULT_PRODUCTION\n if (!ciphertext) {\n const err = new Error(`NOT_FOUND_DOTENV_ENVIRONMENT: Cannot locate environment ${environmentKey} in your .env.vault file.`)\n err.code = 'NOT_FOUND_DOTENV_ENVIRONMENT'\n throw err\n }\n\n return { ciphertext, key }\n}\n\nfunction _vaultPath (options) {\n let possibleVaultPath = null\n\n if (options && options.path && options.path.length > 0) {\n if (Array.isArray(options.path)) {\n for (const filepath of options.path) {\n if (fs.existsSync(filepath)) {\n possibleVaultPath = filepath.endsWith('.vault') ? filepath : `${filepath}.vault`\n }\n }\n } else {\n possibleVaultPath = options.path.endsWith('.vault') ? options.path : `${options.path}.vault`\n }\n } else {\n possibleVaultPath = path.resolve(process.cwd(), '.env.vault')\n }\n\n if (fs.existsSync(possibleVaultPath)) {\n return possibleVaultPath\n }\n\n return null\n}\n\nfunction _resolveHome (envPath) {\n return envPath[0] === '~' ? path.join(os.homedir(), envPath.slice(1)) : envPath\n}\n\nfunction _configVault (options) {\n const debug = Boolean(options && options.debug)\n if (debug) {\n _debug('Loading env from encrypted .env.vault')\n }\n\n const parsed = DotenvModule._parseVault(options)\n\n let processEnv = process.env\n if (options && options.processEnv != null) {\n processEnv = options.processEnv\n }\n\n DotenvModule.populate(processEnv, parsed, options)\n\n return { parsed }\n}\n\nfunction configDotenv (options) {\n const dotenvPath = path.resolve(process.cwd(), '.env')\n let encoding = 'utf8'\n const debug = Boolean(options && options.debug)\n\n if (options && options.encoding) {\n encoding = options.encoding\n } else {\n if (debug) {\n _debug('No encoding is specified. UTF-8 is used by default')\n }\n }\n\n let optionPaths = [dotenvPath] // default, look for .env\n if (options && options.path) {\n if (!Array.isArray(options.path)) {\n optionPaths = [_resolveHome(options.path)]\n } else {\n optionPaths = [] // reset default\n for (const filepath of options.path) {\n optionPaths.push(_resolveHome(filepath))\n }\n }\n }\n\n // Build the parsed data in a temporary object (because we need to return it). Once we have the final\n // parsed data, we will combine it with process.env (or options.processEnv if provided).\n let lastError\n const parsedAll = {}\n for (const path of optionPaths) {\n try {\n // Specifying an encoding returns a string instead of a buffer\n const parsed = DotenvModule.parse(fs.readFileSync(path, { encoding }))\n\n DotenvModule.populate(parsedAll, parsed, options)\n } catch (e) {\n if (debug) {\n _debug(`Failed to load ${path} ${e.message}`)\n }\n lastError = e\n }\n }\n\n let processEnv = process.env\n if (options && options.processEnv != null) {\n processEnv = options.processEnv\n }\n\n DotenvModule.populate(processEnv, parsedAll, options)\n\n if (lastError) {\n return { parsed: parsedAll, error: lastError }\n } else {\n return { parsed: parsedAll }\n }\n}\n\n// Populates process.env from .env file\nfunction config (options) {\n // fallback to original dotenv if DOTENV_KEY is not set\n if (_dotenvKey(options).length === 0) {\n return DotenvModule.configDotenv(options)\n }\n\n const vaultPath = _vaultPath(options)\n\n // dotenvKey exists but .env.vault file does not exist\n if (!vaultPath) {\n _warn(`You set DOTENV_KEY but you are missing a .env.vault file at ${vaultPath}. Did you forget to build it?`)\n\n return DotenvModule.configDotenv(options)\n }\n\n return DotenvModule._configVault(options)\n}\n\nfunction decrypt (encrypted, keyStr) {\n const key = Buffer.from(keyStr.slice(-64), 'hex')\n let ciphertext = Buffer.from(encrypted, 'base64')\n\n const nonce = ciphertext.subarray(0, 12)\n const authTag = ciphertext.subarray(-16)\n ciphertext = ciphertext.subarray(12, -16)\n\n try {\n const aesgcm = crypto.createDecipheriv('aes-256-gcm', key, nonce)\n aesgcm.setAuthTag(authTag)\n return `${aesgcm.update(ciphertext)}${aesgcm.final()}`\n } catch (error) {\n const isRange = error instanceof RangeError\n const invalidKeyLength = error.message === 'Invalid key length'\n const decryptionFailed = error.message === 'Unsupported state or unable to authenticate data'\n\n if (isRange || invalidKeyLength) {\n const err = new Error('INVALID_DOTENV_KEY: It must be 64 characters long (or more)')\n err.code = 'INVALID_DOTENV_KEY'\n throw err\n } else if (decryptionFailed) {\n const err = new Error('DECRYPTION_FAILED: Please check your DOTENV_KEY')\n err.code = 'DECRYPTION_FAILED'\n throw err\n } else {\n throw error\n }\n }\n}\n\n// Populate process.env with parsed values\nfunction populate (processEnv, parsed, options = {}) {\n const debug = Boolean(options && options.debug)\n const override = Boolean(options && options.override)\n\n if (typeof parsed !== 'object') {\n const err = new Error('OBJECT_REQUIRED: Please check the processEnv argument being passed to populate')\n err.code = 'OBJECT_REQUIRED'\n throw err\n }\n\n // Set process.env\n for (const key of Object.keys(parsed)) {\n if (Object.prototype.hasOwnProperty.call(processEnv, key)) {\n if (override === true) {\n processEnv[key] = parsed[key]\n }\n\n if (debug) {\n if (override === true) {\n _debug(`\"${key}\" is already defined and WAS overwritten`)\n } else {\n _debug(`\"${key}\" is already defined and was NOT overwritten`)\n }\n }\n } else {\n processEnv[key] = parsed[key]\n }\n }\n}\n\nconst DotenvModule = {\n configDotenv,\n _configVault,\n _parseVault,\n config,\n decrypt,\n parse,\n populate\n}\n\nmodule.exports.configDotenv = DotenvModule.configDotenv\nmodule.exports._configVault = DotenvModule._configVault\nmodule.exports._parseVault = DotenvModule._parseVault\nmodule.exports.config = DotenvModule.config\nmodule.exports.decrypt = DotenvModule.decrypt\nmodule.exports.parse = DotenvModule.parse\nmodule.exports.populate = DotenvModule.populate\n\nmodule.exports = DotenvModule\n","export * from './core/public';\nexport * from './public/articles';\nexport * from './core/authenticated';\nexport * from './public/people';\nexport * from './public/jobs';\n","import { puppeteerLaunchProps } from '@microfox/puppeteer-sls';\nimport puppeteer, { Browser, Page } from 'puppeteer-core';\nimport dotenv from 'dotenv';\n\ndotenv.config();\n\nexport class LinkedInPublic {\n /**\n * The Puppeteer browser instance.\n */\n public browser: Browser | null = null;\n /**\n * The Puppeteer page instance.\n */\n public page: Page | null = null;\n\n constructor() { }\n\n async launch(options?: { headless?: boolean }) {\n const launchProps = await puppeteerLaunchProps();\n const isLocal = process.env.IS_OFFLINE || process.env.SERVERLESS_OFFLINE;\n\n this.browser = await puppeteer.launch({\n ...launchProps,\n headless: options?.headless ?? true,\n slowMo: isLocal ? 50 : 0,\n });\n this.page = await this.browser.newPage();\n await this.page.setUserAgent(\n 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',\n );\n await this.page.setViewport({ width: 1920, height: 1080 });\n }\n\n async goto(url: string, options?: Parameters<Page['goto']>[1]) {\n if (!this.page) {\n throw new Error('Page not initialized. Call launch() first.');\n }\n return this.page.goto(url, options);\n }\n\n async close() {\n if (this.browser) {\n await this.browser.close();\n }\n }\n} ","import * as fs from 'fs';\nimport * as path from 'path';\nimport { URL } from 'url';\n\nexport function urlToFolderPath(urlString: string, baseDir: string = 'outputs'): string {\n try {\n const url = new URL(urlString);\n // Remove leading/trailing slashes and split pathname\n const pathSegments = url.pathname.replace(/^\\/|\\/$/g, '').split('/');\n \n const dirPath = path.join(process.cwd(), baseDir, ...pathSegments);\n\n if (!fs.existsSync(dirPath)) {\n fs.mkdirSync(dirPath, { recursive: true });\n }\n \n return dirPath;\n } catch (error) {\n console.error('Invalid URL:', urlString);\n // Return a default path or handle error as needed\n const defaultPath = path.join(process.cwd(), baseDir, 'invalid-urls');\n if (!fs.existsSync(defaultPath)) {\n fs.mkdirSync(defaultPath, { recursive: true });\n }\n return defaultPath;\n }\n}\n\nexport function delay(ms: number) {\n return new Promise((resolve) => setTimeout(resolve, ms));\n}\n\nexport function slugify(text: string): string {\n return text\n .toString()\n .toLowerCase()\n .replace(/\\s+/g, '-') // Replace spaces with -\n .replace(/[^\\w\\-]+/g, '') // Remove all non-word chars\n .replace(/\\-\\-+/g, '-') // Replace multiple - with single -\n .replace(/^-+/, '') // Trim - from start of text\n .replace(/-+$/, ''); // Trim - from end of text\n}\n\nexport async function retry<T>(\n fn: () => Promise<T>,\n retries: number = 3,\n delayMs: number = 1000,\n): Promise<T> {\n let lastError: Error | undefined;\n for (let i = 0; i < retries; i++) {\n try {\n return await fn();\n } catch (error) {\n lastError = error as Error;\n if (i < retries - 1) {\n await delay(delayMs * Math.pow(2, i)); // Exponential backoff\n }\n }\n }\n throw lastError;\n} ","import { LinkedInPublic } from '../core/public';\nimport { delay } from '../utils/index';\nimport { ElementHandle, Page } from 'puppeteer-core';\n\n// --- Start of new lower-level scraping functions ---\n\nasync function scrapeCollaborativeArticleSummary(\n card: ElementHandle,\n): Promise<CollaborativeArticleSummary | null> {\n return card.evaluate((c) => {\n const linkElement = c.querySelector('a');\n if (!linkElement) return null;\n\n const titleElement = c.querySelector('h2');\n // This selector works for both the main feed and topic pages\n const topicElements = c.querySelectorAll(\n 'a.tagged-topic, .content-hub-tagged-topics a',\n );\n\n const title = titleElement?.innerText.trim() ?? '';\n const url = linkElement.href;\n const topics = Array.from(topicElements).map((el) => ({\n name: el.textContent?.trim() ?? '',\n url: (el as HTMLAnchorElement).href,\n }));\n\n if (title && url) {\n return { title, url, topics };\n }\n return null;\n });\n}\n\nasync function scrapePerspective(\n contribution: ElementHandle,\n): Promise<Perspective | null> {\n return contribution.evaluate((c) => {\n const authorName =\n c.querySelector('.contribution-author__name')?.textContent?.trim() ?? '';\n const headline =\n c.querySelector('.contribution-author__headline')?.textContent?.trim() ??\n '';\n const text = c.querySelector('.contribution-text')?.textContent?.trim() ?? '';\n const likesText =\n c\n .querySelector('[data-test-id=\"social-actions__reactions\"]')\n ?.getAttribute('data-num-reactions') ?? '0';\n const likes = parseInt(likesText, 10);\n\n // Skip if it's a \"Show more\" button or similar non-content item\n if (!authorName) {\n return null;\n }\n\n return {\n author: {\n name: authorName,\n headline,\n },\n text,\n likes,\n };\n });\n}\n\nexport async function scrapeArticleSection(\n segment: ElementHandle,\n): Promise<ArticleSection | null> {\n const section: ArticleSection | null = await segment.evaluate((s) => {\n const sectionTitle =\n s.querySelector('h2 > span:last-of-type')?.textContent?.trim() ??\n s.querySelector('h2')?.textContent?.trim() ??\n '';\n const content =\n s.querySelector('.article-main__content p')?.textContent?.trim() ?? '';\n\n if (!sectionTitle) return null;\n\n return { title: sectionTitle, content, perspectives: [] };\n });\n\n if (!section) return null;\n\n const perspectiveElements = await segment.$$('li.contribution-list-item');\n const perspectives: Perspective[] = [];\n\n for (const el of perspectiveElements) {\n // Check for and click a \"show more\" button within the list item itself\n const showMoreButton = await el.$('button:not([aria-label])'); // A generic button selector\n if (showMoreButton) {\n try {\n await showMoreButton.click();\n await delay(500); // Wait for content to load\n } catch (e) {\n // Ignore errors if button is not clickable\n }\n }\n const p = await scrapePerspective(el);\n if (p) {\n perspectives.push(p);\n }\n }\n\n section.perspectives = perspectives;\n\n return section;\n}\n\n// --- End of new lower-level scraping functions ---\n\nexport interface Perspective {\n author: {\n name: string;\n headline: string;\n };\n text: string;\n likes: number;\n}\n\nexport interface ArticleSection {\n title: string;\n content: string;\n perspectives: Perspective[];\n}\n\nexport interface CollaborativeArticleSummary {\n title: string;\n url: string;\n topics: { name: string; url: string }[];\n}\n\nexport interface FullArticle extends CollaborativeArticleSummary {\n sections: ArticleSection[];\n}\n\nexport interface MoreToExploreTopic {\n title: string;\n url: string;\n articles: CollaborativeArticleSummary[];\n}\n\n/**\n * Retries a function with exponential backoff.\n * @param {() => Promise<T>} fn The function to retry.\n * @param {number} maxRetries The maximum number of retries.\n * @param {number} wait The initial delay in milliseconds.\n * @returns {Promise<T>}\n * @template T\n */\nexport async function retry<T>(\n fn: () => Promise<T>,\n maxRetries = 3,\n wait = 1000,\n): Promise<T> {\n for (let i = 0; i < maxRetries; i++) {\n try {\n return await fn();\n } catch (e) {\n if (i < maxRetries - 1) {\n await delay(wait * 2 ** i);\n } else {\n throw e;\n }\n }\n }\n throw new Error('Retry failed');\n}\n\n/**\n * @param {Page} page - The Puppeteer page object.\n * @returns {Promise<void>}\n */\nexport async function handleModal(page: Page): Promise<void> {\n try {\n const modalSelector = '.modal__overlay';\n const modalVisible = await page.$(modalSelector);\n if (modalVisible) {\n // Click near the top-left corner to dismiss the modal by clicking the overlay\n await page.mouse.click(10, 10, { delay: 50 });\n await delay(1000); // Wait for modal to close\n }\n await page.waitForFunction(\n () => !document.querySelector('.modal__overlay'),\n { timeout: 5000 },\n );\n } catch (error) {\n // Modal did not appear or was already gone, which is fine.\n }\n}\n\nexport async function waitForUrl(\n page: Page,\n urlPart: string,\n timeout = 30000,\n): Promise<void> {\n await page.waitForFunction(\n (part: string) => window.location.href.includes(part),\n { timeout },\n urlPart,\n );\n}\n\n/**\n * Scrolls the page to the bottom to trigger lazy loading of content.\n * @param {Page} page - The Puppeteer page object.\n * @returns {Promise<void>}\n */\nexport async function scrollPageToBottom(page: Page): Promise<void> {\n let lastHeight = await page.evaluate('document.body.scrollHeight');\n let newHeight;\n let scrollAttempts = 0;\n const maxAttempts = 10; // Increased from 3 to be more robust\n const scrollStep = 1000; // was 2000\n\n while (scrollAttempts < maxAttempts) {\n await page.evaluate('window.scrollTo(0, document.body.scrollHeight)');\n await delay(scrollStep); // Use a fixed delay for scrolling\n newHeight = await page.evaluate('document.body.scrollHeight');\n\n if (newHeight === lastHeight) {\n scrollAttempts++;\n } else {\n lastHeight = newHeight;\n scrollAttempts = 0; // Reset if new content loads\n }\n }\n}\n\ndeclare module '../core/public' {\n interface LinkedInPublic {\n getCollaborativeArticles: (options?: {\n limit?: number;\n }) => Promise<CollaborativeArticleSummary[]>;\n getFullCollaborativeArticles: (options?: {\n limit?: number;\n }) => Promise<FullArticle[]>;\n scrapeArticlePage: (\n articleSummary: CollaborativeArticleSummary,\n ) => Promise<FullArticle | null>;\n scrapeMoreToExplore: (options?: {\n topicLimit?: number;\n }) => Promise<MoreToExploreTopic[]>;\n }\n}\n\nLinkedInPublic.prototype.getCollaborativeArticles = async function (\n this: LinkedInPublic,\n options,\n) {\n const { page } = this;\n if (!page) {\n throw new Error('Page not initialized');\n }\n\n await retry(async () => {\n await page.goto('https://www.linkedin.com/pulse/topics/home/', {\n waitUntil: 'domcontentloaded',\n });\n await page.waitForSelector('section.core-rail', { timeout: 15000 });\n });\n\n await delay(2000 + Math.random() * 2000);\n\n await handleModal(page);\n\n // Scroll to load more articles\n let previousHeight;\n let consecutiveNoChange = 0;\n for (let i = 0; i < 30; i++) {\n // scroll up to 30 times\n previousHeight = await page.evaluate('document.body.scrollHeight');\n await page.evaluate('window.scrollTo(0, document.body.scrollHeight)');\n await delay(2000 + Math.random() * 2000); // longer, randomized delay\n const newHeight = await page.evaluate('document.body.scrollHeight');\n\n if (newHeight === previousHeight) {\n consecutiveNoChange++;\n if (consecutiveNoChange > 3) {\n // Stop after 4 consecutive scrolls with no new content\n break;\n }\n } else {\n consecutiveNoChange = 0; // Reset counter if new content is loaded\n }\n }\n\n const articleCardElements = await page.$$('.content-hub-entity-card-redesign');\n const articleSummaries: CollaborativeArticleSummary[] = [];\n for (const card of articleCardElements) {\n if (options?.limit && articleSummaries.length >= options.limit) break;\n const summary = await scrapeCollaborativeArticleSummary(card);\n if (summary) {\n articleSummaries.push(summary);\n }\n }\n\n return articleSummaries;\n};\n\nLinkedInPublic.prototype.getFullCollaborativeArticles = async function (\n this: LinkedInPublic,\n options,\n) {\n const summaries = await this.getCollaborativeArticles(options);\n const fullArticles: FullArticle[] = [];\n\n for (const summary of summaries) {\n const fullArticle = await this.scrapeArticlePage(summary);\n if (fullArticle) {\n fullArticles.push(fullArticle);\n }\n }\n return fullArticles;\n};\n\nLinkedInPublic.prototype.scrapeArticlePage = async function (\n this: LinkedInPublic,\n articleSummary: CollaborativeArticleSummary,\n): Promise<FullArticle | null> {\n const { page } = this;\n if (!page) {\n throw new Error('Page not initialized');\n }\n\n try {\n await page.goto(articleSummary.url, { waitUntil: 'domcontentloaded' });\n await delay(1500 + Math.random() * 1500);\n\n await handleModal(page);\n\n const segmentElements = await page.$$('.article-segment');\n const sections: ArticleSection[] = [];\n for (const segment of segmentElements) {\n const section = await scrapeArticleSection(segment);\n if (section) {\n sections.push(section);\n }\n }\n\n return {\n ...articleSummary,\n sections,\n };\n } catch (e) {\n console.error(`Failed to scrape article ${articleSummary.url}:`, e);\n return null;\n }\n};\n\nLinkedInPublic.prototype.scrapeMoreToExplore = async function (\n this: LinkedInPublic,\n options,\n) {\n const { page } = this;\n if (!page) {\n throw new Error('Page not initialized');\n }\n // 1. Get a pool of topics from the main articles page\n const summaries = await this.getCollaborativeArticles({ limit: 50 }); // Get a good number of articles to find diverse topics\n const allTopics = summaries.flatMap((s) => s.topics);\n\n const uniqueTopics = Array.from(new Map(allTopics.map(item => [item.url, item])).values());\n\n const results: MoreToExploreTopic[] = [];\n const topicsToScrape = uniqueTopics.slice(\n 0,\n options?.topicLimit ?? uniqueTopics.length,\n );\n\n // 2. Visit each topic page and scrape its articles\n for (const topic of topicsToScrape) {\n await page.goto(topic.url, { waitUntil: 'domcontentloaded' });\n await delay(1500 + Math.random() * 2500);\n await handleModal(page);\n\n try {\n await page.waitForFunction(\n \"document.querySelectorAll('.content-hub-entity-card-redesign').length > 0\",\n { timeout: 20000 },\n );\n } catch (e) {\n console.warn(`Could not find articles for topic \"${topic.name}\" at ${topic.url}`);\n continue; // Skip to next topic if no articles are found\n }\n\n const articleCardElements = await page.$$(\n '.content-hub-entity-card-redesign',\n );\n const articles: CollaborativeArticleSummary[] = [];\n for (const card of articleCardElements) {\n const summary = await scrapeCollaborativeArticleSummary(card);\n if (summary) {\n articles.push(summary);\n }\n }\n results.push({ title: topic.name, url: topic.url, articles });\n }\n\n return results;\n}; ","import { Page } from 'puppeteer-core';\n\ntype EnsureLoggedIn = () => Promise<void>;\ntype GetPage = () => Page | null;\n\nexport class LinkedInPosts {\n private ensureLoggedIn: EnsureLoggedIn;\n private getPage: GetPage;\n \n protected get page(): Page | null {\n return this.getPage();\n }\n\n constructor(ensureLoggedIn: EnsureLoggedIn, getPage: GetPage) {\n this.ensureLoggedIn = ensureLoggedIn;\n this.getPage = getPage;\n }\n\n async reactToPost(postUrl: string, reaction: 'like' | 'celebrate' | 'support' | 'love' | 'insightful' | 'funny') {\n await this.ensureLoggedIn();\n if (!this.page) {\n throw new Error('Page not initialized. Please login first.');\n }\n\n console.log(`Navigating to post: ${postUrl}`);\n await this.page.goto(postUrl, { waitUntil: 'domcontentloaded' });\n \n try {\n // Wait for the post to be loaded\n await this.page.waitForSelector('.feed-shared-social-action-bar', { timeout: 15000 });\n\n const likeButtonSelector = 'button.react-button__trigger';\n await this.page.waitForSelector(likeButtonSelector, { timeout: 10000 });\n\n if (reaction === 'like') {\n console.log('Liking the post');\n await this.page.click(likeButtonSelector);\n } else {\n // For other reactions, we need to hover over the like button to show them\n console.log(`Hovering over the like button to show other reactions`);\n await this.page.hover(likeButtonSelector);\n \n const reactionSelector = `button[aria-label=\"Send ${reaction}\"]`;\n console.log(`Waiting for reaction button: ${reactionSelector}`);\n await this.page.waitForSelector(reactionSelector, { timeout: 5000 });\n \n console.log(`Clicking the \"${reaction}\" button`);\n await this.page.click(reactionSelector);\n }\n\n console.log('Successfully reacted to the post.');\n } catch (error) {\n console.error('Failed to react to the post. Saving screenshot to reaction-failure.png');\n if (this.page) {\n await this.page.screenshot({ path: 'reaction-failure.png' });\n }\n throw new Error(`Could not react to post: ${(error as Error).message}`);\n }\n }\n\n async commentOnPost(postUrl: string, commentText: string) {\n await this.ensureLoggedIn();\n if (!this.page) {\n throw new Error('Page not initialized. Please login first.');\n }\n \n console.log(`Navigating to post: ${postUrl}`);\n await this.page.goto(postUrl, { waitUntil: 'domcontentloaded' });\n\n try {\n // Click the comment button to open the comment box\n const commentButtonSelector = 'button[aria-label=\"Comment\"]';\n console.log('Waiting for the comment button...');\n await this.page.waitForSelector(commentButtonSelector, { timeout: 10000 });\n await this.page.click(commentButtonSelector);\n\n // Wait for the comment editor to appear and type the comment\n const editorSelector = '.ql-editor[contenteditable=\"true\"]';\n console.log('Waiting for the comment editor...');\n await this.page.waitForSelector(editorSelector, { timeout: 10000 });\n console.log('Typing the comment...');\n await this.page.type(editorSelector, commentText);\n\n // Click the post button to submit the comment\n const postButtonSelector = 'button.comments-comment-box__submit-button--cr';\n console.log('Waiting for the post button...');\n await this.page.waitForSelector(postButtonSelector, { timeout: 10000 });\n await this.page.click(postButtonSelector);\n\n console.log('Successfully commented on the post.');\n } catch (error) {\n console.error('Failed to comment on the post. Saving screenshot to comment-failure.png');\n if (this.page) {\n await this.page.screenshot({ path: 'comment-failure.png' });\n }\n throw new Error(`Could not comment on post: ${(error as Error).message}`);\n }\n }\n\n async createPost(text: string) {\n await this.ensureLoggedIn();\n if (!this.page) {\n throw new Error('Page not initialized. Please login first.');\n }\n\n console.log('Navigating to the feed to create a post...');\n await this.page.goto('https://www.linkedin.com/feed/', { waitUntil: 'domcontentloaded' });\n\n try {\n // Click the \"Start a post\" button\n const startPostSelector = '.share-box-feed-entry__top-bar > button';\n console.log('Waiting for the \"Start a post\" button...');\n await this.page.waitForSelector(startPostSelector, { timeout: 10000 });\n await this.page.click(startPostSelector);\n\n // Wait for the post creation modal and the editor\n const editorSelector = '.ql-editor[contenteditable=\"true\"]';\n console.log('Waiting for the post editor...');\n await this.page.waitForSelector(editorSelector, { timeout: 10000 });\n console.log('Typing the post content...');\n await this.page.type(editorSelector, text);\n\n // Click the post button to publish\n const postButtonSelector = 'button.share-actions__primary-action';\n console.log('Waiting for the post button...');\n await this.page.waitForSelector(postButtonSelector, { timeout: 10000 });\n await this.page.click(postButtonSelector);\n\n console.log('Successfully created a new post.');\n // Wait a bit for the post to actually appear if we want to verify\n await new Promise(resolve => setTimeout(resolve, 5000));\n\n } catch (error) {\n console.error('Failed to create a post. Saving screenshot to create-post-failure.png');\n if (this.page) {\n await this.page.screenshot({ path: 'create-post-failure.png' });\n }\n throw new Error(`Could not create post: ${(error as Error).message}`);\n }\n }\n} ","import { Page } from 'puppeteer-core';\nimport { _autoScroll } from '../utils/scrolling';\n\ntype EnsureLoggedIn = () => Promise<void>;\ntype GetPage = () => Page | null;\n\nexport class LinkedInJobs {\n private ensureLoggedIn: EnsureLoggedIn;\n private getPage: GetPage;\n\n protected get page(): Page | null {\n return this.getPage();\n }\n\n constructor(ensureLoggedIn: EnsureLoggedIn, getPage: GetPage) {\n this.ensureLoggedIn = ensureLoggedIn;\n this.getPage = getPage;\n }\n\n async getRecommendedJobs(limit = 10) {\n await this.ensureLoggedIn();\n if (!this.page) {\n throw new Error('Page not initialized. Please login first.');\n }\n\n console.log('Navigating to recommended jobs page...');\n await this.page.goto('https://www.linkedin.com/jobs/recommender/recommended-jobs/', { waitUntil: 'domcontentloaded' });\n\n try {\n const jobListSelector = '.jobs-recommender__job-listings-container';\n console.log('Waiting for job listings to load...');\n await this.page.waitForSelector(jobListSelector, { timeout: 15000 });\n\n console.log('Scraping recommended jobs...');\n const jobs = await this.page.evaluate((limit) => {\n const jobElements = Array.from(document.querySelectorAll('.job-card-container'));\n const jobData = [];\n for (let i = 0; i < Math.min(jobElements.length, limit); i++) {\n const el = jobElements[i];\n const title = (el.querySelector('.job-card-list__title') as HTMLElement)?.innerText || 'TBD';\n const company = (el.querySelector('.job-card-container__primary-description') as HTMLElement)?.innerText || 'TBD';\n const location = (el.querySelector('.job-card-container__metadata-item') as HTMLElement)?.innerText || 'TBD';\n const link = (el.querySelector('a.job-card-list__title') as HTMLAnchorElement)?.href || 'TBD';\n jobData.push({ title, company, location, link });\n }\n return jobData;\n }, limit);\n\n return jobs;\n } catch (error) {\n console.error('Failed to get recommended jobs. Saving screenshot to recommended-jobs-failure.png');\n if (this.page) {\n await this.page.screenshot({ path: 'recommended-jobs-failure.png' });\n }\n throw new Error(`Could not get recommended jobs: ${(error as Error).message}`);\n }\n }\n\n async searchJobs(options: { keywords: string; location: string }, limit = 10) {\n if (!this.page) {\n throw new Error('Page not initialized. Please login first.');\n }\n\n const { keywords, location } = options;\n const searchUrl = `https://www.linkedin.com/jobs/search/?keywords=${encodeURIComponent(keywords)}&location=${encodeURIComponent(location)}`;\n console.log(`Navigating to job search page: ${searchUrl}`);\n await this.page.goto(searchUrl, { waitUntil: 'domcontentloaded' });\n\n const jobListSelector = '.jobs-search-results-list';\n console.log('Waiting for job list to load...');\n await this.page.waitForSelector(jobListSelector, { timeout: 15000 });\n \n console.log('Scrolling to load all search results...');\n await this.page.evaluate(async (selector) => {\n const element = document.querySelector(selector);\n if (!element) return;\n await new Promise<void>((resolve) => {\n let totalHeight = 0;\n const distance = 100;\n const timer = setInterval(() => {\n const scrollHeight = element.scrollHeight;\n element.scrollTop += distance;\n totalHeight += distance;\n\n if (totalHeight >= scrollHeight) {\n clearInterval(timer);\n resolve();\n }\n }, 100);\n });\n }, jobListSelector);\n\n const jobCardSelector = '.scaffold-layout__list-container li.jobs-search-results__list-item';\n const jobCards = await this.page.$$(jobCardSelector);\n const jobs = [];\n\n console.log(`Found ${jobCards.length} jobs in search. Scraping details for the first ${limit}...`);\n\n for (let i = 0; i < Math.min(jobCards.length, limit); i++) {\n try {\n const card = jobCards[i];\n const cardTitle = await card.$eval('.job-card-list__title', (el) => (el as HTMLElement).innerText.trim());\n\n await card.click();\n\n await this.page.waitForFunction(\n (title) => {\n const detailTitle = document.querySelector('.job-details-jobs-unified-top-card__job-title')?.textContent?.trim();\n return detailTitle?.includes(title);\n },\n { timeout: 10000 },\n cardTitle\n );\n\n console.log(`Scraping job ${i + 1}/${limit}: ${cardTitle}`);\n \n const job = await this.page.evaluate(() => {\n const getText = (selector: string) => (document.querySelector(selector) as HTMLElement)?.innerText.trim() || null;\n \n const title = getText('.job-details-jobs-unified-top-card__job-title');\n const company = getText('a.job-details-jobs-unified-top-card__company-name');\n const location = getText('.job-details-jobs-unified-top-card__primary-description-container > div');\n const description = getText('.jobs-description-content__text');\n const applicantCount = getText('.jobs-unified-top-card__applicant-count');\n const url = (document.querySelector('.job-details-jobs-unified-top-card__job-title a') as HTMLAnchorElement)?.href || window.location.href;\n\n return { title, company, location, description, applicantCount, url };\n });\n\n jobs.push(job);\n\n } catch (error) {\n console.error(`Failed to scrape job from search ${i + 1}: ${(error as Error).message}`);\n }\n }\n\n return jobs;\n }\n\n // Placeholder for future job-related authenticated methods\n} ","import { Page } from 'puppeteer-core';\n\nexport const _autoScroll = async (page: Page): Promise<void> => {\n await page.evaluate(async () => {\n await new Promise<void>((resolve) => {\n let totalHeight = 0;\n const distance = 100;\n const timer = setInterval(() => {\n const scrollHeight = document.body.scrollHeight;\n window.scrollBy(0, distance);\n totalHeight += distance;\n\n if (totalHeight >= scrollHeight) {\n clearInterval(timer);\n resolve();\n }\n }, 100);\n });\n });\n}; ","import { Page } from 'puppeteer-core';\nimport { _autoScroll } from '../utils/scrolling';\n\ntype EnsureLoggedIn = () => Promise<void>;\ntype GetPage = () => Page | null;\n\nexport class LinkedInUsers {\n private ensureLoggedIn: EnsureLoggedIn;\n private getPage: GetPage;\n \n protected get page(): Page | null {\n return this.getPage();\n }\n\n constructor(ensureLoggedIn: EnsureLoggedIn, getPage: GetPage) {\n this.ensureLoggedIn = ensureLoggedIn;\n this.getPage = getPage;\n }\n\n async sendConnectionRequest(profileUrl: string, message?: string) {\n await this.ensureLoggedIn();\n if (!this.page) {\n throw new Error('Page not initialized. Please login first.');\n }\n\n console.log(`Navigating to profile for connection request: ${profileUrl}`);\n await this.page.goto(profileUrl, { waitUntil: 'domcontentloaded' });\n\n try {\n const connectButtonSelector = `.artdeco-card button.artdeco-button--primary:not([aria-label=\"Message\"])`;\n console.log('Waiting for the Connect button...');\n await this.page.waitForSelector(connectButtonSelector, { timeout: 10000 });\n await this.page.click(connectButtonSelector);\n await this._handleConnectionModal(message);\n } catch (error) {\n console.error('Failed to send connection request. Saving screenshot to connect-failure.png');\n if (this.page) {\n await this.page.screenshot({ path: 'connect-failure.png' });\n }\n throw new Error(`Could not send connection request: ${(error as Error).message}`);\n }\n }\n\n private async _handleConnectionModal(message?: string) {\n await this.ensureLoggedIn();\n if (!this.page) {\n throw new Error('Page not initialized.');\n }\n\n const modalSelector = 'div[role=\"dialog\"]';\n console.log('Waiting for the connection modal...');\n await this.page.waitForSelector(modalSelector, { timeout: 10000 });\n\n if (message) {\n const addNoteButtonSelector = 'button[aria-label=\"Add a note\"]';\n console.log('Waiting for the \"Add a note\" button...');\n await this.page.waitForSelector(addNoteButtonSelector, { timeout: 10000 });\n await this.page.click(addNoteButtonSelector);\n\n const messageEditorSelector = 'textarea[name=\"message\"]';\n console.log('Waiting for the message editor...');\n await this.page.waitForSelector(messageEditorSelector, { timeout: 10000 });\n await this.page.type(messageEditorSelector, message);\n }\n\n const sendButtonSelector = 'button[aria-label^=\"Send\"]';\n console.log('Waiting for the final send button...');\n await this.page.waitForSelector(sendButtonSelector, { timeout: 10000 });\n await this.page.click(sendButtonSelector);\n\n console.log('Successfully sent connection request.');\n }\n\n async followProfile(profileUrl: string) {\n await this.ensureLoggedIn();\n if (!this.page) {\n throw new Error('Page not initialized. Please login first.');\n }\n\n console.log(`Navigating to profile to follow: ${profileUrl}`);\n await this.page.goto(profileUrl, { waitUntil: 'domcontentloaded' });\n\n try {\n const followButtonSelector = `.artdeco-card .pvs-profile-actions__custom button[aria-label*=\"Follow\"]`;\n console.log('Waiting for the Follow button...');\n await this.page.waitForSelector(followButtonSelector, { timeout: 10000 });\n await this.page.click(followButtonSelector);\n\n console.log('Successfully followed the profile.');\n } catch (error) {\n console.error('Failed to follow profile. Saving screenshot to follow-failure.png');\n if (this.page) {\n await this.page.screenshot({ path: 'follow-failure.png' });\n }\n throw new Error(`Could not follow profile: ${(error as Error).message}`);\n }\n }\n\n async sendConnectionRequestOrFollow(profileUrl: string, message?: string) {\n await this.ensureLoggedIn();\n if (!this.page) {\n throw new Error('Page not initialized. Please login first.');\n }\n\n console.log(`Navigating to profile: ${profileUrl}`);\n await this.page.goto(profileUrl, { waitUntil: 'domcontentloaded' });\n\n const connectButtonSelector = `.artdeco-card button.artdeco-button--primary:not([aria-label=\"Message\"])`;\n const followButtonSelector = `.artdeco-card .pvs-profile-actions__custom button[aria-label*=\"Follow\"]`;\n\n try {\n const connectButton = await this.page.$(connectButtonSelector);\n if (connectButton) {\n console.log('Connect button found. Sending connection request.');\n await connectButton.click();\n await this._handleConnectionModal(message);\n return;\n }\n\n const followButton = await this.page.$(followButtonSelector);\n if (followButton) {\n console.log('Follow button found. Following profile.');\n await followButton.click();\n console.log('Successfully followed the profile.');\n return;\n }\n\n throw new Error('Neither Connect nor Follow button was found on the profile.');\n\n } catch (error) {\n const errorMessage = `Failed to connect or follow: ${(error as Error).message}`;\n console.error(`${errorMessage} Saving screenshot to action-failure.png`);\n if (this.page) {\n await this.page.screenshot({ path: 'action-failure.png' });\n }\n throw new Error(errorMessage);\n }\n }\n\n async sendMessage(profileUrl: string, message: string) {\n if (!this.page) {\n throw new Error('Page not initialized. Please login first.');\n }\n\n console.log(`Navigating to profile: ${profileUrl} to send a message.`);\n await this.page.goto(profileUrl, { waitUntil: 'domcontentloaded' });\n\n try {\n // Click the message button on the profile\n const messageButtonSelector = `.artdeco-card button[aria-label^=\"Message\"]`;\n console.log('Waiting for the message button...');\n await this.page.waitForSelector(messageButtonSelector, { timeout: 10000 });\n await this.page.click(messageButtonSelector);\n\n // Wait for the message composer to appear\n const composerSelector = 'div.msg-form__contenteditable[role=\"textbox\"]';\n console.log('Waiting for the message composer...');\n await this.page.waitForSelector(composerSelector, { timeout: 10000 });\n\n console.log('Typing message...');\n await this.page.type(composerSelector, message);\n\n // Click the send button\n const sendButtonSelector = '.msg-form__send-button';\n console.log('Waiting for the send button...');\n await this.page.waitForSelector(sendButtonSelector, { timeout: 10000 });\n await this.page.click(sendButtonSelector);\n\n console.log('Successfully sent message.');\n await new Promise(resolve => setTimeout(resolve, 2000)); // Wait to ensure it's sent\n } catch (error) {\n console.error('Failed to send message. Saving screenshot to message-failure.png');\n if (this.page) {\n await this.page.screenshot({ path: 'message-failure.png' });\n }\n throw new Error(`Could not send message: ${(error as Error).message}`);\n }\n }\n\n async getProfile(profileUrl: string) {\n if (!this.page) {\n throw new Error('Page not initialized. Please login first.');\n }\n\n console.log(`Navigating to profile: ${profileUrl} to scrape data.`);\n await this.page.goto(profileUrl, { waitUntil: 'domcontentloaded' });\n\n console.log('Scrolling to load all profile sections...');\n await _autoScroll(this.page);\n\n console.log('Scraping profile data...');\n\n const profileData = await this.page.evaluate(() => {\n const name = (document.querySelector('h1') as HTMLElement)?.innerText || 'TBD';\n const headline = (document.querySelector('div.text-body-medium.break-words') as HTMLElement)?.innerText || 'TBD';\n const location = (document.querySelector('span.text-body-small.inline.break-words') as HTMLElement)?.innerText || 'TBD';\n\n // About section\n const aboutElement = document.querySelector('div.display-flex.ph5.pv3 > div.display-flex.full-width > div > div > span');\n const about = aboutElement ? (aboutElement as HTMLElement).innerText : 'TBD';\n\n // Experience\n const experience: any[] = [];\n const experienceElements = document.querySelectorAll('#experience-section ul > li');\n experienceElements.forEach(el => {\n const company = el.querySelector('p.text-body-small > span:nth-child(2)')?.textContent?.trim();\n const position = el.querySelector('h3.t-16')?.textContent?.trim();\n const dateRange = el.querySelector('h4.t-14 > span:nth-child(2)')?.textContent?.trim();\n experience.push({ company, position, dateRange });\n });\n\n // Education\n const education: any[] = [];\n const educationElements = document.querySelectorAll('#education-section ul > li');\n educationElements.forEach(el => {\n const school = el.querySelector('h3.pv-entity__school-name')?.textContent?.trim();\n const degree = el.querySelector('p.pv-entity__degree-name > span:nth-child(2)')?.textContent?.trim();\n const field = el.querySelector('p.pv-entity__fos > span:nth-child(2)')?.textContent?.trim();\n const dateRange = el.querySelector('p.pv-entity__dates > span:nth-child(2)')?.textContent?.trim();\n education.push({ school, degree, field, dateRange });\n });\n\n // Skills\n const skills: string[] = [];\n const skillsElements = document.querySelectorAll('.pv-skill-category-entity__name-text');\n skillsElements.forEach(el => {\n skills.push((el as HTMLElement).innerText.trim());\n });\n\n return {\n url: window.location.href,\n name,\n headline,\n location,\n about,\n experience,\n education,\n skills,\n };\n });\n\n console.log(profileData);\n\n