UNPKG

puppeteer-hcaptcha

Version:

A library that automatically solves hCaptcha challenges, usable both within Puppeteer and without it.

403 lines (353 loc) 11.6 kB
const puppeteer = require("puppeteer-extra");
const pluginStealth = require("puppeteer-extra-plugin-stealth");
const request = require("request-promise-native");
const jwt_decode = require("jwt-decode");
const userAgents = JSON.parse(
  require("fs").readFileSync(`${__dirname}/src/useragents.json`)
);
const { rdn, tensor, mm } = require("./src/utils");

// Asset version taken from the most recently decoded challenge token.
// Written by getHSL/getHSW and read by tryToSolve right after they resolve.
let version;

// PluginStealth for any puppeteer instances
puppeteer.use(pluginStealth());

// Prefix stripped from the token's "l" claim to obtain the asset version.
const ASSET_URL_PREFIX = "https://newassets.hcaptcha.com/c/";

// Selector matching the hCaptcha iframe embedded in a host page.
const CAPTCHA_IFRAME_SELECTOR = 'iframe[src*="hcaptcha.com"]';

/**
 * @description Build a fresh launch-options object for the headless browser
 * that runs the proof script (a new object per call so nothing is shared)
 * @returns puppeteer launch options
 */
const proofBrowserOptions = () => ({
  ignoreHTTPSErrors: true,
  headless: true,
  args: [
    `--window-size=1300,570`,
    "--window-position=000,000",
    "--disable-dev-shm-usage",
    "--no-sandbox",
    '--user-data-dir="/tmp/chromium"',
    "--disable-web-security",
    "--disable-features=site-per-process",
  ],
});

/**
 * @description Download the proof script named by the challenge token, load it
 * into a throwaway headless page and call its entry function with the token.
 * Also records the asset version in the module-level `version` variable.
 * @param {string} req challenge token (JWT) whose "l" claim is the asset URL
 * @param {string} fnName "hsl" or "hsw"; both the script file name and the
 * global function it defines
 * @returns response token
 */
const runProofScript = async (req, fnName) => {
  // Decode once; the original decoded the same token twice.
  const assetUrl = jwt_decode(req)["l"];
  version = assetUrl.slice(ASSET_URL_PREFIX.length);
  const script = await request.get(`${assetUrl}/${fnName}.js`);

  const browser = await puppeteer.launch(proofBrowserOptions());
  try {
    // Get browser pages
    const [page] = await browser.pages();
    await page.addScriptTag({
      content: script,
    });
    // JSON.stringify yields a correctly quoted and escaped string literal,
    // so the token cannot break out of the evaluated expression.
    return await page.evaluate(`${fnName}(${JSON.stringify(req)})`);
  } finally {
    // Always release the Chromium process, even when a step above throws.
    await browser.close();
  }
};

/**
 * @description Dynamically get HSL function for returning value needed to solve
 * @param {string} req
 * @returns response token
 */
const getHSL = async (req) => runProofScript(req, "hsl");

/**
 * @description Dynamically get HSW function for returning value needed to solve
 * @param {string} req
 * @returns response token
 */
const getHSW = async (req) => runProofScript(req, "hsw");

/**
 * @description Use TensorFlow image recognition to determine correct answers
 * @param {string} request_image label the challenge asks the user to pick
 * @param {Array[Task]} tasks tasks carrying `datapoint_uri` and `task_key`
 * @returns plain object mapping each task_key to the string "true" or "false"
 */
const getAnswersTF = async (request_image, tasks) => {
  // Plain object: the result is sent as JSON, and the original only ever set
  // bracket properties on its Map (never Map entries).
  const answers = {};
  const threads = tasks.map((task) => tensor(task.datapoint_uri));

  try {
    const results = await Promise.all(threads);
    const wanted = request_image.toUpperCase();
    results.forEach((res, index) => {
      // Only the first prediction of each image is considered.
      const [data] = res;
      const isMatch =
        data !== undefined &&
        data.class.toUpperCase() === wanted &&
        data.score > 0.5;
      answers[tasks[index].task_key] = isMatch ? "true" : "false";
    });
  } catch (err) {
    // Best effort: a failed classification leaves the answers incomplete.
    console.log(err);
  }

  return answers;
};

/**
 * @description Build the request headers sent to hcaptcha.com
 * @param {string} userAgent
 * @param {string} contentType value for the Content-Type header
 * @returns headers object
 */
const buildHeaders = (userAgent, contentType) => ({
  Authority: "hcaptcha.com",
  Accept: "application/json",
  "Accept-Language": "en-US,en;q=0.9",
  "Content-Type": contentType,
  Origin: "https://newassets.hcaptcha.com",
  "Sec-Fetch-Site": "same-site",
  "Sec-Fetch-Mode": "cors",
  "Sec-Fetch-Dest": "empty",
  "User-Agent": userAgent,
});

/**
 * @description Fetch the site configuration (and challenge spec, if any)
 * @param {object} headers
 * @param {string} sitekey
 * @param {string} host
 * @returns parsed JSON response of checksiteconfig
 */
const fetchSiteConfig = (headers, sitekey, host) =>
  request({
    method: "get",
    headers,
    json: true,
    url: `https://hcaptcha.com/checksiteconfig?host=${host}&sitekey=${sitekey}&sc=1&swa=1`,
  });

/**
 * @description Compute the proof value for a challenge spec with the matching
 * script ("hsl" uses getHSL, anything else uses getHSW)
 * @param {{type: string, req: string}} challenge the `c` field of the config
 * @returns response token
 */
const solveProof = (challenge) =>
  challenge.type === "hsl" ? getHSL(challenge.req) : getHSW(challenge.req);

/**
 * @description Main solve function that attempts to solve captcha
 * @param {string} userAgent
 * @param {string} sitekey
 * @param {string} host
 * @returns hCaptcha solved token, or null when this attempt did not succeed
 */
const tryToSolve = async (userAgent, sitekey, host) => {
  // Create headers
  const formHeaders = buildHeaders(
    userAgent,
    "application/x-www-form-urlencoded"
  );

  // Check site config
  let response = await fetchSiteConfig(formHeaders, sitekey, host);

  // rdn comes from ./src/utils; presumably a random offset within the given
  // bounds - confirm against that module.
  let timestamp = Date.now() + rdn(30, 120);

  // Check for HSJ
  if (response.c !== undefined && response.c.type === "hsj") {
    console.error("Wrong Challenge Type. Retrying.");
    return null;
  }

  // Setup form for getting tasks list (declared locally; the original leaked
  // `form` as an implicit global)
  let form;
  if (response.c === undefined) {
    // NOTE(review): motionData is a nested object here but a JSON string in
    // the branch below; kept as in the original - confirm which is expected.
    form = {
      sitekey,
      host,
      hl: "en",
      motionData: {
        st: timestamp,
        mm: mm,
      },
    };
  } else {
    // The proof must be computed before `version` is read: getHSL/getHSW set it.
    const proof = await solveProof(response.c);
    form = {
      sitekey,
      host,
      hl: "en",
      motionData: JSON.stringify({
        st: timestamp,
        dct: timestamp,
        mm: mm,
      }),
      n: proof,
      v: version,
      c: JSON.stringify(response.c),
    };
  }

  // Get tasks
  const getTasks = await request({
    method: "post",
    headers: formHeaders,
    json: true,
    url: `https://hcaptcha.com/getcaptcha?s=${sitekey}`,
    form: form,
  });

  if (getTasks.generated_pass_UUID) {
    return getTasks.generated_pass_UUID;
  }

  // Find what the captcha is looking for user's to click: the last word of
  // the English question, with "motorbus" mapped to "bus"
  const requestImageArray = getTasks.requester_question.en.split(" ");
  let request_image = requestImageArray[requestImageArray.length - 1];
  if (request_image === "motorbus") {
    request_image = "bus";
  }

  const key = getTasks.key;
  // NOTE(review): keys not starting with "E" and with "_" as third character
  // are returned as-is; presumably they already are pass tokens - confirm.
  if (key.charAt(0) !== "E" && key.charAt(2) === "_") {
    return key;
  }

  const tasks = getTasks.tasklist;
  const job = getTasks.request_type;
  timestamp = Date.now() + rdn(30, 120);

  // Get Answers
  const answers = await getAnswersTF(request_image, tasks);

  // Renew response
  response = await fetchSiteConfig(formHeaders, sitekey, host);

  // Setup data for checking answers (declared locally; the original leaked
  // `captchaResponse` as an implicit global)
  const motionData = JSON.stringify({
    st: timestamp,
    dct: timestamp,
    mm: mm,
  });
  let captchaResponse;
  if (response.c === undefined) {
    captchaResponse = {
      job_mode: job,
      answers,
      serverdomain: host,
      sitekey,
      motionData,
      n: null,
      c: "null",
    };
  } else {
    // As above: compute the proof first so `version` is up to date.
    const proof = await solveProof(response.c);
    captchaResponse = {
      job_mode: job,
      answers,
      serverdomain: host,
      sitekey,
      motionData,
      n: proof,
      v: version,
      c: JSON.stringify(response.c),
    };
  }

  // Check answers (sent as a JSON body, hence the different Content-Type)
  const checkAnswers = await request(
    `https://hcaptcha.com/checkcaptcha/${key}?s=${sitekey}`,
    {
      method: "post",
      headers: buildHeaders(userAgent, "application/json"),
      json: true,
      body: captchaResponse,
    }
  );

  if (checkAnswers.generated_pass_UUID) {
    return checkAnswers.generated_pass_UUID;
  }

  console.error("Wrong Response. Retrying.");
  return null;
};

/**
 * @description Sets up userAgent and passes required information to tryToSolveFunction.
 * Retries until a token is obtained; a 429 response pauses for 30 seconds and
 * then retries. Any other error is logged and ends the attempt.
 * @param {string} siteKey
 * @param {string} host
 * @returns hCaptcha solved token, or undefined if a non rate-limit error occurred
 */
const solveCaptcha = async (siteKey, host) => {
  while (true) {
    try {
      // Get random index for random user agent. Math.floor over the full
      // length is uniform; Math.round under-weights the first and last entry.
      const randomIndex = Math.floor(Math.random() * userAgents.length);

      // Attempt to solve hCaptcha
      const result = await tryToSolve(
        userAgents[randomIndex].useragent,
        siteKey,
        host
      );
      if (result) {
        return result;
      }
    } catch (e) {
      if (!e || e.statusCode !== 429) {
        // The original swallowed these silently and resolved with undefined;
        // keep that result for callers but make the failure visible.
        console.error(e);
        return undefined;
      }
      // Reached rate limit, wait 30 sec and then try again. The original
      // caught outside the loop, so it waited and then gave up.
      console.log("Rate limited. Waiting 30 seconds.");
      await new Promise((r) => setTimeout(r, 30000));
    }
  }
};

/**
 * @description Setup function for hCaptcha solver using puppeteer. Solves the
 * captcha found on the page and writes the token into the
 * `h-captcha-response` field.
 * @param {Page} page
 * @returns null
 */
const hcaptcha = async (page) => {
  // Expose the page to our solveCaptcha function so we can utilize it
  await page.exposeFunction("solveCaptcha", solveCaptcha);

  // Wait for iframe to load
  await page.waitForSelector(CAPTCHA_IFRAME_SELECTOR);

  const token = await page.evaluate(async () => {
    // Get hcaptcha iframe so we can get the host value.
    // NOTE(review): the whole src (not only its query/fragment) is handed to
    // URLSearchParams, so this relies on sitekey/host not being the first
    // parameter of the iframe URL - confirm.
    const iframesrc = document.querySelector(
      'iframe[src*="hcaptcha.com"]'
    ).src;
    const urlParams = new URLSearchParams(iframesrc);

    return await solveCaptcha(
      urlParams.get("sitekey"),
      urlParams.get("host")
    );
  });

  await page.evaluate((token) => {
    document.querySelector('[name="h-captcha-response"]').value = token;
  }, token);

  return;
};

/**
 * @description Setup function for hCaptcha solver without puppeteer. Opens the
 * url in a temporary headless browser only to read the sitekey and host from
 * the hCaptcha iframe, then solves the captcha.
 * @param {string} url
 * @returns hCaptcha solved token
 */
const hcaptchaToken = async (url) => {
  const browser = await puppeteer.launch({
    ignoreHTTPSErrors: true,
    headless: true,
  });

  let captchaData;
  try {
    // Get browser pages
    const [page] = await browser.pages();

    // Must be set before navigating; the original set it after page.goto,
    // where it could no longer affect that navigation.
    page.setDefaultNavigationTimeout(0);
    await page.goto(url);

    // Wait for iframe to load
    await page.waitForSelector(CAPTCHA_IFRAME_SELECTOR);

    captchaData = await page.evaluate(() => {
      // Get hcaptcha iframe so we can get the host value.
      // NOTE(review): see hcaptcha() - the whole src is parsed as a query
      // string, which relies on sitekey/host not being the first parameter.
      const iframesrc = document.querySelector(
        'iframe[src*="hcaptcha.com"]'
      ).src;
      const urlParams = new URLSearchParams(iframesrc);

      return [urlParams.get("sitekey"), urlParams.get("host")];
    });
  } finally {
    // Close the browser even when navigation or the selector wait fails.
    await browser.close();
  }

  // Solve Captcha
  const [sitekey, host] = captchaData;
  return await solveCaptcha(sitekey, host);
};

module.exports = { hcaptcha, hcaptchaToken, solveCaptcha };