UNPKG

ojscraper

Version:

"A module that scraps user information from various OJ"

113 lines (92 loc) 2.86 kB
/** LightOJ Scraper */ const puppeteer = require('puppeteer'); const cheerio = require('cheerio'); async function getProblemInfo (problemID, credential) { let browser; try { browser = await puppeteer.launch({ args: ['--no-sandbox', '--disable-setuid-sandbox'], // headless: false, }); } catch (err) { await browser.close(); throw err; } try { const regexStr = '^\\d{4}$'; const regex = new RegExp(regexStr); const match = regex.exec(problemID); if (!match) throw Error(`Invalid problemID. Failed regex ${regexStr}`); const page = await browser.newPage(); await page.goto(`http://www.lightoj.com/login_main.php`); await page.type('#myuserid', credential.userId); await page.type('#mypassword', credential.password); await page.click('input[type="submit"]'); await page.waitForNavigation(); await page.goto(`http://www.lightoj.com/volume_showproblem.php?problem=${problemID}`, { waitUntil: 'networkidle2', }); const html = await page.content(); const $ = cheerio.load(html); const titleRaw = $('#problem_name').text(); const title = titleRaw.trim().substr(7); if (!title) throw Error('Invalid ProblemID: Missing title'); const info = { platform: 'loj', problemID, title, link: `http://www.lightoj.com/volume_showproblem.php?problem=${problemID}`, }; return info; } catch (err) { throw err; } finally { await browser.close(); } }; async function getUserInfo (username, credential) { let browser; try { browser = await puppeteer.launch({ args: ['--no-sandbox', '--disable-setuid-sandbox'], // headless: false, }); } catch (err) { await browser.close(); throw err; } try { const regexStr = '^\\d+$'; const regex = new RegExp(regexStr); const match = regex.exec(username); if (!match) throw Error(`Invalid username. Failed regex ${regexStr}`); const page = await browser.newPage(); await page.goto(`http://www.lightoj.com/login_main.php`); await page.type('#myuserid', credential.userId); await page.type('#mypassword', credential.password); await page.click('input[type="submit"]'); await page.waitForNavigation(); await page.goto(`http://www.lightoj.com/volume_userstat.php?user_id=${username}`, { waitUntil: 'networkidle2', }); const html = await page.content(); const $ = cheerio.load(html); const solved = $('.leftTop').eq(4).parent().parent().find('a').map(function() { return $(this).text().trim(); }).toArray(); return { platform: 'loj', username, solveCount: solved.length, solveList: solved, }; } catch (err) { throw err; } finally { await browser.close(); } }; module.exports = { getUserInfo, getProblemInfo, };