UNPKG

@padrocha/uam-scraping

Version:

Scraping of teachers

431 lines (430 loc) 27.7 kB
#!/usr/bin/env node
"use strict";
/**
 * Command-line entry point of the UAM schedule scraper.
 *
 * Flow, as implemented below:
 *   1. Optionally reuse the JSON backup stored at `config.JSONUEA`.
 *   2. Otherwise log into SIIUAM with puppeteer, read the schedule table of
 *      every UEA the user selects, look every teacher up on misprofesores and
 *      persist the merged result as the new backup.
 *   3. Build conflict-free combinations (one group per subject), rank them by
 *      average teacher quality and let the user pick one.
 *   4. Render the chosen schedule to `config.PDF.SCHEDULE` and open it.
 *
 * The helpers imported from `./utils` (`tryDOM`, `confirm`, `selectUEAS`, ...)
 * are defined elsewhere; comments about them only describe how they are
 * called here.
 */
Object.defineProperty(exports, "__esModule", { value: true });

/** Mirrors TypeScript's default-import interop for CommonJS modules. */
const interopDefault = (mod) => (mod && mod.__esModule ? mod : { default: mod });

const { existsSync, readFileSync, writeFileSync, mkdirSync, createWriteStream, unlinkSync } = require("fs");
const puppeteer = interopDefault(require("puppeteer")).default;
const PDFDocument = interopDefault(require("pdfkit")).default;
const { execSync } = require("child_process");
const { config } = require("./config");
const {
    log,
    confirm,
    tryDOM,
    askUser,
    password,
    selectUEAS,
    selectTeachers,
    selectSchedule,
    progressBar,
    timeParse,
    week,
} = require("./utils");

/** Selector of the UEA code input on the SIIUAM schedule query form. */
const UEA_INPUT = 'input[name="CD_UEA.CONTROL.NONMODELED"]';

/** Column index of every weekday inside a SIIUAM schedule table row. */
const DAY_COLUMNS = [
    ['monday', 4],
    ['tuesday', 5],
    ['wednesday', 6],
    ['thursday', 7],
    ['friday', 8],
];

/** Three-way comparison using `<` / `>`, returning -1, 0 or 1. */
const compareAscending = (a, b) => (a < b ? -1 : (a > b ? 1 : 0));

/** Resolves the content frame of an element handle; `undefined` when the handle is missing. */
async function frameOf(handle) {
    return handle == null ? undefined : handle.contentFrame();
}

/** Finds `selector` inside `parent` and resolves its content frame; `undefined` when either is missing. */
async function childFrame(parent, selector) {
    if (parent == null)
        return undefined;
    return frameOf(await parent.$(selector));
}

/** Reads a DOM property of an element handle as JSON; `undefined` when the element is missing. */
async function propertyOf(element, property) {
    if (element == null)
        return undefined;
    const handle = await element.getProperty(property);
    return handle == null ? undefined : handle.jsonValue();
}

/** Reads `innerText` of a handle that must exist (throws when `element` is undefined). */
async function innerTextOf(element) {
    const handle = await element.getProperty('innerText');
    return handle == null ? undefined : handle.jsonValue();
}

/**
 * Runs `action` while already waiting for the navigation it triggers in `frame`.
 * Starting the wait first avoids missing a navigation that completes before
 * `waitForNavigation()` is called.
 */
async function navigateWith(frame, action) {
    await Promise.all([frame.waitForNavigation(), action()]);
}

/**
 * Collects the distinct teacher names referenced by `subjects`.
 * @param {Map<string, Array<object>>} subjects subject name -> schedule entries
 * @param {(teacher: any) => string} nameOf extracts the name from an entry's `teacher` field
 * @returns {Set<string>} names longer than one character
 */
function collectTeacherNames(subjects, nameOf) {
    const names = new Set();
    for (const schedule of subjects.values()) {
        for (const { teacher } of schedule) {
            const name = nameOf(teacher);
            if (name.length > 1)
                names.add(name);
        }
    }
    return names;
}

/**
 * Offers to reuse the backup file when it exists.
 * @param {NodeJS.WriteStream} stdout used to erase the prompt line when the user declines
 * @returns {Promise<{subjects: Map, teachers: Set<string>}>} empty collections when no backup is used
 */
async function loadBackup(stdout) {
    const nothing = { subjects: new Map(), teachers: new Set() };
    if (!existsSync(config.JSONUEA))
        return nothing;
    log('Uea´s backup found');
    if (!(await confirm('Do you wanna use it'))) {
        stdout.moveCursor(0, -1);
        stdout.clearScreenDown();
        return nothing;
    }
    log('Reading Uea´s');
    try {
        const { schedules } = JSON.parse(readFileSync(config.JSONUEA, 'utf8'));
        if (!schedules)
            return nothing;
        const subjects = new Map(schedules);
        // In the backup every entry already carries the rated teacher object.
        return { subjects, teachers: collectTeacherNames(subjects, (teacher) => teacher.name) };
    }
    catch (err) {
        // A damaged backup must not abort the run: fall back to scraping.
        log('Uea´s backup could not be read, fetching again');
        return nothing;
    }
}

/**
 * Logs into SIIUAM on `page`, lets the user choose UEAs and reads their schedule tables.
 * @returns {Promise<Map<string, Array<object>>>} UEA name -> raw schedule entries
 *          (`teacher` is still the plain name string at this point)
 */
async function scrapeSubjects(page) {
    await page.goto(config.PATH.SIIUAM, { waitUntil: 'load', timeout: 0 });
    page.setDefaultNavigationTimeout(config.NAVIGATION);

    const bodyFrame = await childFrame(page, 'frame#bodyFrame');
    const controlFrame = await childFrame(bodyFrame, 'frame#controlFrame');
    // NOTE(review): tryDOM presumably retries the callback until the DOM is ready - confirm in ./utils.
    const menuFrame = await tryDOM(async () => childFrame(controlFrame, 'frame#menuFrame'), page);

    const USER = config.USER ? config.USER : await askUser();
    const PASS = config.PASS ? config.PASS : await password();
    log('SIIUAM > Loggin user');
    await menuFrame.type('input[name="NOMBRE.IDENTIFICACION.NONMODELED"]', USER);
    await menuFrame.type('input[name="COMPLEMENTO.IDENTIFICACION.NONMODELED"]', PASS);
    await navigateWith(menuFrame, () => menuFrame.click('input[name="GO.IDENTIFICACION.NONMODELED"]'));

    log('Fetching UEA Schedules');
    await menuFrame.click('a[href="CTWBS012"]');
    const infoFrame = await tryDOM(async () => childFrame(bodyFrame, 'frame#infoFrame'), page);
    const availableCells = await tryDOM(async () => {
        const bolFrame = await frameOf(await infoFrame.$('iframe#ifrm_bol'));
        const innerFrame = await childFrame(bolFrame, 'iframe');
        return innerFrame == null ? undefined : innerFrame.$$('.celda.uea_disponible');
    }, page);

    const ueas = [];
    for (const cell of availableCells) {
        const key = await propertyOf(cell, 'id');
        const label = await cell.$('.nombre');
        const name = await (label == null ? undefined : label.evaluate(e => e.textContent));
        ueas.push({ key, name });
    }
    if (ueas.length < 1)
        throw new Error("Uea´s not found");

    const selected = await selectUEAS(ueas);
    const scheduleLink = await tryDOM(async () => menuFrame.$('a[href="IEWBC005.oConsulta"]'), page);
    await navigateWith(infoFrame, () => scheduleLink.click());

    const subjects = new Map();
    let done = 0;
    progressBar();
    for (const uea of selected) {
        // The trailing newline submits the query form.
        await navigateWith(infoFrame, () => infoFrame.type(UEA_INPUT, uea.key + '\n'));
        const rows = await tryDOM(async () => {
            const fieldset = await infoFrame.$('fieldset');
            return fieldset == null ? undefined : fieldset.$$("tr");
        }, page);
        rows.shift(); // the first row is skipped (table header)

        const schedule = [];
        for (const row of rows) {
            const cells = await tryDOM(async () => row.$$('td'), page);
            const teacher = await innerTextOf(cells[0]);
            const group = await innerTextOf(cells[1]);
            const entry = { key: uea.key, teacher, group };
            for (const [day, column] of DAY_COLUMNS)
                entry[day] = timeParse(await innerTextOf(cells[column]));
            schedule.push(entry);
        }
        subjects.set(uea.name, schedule);
        progressBar(selected.length, ++done);
        // Triple click selects the previous code so the next one overwrites it.
        await infoFrame.click(UEA_INPUT, { clickCount: 3 });
    }
    return subjects;
}

/**
 * Opens a misprofesores profile and reads its rating figures.
 * @returns {Promise<object|null>} the rating record, or `null` when the page
 *          could not be loaded or is not a profile (no `.rating-count` element)
 */
async function fetchTeacherRating(browser, name, url) {
    const page = await browser.newPage();
    const textOf = async (selector) => propertyOf(await page.$(selector), 'innerText');
    try {
        await page.goto(url, { waitUntil: 'load', timeout: 0 });
        page.setDefaultNavigationTimeout(15000000);
        const quality = Number(await textOf('.quality .grade'));
        const takeAgainText = await textOf('.takeAgain .grade');
        // The last character is dropped before conversion.
        const takeAgain = Number(takeAgainText == null ? undefined : takeAgainText.slice(0, -1));
        const difficulty = Number(await textOf('.difficulty .grade'));
        const studentsText = await textOf('.rating-count');
        // Throws when the counter is missing, which marks the page as "not a profile".
        const students = Number(studentsText.trim().split(/\s/).shift());
        return { name, quality, takeAgain, difficulty, students };
    }
    catch (err) {
        // Deliberately best-effort: an unusable result only means "try the next one".
        return null;
    }
    finally {
        await page.close();
    }
}

/**
 * Searches every teacher on the already opened misprofesores `searchPage`.
 * Teachers for whom no search result yields a rating are removed from `teachers`.
 * @returns {Promise<Map<string, object>>} teacher name -> rating record
 */
async function scrapeTeacherRatings(browser, searchPage, teachers) {
    const ratings = new Map();
    let found = 0;
    progressBar();
    for (const teacher of teachers) {
        await searchPage.waitForSelector('input[name="q"]');
        await searchPage.type('input[name="q"]', teacher + '\n');
        await searchPage.waitForSelector('a.gs-title');
        const results = await searchPage.$$('.gsc-webResult.gsc-result');

        let rating = null;
        for (const result of results || []) {
            const link = await (result == null ? undefined : result.$('a.gs-title'));
            const url = await propertyOf(link, 'href');
            if (!url)
                continue; // nothing to open for this result
            rating = await fetchTeacherRating(browser, teacher, url);
            if (rating)
                break;
        }
        if (rating) {
            ratings.set(teacher, rating);
            ++found;
        }
        else {
            // Only drop the teacher once every result failed. Deleting the
            // current element while iterating a Set is safe.
            teachers.delete(teacher);
        }
        progressBar(teachers.size, found);
    }
    return ratings;
}

/**
 * Replaces every entry's teacher name with its rating record (a zeroed record
 * with an empty name when unrated) and orders each subject's groups from best
 * to worst by quality, then by number of students.
 */
function attachRatings(subjects, ratings) {
    const ranked = new Map();
    for (const [subject, schedule] of subjects) {
        const rated = schedule.map((entry) => {
            entry.teacher = ratings.has(entry.teacher)
                ? ratings.get(entry.teacher)
                : { name: '', quality: 0, takeAgain: 0, difficulty: 0, students: 0 };
            return entry;
        });
        rated.sort(({ teacher: a }, { teacher: b }) => compareAscending(a.quality, b.quality) || compareAscending(a.students, b.students));
        rated.reverse();
        ranked.set(subject, rated);
    }
    return ranked;
}

/** Scrapes SIIUAM and misprofesores, writes the backup file and returns the merged data. */
async function scrapeAndSave() {
    log('Loading page: SIIUAM');
    const browser = await puppeteer.launch({ headless: config.HEADLESS });
    const siiuam = await browser.newPage();
    const rawSubjects = await scrapeSubjects(siiuam);
    if (rawSubjects.size < 1)
        throw new Error("Subjects not found");
    const teachers = collectTeacherNames(rawSubjects, (teacher) => teacher);
    log('Closing page: SIIUAM');
    await siiuam.close();

    log('Loading page: misprofesores');
    const searchPage = await browser.newPage();
    await searchPage.goto(config.PATH.MISPROFESORES, { waitUntil: 'load', timeout: 0 });
    searchPage.setDefaultNavigationTimeout(config.NAVIGATION);
    log('Fetching teachers data');
    const ratings = await scrapeTeacherRatings(browser, searchPage, teachers);
    if (ratings.size < 1)
        throw new Error("Teachers data not found");
    log('Closing page: misprofesores');
    await searchPage.close();
    log('Closing browser');
    await browser.close();

    const subjects = attachRatings(rawSubjects, ratings);
    const backup = JSON.stringify({
        subjects: Array.from(subjects.keys()),
        schedules: Array.from(subjects.entries()),
    });
    writeFileSync(config.JSONUEA, backup, 'utf-8');
    log(`Uea´s data saved as "${config.JSONUEA}"`);
    return { subjects, teachers };
}

/** True when two `{ starts, ends }` slots of the same day share a boundary or overlap. */
function slotsClash(taken, candidate) {
    return taken.starts === candidate.starts
        || taken.ends === candidate.ends
        || (candidate.starts > taken.starts && candidate.starts < taken.ends)
        || (candidate.ends > taken.starts && candidate.ends < taken.ends)
        || (taken.starts > candidate.starts && taken.starts < candidate.ends)
        || (taken.ends > candidate.starts && taken.ends < candidate.ends);
}

/** True when `candidate` clashes on any weekday with an entry already in `chosen`. */
function clashesWith(chosen, candidate) {
    for (const entry of chosen.values()) {
        for (const day of week) {
            if (entry[day] && candidate[day] && slotsClash(entry[day], candidate[day]))
                return true;
        }
    }
    return false;
}

/** Builds the identifier of a combination from group, subject and teacher name fragments. */
function scheduleCode(sortedEntries, subjectCount) {
    let code = '';
    for (const [subject, entry] of sortedEntries) {
        code += entry.group;
        code += subject.substring(0, 3).toUpperCase();
        code += subject.slice(-3).toUpperCase();
        code += entry.teacher.name.substring(0, 3).toUpperCase();
        code += entry.teacher.name.slice(-3).toUpperCase();
    }
    return code.padEnd(subjectCount * 18, '0');
}

/**
 * Builds every complete, conflict-free combination reachable by starting from
 * each rated group and greedily taking the first compatible rated group of
 * every other subject (subjects visited in alphabetical order).
 * @returns {Array<[string, Map<string, object>]>} combinations ordered by
 *          descending average teacher quality
 */
function buildCombinations(subjects) {
    const combinations = new Map();
    for (const [subject, groups] of subjects) {
        const otherSubjects = Array.from(subjects.keys())
            .filter((name) => name !== subject)
            .sort(compareAscending);
        for (const seed of groups) {
            // Skip (rather than stop at) unrated groups so a rated group that
            // happens to follow one is still considered.
            if (!seed.teacher.name)
                continue;
            const chosen = new Map([[subject, seed]]);
            for (const other of otherSubjects) {
                for (const candidate of subjects.get(other)) {
                    if (!candidate.teacher.name || clashesWith(chosen, candidate))
                        continue;
                    chosen.set(other, candidate);
                    break;
                }
            }
            if (chosen.size === subjects.size) {
                const sorted = Array.from(chosen).sort(([a], [b]) => compareAscending(a, b));
                combinations.set(scheduleCode(sorted, subjects.size), new Map(sorted));
            }
        }
    }
    const averageQuality = (schedule) => Array.from(schedule.values())
        .reduce((sum, { teacher }) => sum + teacher.quality, 0) / schedule.size;
    return Array.from(combinations)
        .sort(([, a], [, b]) => averageQuality(a) - averageQuality(b))
        .reverse();
}

/**
 * Renders the chosen schedule to `config.PDF.SCHEDULE`.
 * @returns {Promise<void>} resolves once the file has been completely written
 */
function renderSchedulePdf(subjects_info, hours) {
    const doc = new PDFDocument();
    const max_size = Math.max(...subjects_info.map(({ teacher }) => teacher.length));
    const x = 50;
    const x_mins = doc.page.width - x;
    mkdirSync(config.PDF.DIRECTORY, { recursive: true });

    const output = createWriteStream(config.PDF.SCHEDULE);
    const written = new Promise((resolve, reject) => {
        output.on('finish', resolve);
        output.on('error', reject);
    });
    doc.pipe(output);

    // NOTE(review): the icon path is resolved against the current working directory - confirm this is intended.
    doc.image('assets/icon-1024x1024.png', x, 50, { width: 30 })
        .fontSize(22.5)
        .text('Horario hecho por Cétr!co.Productions', x + 40, 56);
    doc.moveDown();
    subjects_info.forEach(({ key, subject, teacher }) => {
        const y = doc.y;
        const teacher_x = x + key.length * 6;
        doc.fontSize(10)
            .text(key, x, y)
            .text(teacher, teacher_x, y)
            .text(subject, teacher_x + max_size * 5.5, y);
    });
    doc.moveDown();

    const y_start = doc.y;
    const hour_size = 5 * 8;
    const average_width = (x_mins - x - hour_size) / 5;
    const margin_top = 4.5;
    const cell = { width: average_width, align: 'center' };
    const rule = () => doc.moveTo(x, doc.y).lineTo(x_mins, doc.y).stroke();

    doc.moveTo(x, y_start).lineTo(x_mins, y_start).stroke();
    week.forEach((day, column) => {
        doc.text(day, x + hour_size + column * average_width, y_start + margin_top, cell);
    });
    rule();
    Object.keys(hours).forEach((hour) => {
        const y = doc.y;
        doc.text(hour, x, y + margin_top, { width: hour_size, align: 'center' });
        week.forEach((day, column) => {
            const hour_day = hours[hour][day];
            if (hour_day)
                doc.text(hour_day, x + hour_size + column * average_width, y + margin_top, cell);
        });
        rule();
    });
    doc.moveTo(x, y_start).lineTo(x, doc.y).stroke();
    doc.moveTo(x_mins, y_start).lineTo(x_mins, doc.y).stroke();
    doc.end();
    return written;
}

/** Opens `file` with the platform's default application. */
function openFile(platform, file) {
    // `start` treats its first quoted argument as a window title, hence the empty "".
    const command = platform.startsWith('win') ? 'start ""' : platform === 'darwin' ? 'open' : 'xdg-open';
    execSync(`${command} "${file}"`);
}

/**
 * Runs the whole interactive workflow.
 * @param {NodeJS.Process} process only `platform` and `stdout` are used
 */
async function main({ platform, stdout }) {
    let { subjects, teachers } = await loadBackup(stdout);
    if (subjects.size < 1)
        ({ subjects, teachers } = await scrapeAndSave());

    log('Choosing the most optimal schedules');
    let combinations = buildCombinations(subjects);

    if (await confirm('Do you want to prioritize a teacher')) {
        // Assumes selectTeachers resolves to an array of teacher names (it is used with `.includes`/`.length` upstream).
        const wanted = await selectTeachers(teachers);
        combinations = combinations.filter(([, schedule]) => {
            const present = new Set(Array.from(schedule.values()).map(({ teacher }) => teacher.name));
            // Every selected teacher must appear; counting matches would let one
            // teacher with two subjects stand in for another.
            return wanted.every((name) => present.has(name));
        });
        if (combinations.length < 1)
            throw new Error("No combinations lefts with this teachers");
    }

    const { subjects_info, hours } = await selectSchedule(combinations);
    // Wait for the file to be fully flushed before handing it to a viewer.
    await renderSchedulePdf(subjects_info, hours);
    openFile(platform, config.PDF.SCHEDULE);

    if (!(await confirm('Do you want to keep the uea backup')))
        unlinkSync(config.JSONUEA);
}

main(process)
    .then(() => 0, (err) => {
        console.error(err);
        return 1;
    })
    .then((code) => {
        log(`Done`);
        // Exit explicitly, as before, instead of waiting for open handles to drain.
        process.exit(code);
    });