// @padrocha/uam-scraping
// Version:
// Scraping of teachers
// 431 lines (430 loc) • 27.7 kB
// JavaScript
;
/**
 * Drives a generator function as if it were an async function.
 *
 * @param {*} thisArg - `this` value the generator is invoked with.
 * @param {ArrayLike<*>} [_arguments] - Arguments forwarded to the generator.
 * @param {PromiseConstructor} [P] - Promise implementation; defaults to the global `Promise`.
 * @param {GeneratorFunction} generator - Generator whose yielded values are awaited.
 * @returns {Promise<*>} Settles with the generator's return value or the first uncaught error.
 */
var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
    // Wrap plain values so that every yielded value can be chained with `.then`.
    function toPromise(candidate) {
        if (candidate instanceof P) {
            return candidate;
        }
        return new P(function (settle) {
            settle(candidate);
        });
    }
    return new (P || (P = Promise))(function (resolve, reject) {
        // Resume the generator with the fulfilled value of the last yield.
        function onFulfilled(result) {
            try {
                advance(generator.next(result));
            }
            catch (error) {
                reject(error);
            }
        }
        // Re-throw a rejection inside the generator so its own try/catch can see it.
        function onRejected(reason) {
            try {
                advance(generator["throw"](reason));
            }
            catch (error) {
                reject(error);
            }
        }
        // Either finish with the return value or wait for the next yielded value.
        function advance(state) {
            if (state.done) {
                resolve(state.value);
            }
            else {
                toPromise(state.value).then(onFulfilled, onRejected);
            }
        }
        generator = generator.apply(thisArg, _arguments || []);
        advance(generator.next());
    });
};
/**
 * Returns an async iterator for `o`.
 *
 * When `o` exposes `Symbol.asyncIterator` that iterator is returned directly.
 * Otherwise the synchronous iterator of `o` is wrapped so that `next`, `throw`
 * and `return` (each only when the underlying iterator has it) return promises
 * of `{ value, done }` with `value` resolved.
 *
 * @param {*} o - Async or sync iterable.
 * @returns {AsyncIterator<*>} Async iterator over the values of `o`.
 * @throws {TypeError} When `Symbol.asyncIterator` is not available.
 */
var __asyncValues = (this && this.__asyncValues) || function (o) {
    if (!Symbol.asyncIterator) {
        throw new TypeError("Symbol.asyncIterator is not defined.");
    }
    const asyncFactory = o[Symbol.asyncIterator];
    if (asyncFactory) {
        return asyncFactory.call(o);
    }
    // Fall back to the synchronous iterator (via a `__values` helper when one is in scope).
    const source = typeof __values === "function" ? __values(o) : o[Symbol.iterator]();
    const wrapper = {};
    for (const method of ["next", "throw", "return"]) {
        wrapper[method] = source[method] && function (argument) {
            return new Promise(function (resolve, reject) {
                const { done, value } = source[method](argument);
                Promise.resolve(value).then(function (settled) {
                    resolve({ value: settled, done: done });
                }, reject);
            });
        };
    }
    wrapper[Symbol.asyncIterator] = function () {
        return this;
    };
    return wrapper;
};
/**
 * Normalises a CommonJS module so that it can be consumed through `.default`.
 *
 * @param {*} mod - Result of a `require` call.
 * @returns {*} `mod` itself when it is flagged `__esModule`, otherwise `{ default: mod }`.
 */
var __importDefault = (this && this.__importDefault) || function (mod) {
    if (mod && mod.__esModule) {
        return mod;
    }
    return { "default": mod };
};
Object.defineProperty(exports, "__esModule", { value: true });
const fs_1 = require("fs");
const puppeteer_1 = __importDefault(require("puppeteer"));
const pdfkit_1 = __importDefault(require("pdfkit"));
const child_process_1 = require("child_process");
const config_1 = require("./config");
const utils_1 = require("./utils");
/**
 * Command-line entry point, invoked immediately with the Node `process` object.
 *
 * Flow:
 *  1. If a backup exists at `config.JSONUEA` and the user accepts it, load the
 *     subject schedules from it.
 *  2. Otherwise drive SIIUAM with puppeteer (login, UEA selection, schedule rows),
 *     look every teacher up on misprofesores, merge the ratings into the
 *     schedules and save the result to `config.JSONUEA`.
 *  3. Build combinations with one group per subject whose weekly time slots do
 *     not clash, ordered by average teacher quality, optionally filtered by
 *     teachers the user wants to prioritise.
 *  4. Render the chosen schedule to `config.PDF.SCHEDULE`, wait until the file is
 *     fully written, and open it with the platform's opener command.
 *
 * The process always terminates through `process.exit`: code 0 on success and
 * code 1 after logging any error. `Done` is logged in both cases.
 *
 * @param {object} process - Node process object; `platform` and `stdout` are used.
 */
(({ argv, platform, stdout }) => __awaiter(void 0, void 0, void 0, function* () {
    // Three-way comparator for ascending order.
    const ascending = (a, b) => (a < b) ? -1 : ((a > b) ? 1 : 0);
    // Reads `innerText` of an element handle as a plain value (throws if `element` is missing).
    const readInnerText = (element) => __awaiter(void 0, void 0, void 0, function* () {
        const handle = yield element.getProperty('innerText');
        return yield (handle === null || handle === void 0 ? void 0 : handle.jsonValue());
    });
    // Reads `innerText` of the first match of `selector` on `page`; undefined when nothing matches.
    const readSelectorText = (page, selector) => __awaiter(void 0, void 0, void 0, function* () {
        const element = yield page.$(selector);
        if (!element) {
            return undefined;
        }
        return yield readInnerText(element);
    });
    // Collects the distinct teacher names longer than one character across all schedules.
    const collectTeachers = (subject_map, nameOf) => {
        const names = new Set();
        for (const [, schedule] of subject_map) {
            for (const { teacher } of schedule) {
                const name = nameOf(teacher);
                if (name.length > 1) {
                    names.add(name);
                }
            }
        }
        return names;
    };
    // True when two schedule entries share a day on which their time ranges collide.
    const clashes = (uea, schedule_curr) => utils_1.week.some(day => {
        const uea_day = uea[day];
        const curr_day = schedule_curr[day];
        if (!(uea_day && curr_day)) {
            return false;
        }
        return uea_day.starts === curr_day.starts ||
            uea_day.ends === curr_day.ends ||
            (curr_day.starts > uea_day.starts && curr_day.starts < uea_day.ends) ||
            (curr_day.ends > uea_day.starts && curr_day.ends < uea_day.ends) ||
            (uea_day.starts > curr_day.starts && uea_day.starts < curr_day.ends) ||
            (uea_day.ends > curr_day.starts && uea_day.ends < curr_day.ends);
    });
    // Mean teacher quality of one combination (Map of subject -> schedule entry).
    const averageQuality = (combination) => Array.from(combination)
        .reduce((sum, [, entry]) => sum + entry.teacher.quality, 0) / combination.size;
    // Exit status reported from `finally`, so the closing log line is actually reached.
    let exit_code = 0;
    try {
        const ueas = [];
        let subjects = new Map();
        let teachers = new Set();
        // ---- 1. Optional reuse of the backup file --------------------------------
        if ((0, fs_1.existsSync)(config_1.config.JSONUEA)) {
            (0, utils_1.log)('Uea´s backup found');
            if (yield (0, utils_1.confirm)('Do you wanna use it')) {
                (0, utils_1.log)('Reading Uea´s');
                const { schedules } = JSON.parse((0, fs_1.readFileSync)(config_1.config.JSONUEA, 'utf8'));
                if (schedules) {
                    subjects = new Map(schedules);
                    // In the backup every `teacher` is already a rating object with a `name`.
                    teachers = collectTeachers(subjects, teacher => teacher.name);
                }
            }
            else {
                stdout.moveCursor(0, -1);
                stdout.clearScreenDown();
            }
        }
        // ---- 2. Scrape when nothing was loaded from the backup -------------------
        if (subjects.size < 1) {
            (0, utils_1.log)('Loading page: SIIUAM');
            const browser = yield puppeteer_1.default.launch({ headless: config_1.config.HEADLESS });
            const SIIUAM = yield browser.newPage();
            yield SIIUAM.goto(config_1.config.PATH.SIIUAM, { waitUntil: 'load', timeout: 0 });
            SIIUAM.setDefaultNavigationTimeout(config_1.config.NAVIGATION);
            // The page is a frameset: bodyFrame > controlFrame > menuFrame holds the login form.
            const bodyFrame = yield SIIUAM.$('frame#bodyFrame');
            const body_frame = yield (bodyFrame === null || bodyFrame === void 0 ? void 0 : bodyFrame.contentFrame());
            const controlFrame = yield (body_frame === null || body_frame === void 0 ? void 0 : body_frame.$('frame#controlFrame'));
            const control_frame = yield (controlFrame === null || controlFrame === void 0 ? void 0 : controlFrame.contentFrame());
            // NOTE(review): `tryDOM` comes from ./utils; presumably it retries the lookup - confirm.
            const menu_frame = yield (0, utils_1.tryDOM)(() => __awaiter(void 0, void 0, void 0, function* () {
                const menuFrame = yield (control_frame === null || control_frame === void 0 ? void 0 : control_frame.$('frame#menuFrame'));
                return yield (menuFrame === null || menuFrame === void 0 ? void 0 : menuFrame.contentFrame());
            }), SIIUAM);
            // Credentials come from the config when present, otherwise from interactive prompts.
            const USER = config_1.config.USER ? config_1.config.USER : yield (0, utils_1.askUser)();
            const PASS = config_1.config.PASS ? config_1.config.PASS : yield (0, utils_1.password)();
            (0, utils_1.log)('SIIUAM > Loggin user');
            yield menu_frame.type('input[name="NOMBRE.IDENTIFICACION.NONMODELED"]', USER);
            yield menu_frame.type('input[name="COMPLEMENTO.IDENTIFICACION.NONMODELED"]', PASS);
            yield menu_frame.click('input[name="GO.IDENTIFICACION.NONMODELED"]');
            (0, utils_1.log)('Fetching UEA Schedules');
            yield menu_frame.waitForNavigation();
            yield menu_frame.click('a[href="CTWBS012"]');
            const info_frame = yield (0, utils_1.tryDOM)(() => __awaiter(void 0, void 0, void 0, function* () {
                const infoFrame = yield (body_frame === null || body_frame === void 0 ? void 0 : body_frame.$('frame#infoFrame'));
                return yield (infoFrame === null || infoFrame === void 0 ? void 0 : infoFrame.contentFrame());
            }), SIIUAM);
            // The available UEAs live two iframes deep inside the info frame.
            const uea_disponible = yield (0, utils_1.tryDOM)(() => __awaiter(void 0, void 0, void 0, function* () {
                const ifrmBol = yield info_frame.$('iframe#ifrm_bol');
                const ifrm_bol = yield (ifrmBol === null || ifrmBol === void 0 ? void 0 : ifrmBol.contentFrame());
                const _frame = yield (ifrm_bol === null || ifrm_bol === void 0 ? void 0 : ifrm_bol.$('iframe'));
                const frame = yield (_frame === null || _frame === void 0 ? void 0 : _frame.contentFrame());
                return yield (frame === null || frame === void 0 ? void 0 : frame.$$('.celda.uea_disponible'));
            }), SIIUAM);
            for (const uea of uea_disponible) {
                const id = yield uea.getProperty('id');
                const key = yield (id === null || id === void 0 ? void 0 : id.jsonValue());
                const _name = yield uea.$('.nombre');
                const name = yield (_name === null || _name === void 0 ? void 0 : _name.evaluate(e => e.textContent));
                ueas.push({ key, name });
            }
            if (ueas.length < 1)
                throw new Error("Uea´s not found");
            const ueas_selected = yield (0, utils_1.selectUEAS)(ueas);
            const horario_UEA = yield (0, utils_1.tryDOM)(() => __awaiter(void 0, void 0, void 0, function* () {
                return yield menu_frame.$('a[href="IEWBC005.oConsulta"]');
            }), SIIUAM);
            yield horario_UEA.click();
            yield info_frame.waitForNavigation();
            let count_ueas = 0;
            (0, utils_1.progressBar)();
            for (const uea of ueas_selected) {
                // Submit the UEA key (the trailing newline presses Enter) and wait for the result.
                yield info_frame.type('input[name="CD_UEA.CONTROL.NONMODELED"]', uea.key + '\n');
                yield info_frame.waitForNavigation();
                const tr_array = yield (0, utils_1.tryDOM)(() => __awaiter(void 0, void 0, void 0, function* () {
                    const fieldset = yield info_frame.$('fieldset');
                    return yield (fieldset === null || fieldset === void 0 ? void 0 : fieldset.$$("tr"));
                }), SIIUAM);
                const schedule = [];
                // The first row is skipped (presumably the table header - confirm).
                tr_array.shift();
                for (const tr of tr_array) {
                    const td_array = yield (0, utils_1.tryDOM)(() => __awaiter(void 0, void 0, void 0, function* () {
                        return yield tr.$$('td');
                    }), SIIUAM);
                    // Cell 0 is the teacher, cell 1 the group, cells 4-8 Monday to Friday.
                    const teacher = yield readInnerText(td_array[0]);
                    const group = yield readInnerText(td_array[1]);
                    const monday = yield readInnerText(td_array[4]);
                    const tuesday = yield readInnerText(td_array[5]);
                    const wednesday = yield readInnerText(td_array[6]);
                    const thursday = yield readInnerText(td_array[7]);
                    const friday = yield readInnerText(td_array[8]);
                    schedule.push({
                        key: uea.key,
                        teacher,
                        group,
                        monday: (0, utils_1.timeParse)(monday),
                        tuesday: (0, utils_1.timeParse)(tuesday),
                        wednesday: (0, utils_1.timeParse)(wednesday),
                        thursday: (0, utils_1.timeParse)(thursday),
                        friday: (0, utils_1.timeParse)(friday)
                    });
                }
                subjects.set(uea.name, schedule);
                (0, utils_1.progressBar)(ueas_selected.length, ++count_ueas);
                // Triple click selects the previous key so the next `type` replaces it.
                yield info_frame.click('input[name="CD_UEA.CONTROL.NONMODELED"]', { clickCount: 3 });
            }
            if (subjects.size < 1)
                throw new Error("Subjects not found");
            // At this point `teacher` is still the raw name string scraped from the table.
            teachers = collectTeachers(subjects, teacher => teacher);
            const teacherData = new Map();
            (0, utils_1.log)('Closing page: SIIUAM');
            yield SIIUAM.close();
            (0, utils_1.log)('Loading page: misprofesores');
            const MISPROFESORES = yield browser.newPage();
            yield MISPROFESORES.goto(config_1.config.PATH.MISPROFESORES, { waitUntil: 'load', timeout: 0 });
            MISPROFESORES.setDefaultNavigationTimeout(config_1.config.NAVIGATION);
            (0, utils_1.log)('Fetching teachers data');
            (0, utils_1.progressBar)();
            let count_teachers = 0;
            for (const teacher of teachers) {
                yield MISPROFESORES.waitForSelector('input[name="q"]');
                yield MISPROFESORES.type('input[name="q"]', teacher + '\n');
                yield MISPROFESORES.waitForSelector('a.gs-title');
                const results = yield MISPROFESORES.$$('.gsc-webResult.gsc-result');
                // Try the search results in order until one profile yields complete ratings.
                for (const result of results) {
                    const a = yield (result === null || result === void 0 ? void 0 : result.$('a.gs-title'));
                    const teacher_path_handle = yield (a === null || a === void 0 ? void 0 : a.getProperty('href'));
                    const teacher_path = yield (teacher_path_handle === null || teacher_path_handle === void 0 ? void 0 : teacher_path_handle.jsonValue());
                    const DATA = yield browser.newPage();
                    let found = false;
                    try {
                        // Navigation is inside the try so that one unreachable profile
                        // only discards this result instead of aborting the whole run.
                        yield DATA.goto(teacher_path, { waitUntil: 'load', timeout: 0 });
                        DATA.setDefaultNavigationTimeout(15000000);
                        const quality_text = yield readSelectorText(DATA, '.quality .grade');
                        const quality = Number(quality_text);
                        const takeAgain_text = yield readSelectorText(DATA, '.takeAgain .grade');
                        // Drops the last character before converting (presumably a "%" sign - confirm).
                        const takeAgain = Number(takeAgain_text === null || takeAgain_text === void 0 ? void 0 : takeAgain_text.slice(0, -1));
                        const difficulty_text = yield readSelectorText(DATA, '.difficulty .grade');
                        const difficulty = Number(difficulty_text);
                        const students_text = yield readSelectorText(DATA, '.rating-count');
                        // Throws when the rating count is missing, which rejects this result.
                        const students = Number(students_text.trim().split(/\s/).shift());
                        teacherData.set(teacher, {
                            name: teacher,
                            quality,
                            takeAgain,
                            difficulty,
                            students
                        });
                        ++count_teachers;
                        found = true;
                    }
                    catch (_f) {
                        // Best effort: a profile that cannot be read is skipped and the
                        // next search result is tried.
                    }
                    finally {
                        yield DATA.close();
                    }
                    if (found)
                        break;
                }
                // Forget the teacher only when no result produced data; deleting on the
                // first failure would drop teachers whose later result succeeded.
                if (!teacherData.has(teacher))
                    teachers.delete(teacher);
                (0, utils_1.progressBar)(teachers.size, count_teachers);
            }
            if (teacherData.size < 1)
                throw new Error("Teachers data not found");
            (0, utils_1.log)('Closing page: misprofesores');
            yield MISPROFESORES.close();
            (0, utils_1.log)('Closing browser');
            yield browser.close();
            // Replace each teacher name by its rating record (or an empty placeholder) and
            // order every subject's groups by quality, then students, best first.
            subjects = Array.from(subjects).reduce((prev, [subject, schedule]) => {
                return prev.set(subject, schedule.map(uea => {
                    uea.teacher = teacherData.has(uea.teacher)
                        ? teacherData.get(uea.teacher)
                        : {
                            name: '',
                            quality: 0,
                            takeAgain: 0,
                            difficulty: 0,
                            students: 0
                        };
                    return uea;
                }).sort(({ teacher: a }, { teacher: b }) => {
                    return ascending(a.quality, b.quality) || ascending(a.students, b.students);
                }).reverse());
            }, new Map());
            // Maps are not JSON-serialisable: store the keys and the entry pairs instead.
            const JSONUEA = JSON.stringify(subjects, (key, value) => {
                return value instanceof Map
                    ? {
                        subjects: Array.from(value.entries()).map(([k]) => k),
                        schedules: Array.from(value.entries())
                    }
                    : value;
            });
            (0, fs_1.writeFileSync)(config_1.config.JSONUEA, JSONUEA, 'utf-8');
            (0, utils_1.log)(`Uea´s data saved as "${config_1.config.JSONUEA}"`);
        }
        // ---- 3. Build clash-free combinations ------------------------------------
        const all_combinations = new Map();
        (0, utils_1.log)('Choosing the most optimal schedules');
        for (const [subject, schedules_root] of subjects) {
            const filteres_subjects = Array.from(subjects.keys())
                .filter(s => s !== subject)
                .sort(ascending);
            for (const schedule_data of schedules_root) {
                // Groups are stored best-first and placeholder teachers carry quality 0,
                // so the first unnamed teacher ends the useful part of the list.
                if (!schedule_data.teacher.name)
                    break;
                const compatible_schedules = new Map();
                compatible_schedules.set(subject, schedule_data);
                // Greedily add, for every other subject, its first rated group that
                // does not clash with anything already chosen.
                filteres_subjects.forEach(subject_iteration => {
                    const schedules_iteration = subjects.get(subject_iteration);
                    for (const schedule_curr of schedules_iteration) {
                        const fits = Array.from(compatible_schedules.values())
                            .every(uea => !clashes(uea, schedule_curr));
                        if (fits && schedule_curr.teacher.name) {
                            compatible_schedules.set(subject_iteration, schedule_curr);
                            break;
                        }
                    }
                });
                if (compatible_schedules.size === subjects.size) {
                    const sorted_compatible = Array.from(compatible_schedules)
                        .sort(([a], [b]) => ascending(a, b));
                    // The code identifies a combination, so the same combination reached
                    // from different starting subjects is stored only once.
                    const SCHEDULECODE = sorted_compatible.reduce((prev, [subject_code, schedule_code]) => {
                        prev += schedule_code.group;
                        prev += subject_code.substring(0, 3).toUpperCase();
                        prev += subject_code.slice(-3).toUpperCase();
                        prev += schedule_code.teacher.name.substring(0, 3).toUpperCase();
                        prev += schedule_code.teacher.name.slice(-3).toUpperCase();
                        return prev;
                    }, '').padEnd(subjects.size * 18, '0');
                    all_combinations.set(SCHEDULECODE, new Map(sorted_compatible));
                }
            }
        }
        // Highest average quality first (ascending sort followed by reverse).
        let sorted_iterations = Array.from(all_combinations)
            .sort(([, schedule_a], [, schedule_b]) => averageQuality(schedule_a) - averageQuality(schedule_b))
            .reverse();
        if (yield (0, utils_1.confirm)('Do you want to prioritize a teacher')) {
            const teacher_list = yield (0, utils_1.selectTeachers)(teachers);
            // Keep only the combinations that contain every prioritised teacher.
            sorted_iterations = sorted_iterations.filter(([, schedule_filter]) => {
                const filtered = Array.from(schedule_filter).filter(([, { teacher }]) => teacher_list.includes(teacher.name));
                return filtered.length >= teacher_list.length;
            });
            if (sorted_iterations.length < 1)
                throw new Error("No combinations lefts with this teachers");
        }
        // ---- 4. Render the chosen schedule as a PDF ------------------------------
        const { subjects_info, hours } = yield (0, utils_1.selectSchedule)(sorted_iterations);
        const doc = new pdfkit_1.default();
        const max_size = Math.max(...subjects_info.map(({ teacher }) => teacher.length));
        const x = 50;
        const x_mins = doc.page.width - x;
        if (!(0, fs_1.existsSync)(config_1.config.PDF.DIRECTORY))
            (0, fs_1.mkdirSync)(config_1.config.PDF.DIRECTORY);
        const pdf_stream = (0, fs_1.createWriteStream)(config_1.config.PDF.SCHEDULE);
        // Settles once the file is flushed to disk; a stream error becomes a rejection
        // handled by the outer catch instead of an uncaught 'error' event.
        const pdf_written = new Promise((resolve, reject) => {
            pdf_stream.on('finish', resolve);
            pdf_stream.on('error', reject);
        });
        doc.pipe(pdf_stream);
        doc.image('assets/icon-1024x1024.png', x, 50, { width: 30 })
            .fontSize(22.5)
            .text('Horario hecho por Cétr!co.Productions', x + 40, 56);
        doc.moveDown();
        // One line per subject: key, teacher and subject name laid out in columns.
        subjects_info.forEach(({ key, subject, teacher }) => {
            const y = doc.y;
            const teacher_x = x + key.length * 6;
            doc.fontSize(10)
                .text(key, x, y)
                .text(teacher, teacher_x, y)
                .text(subject, teacher_x + max_size * 5.5, y);
        });
        doc.moveDown();
        const y_start = doc.y;
        const hour_size = 5 * 8;
        const average_width = (x_mins - x - hour_size) / 5;
        const margin_top = 4.5;
        // Header row with the day names.
        doc.moveTo(x, y_start).lineTo(x_mins, y_start).stroke();
        let header_x = x + hour_size;
        for (const day of utils_1.week) {
            doc.text(day, header_x, y_start + margin_top, { width: average_width, align: 'center' });
            header_x += average_width;
        }
        doc.moveTo(x, doc.y).lineTo(x_mins, doc.y).stroke();
        // One row per hour, one column per day.
        Object.keys(hours).forEach(hour => {
            const y = doc.y;
            doc.text(hour, x, y + margin_top, { width: hour_size, align: 'center' });
            let cell_x = x + hour_size;
            for (const day of utils_1.week) {
                const hour_day = hours[hour][day];
                if (hour_day)
                    doc.text(hour_day, cell_x, y + margin_top, { width: average_width, align: 'center' });
                cell_x += average_width;
            }
            doc.moveTo(x, doc.y).lineTo(x_mins, doc.y).stroke();
        });
        // Left and right borders of the table.
        doc.moveTo(x, y_start).lineTo(x, doc.y).stroke();
        doc.moveTo(x_mins, y_start).lineTo(x_mins, doc.y).stroke();
        doc.end();
        // `doc.end()` only finalises the document; the bytes reach the file
        // asynchronously, and `execSync` below blocks the event loop. Wait for the
        // stream to finish so the viewer never opens a partial file.
        yield pdf_written;
        const command = platform.startsWith('win') ? 'start' : platform === 'darwin' ? 'open' : 'xdg-open';
        // NOTE(review): the path is passed unquoted; a PDF path containing spaces would break.
        (0, child_process_1.execSync)(`${command} ${config_1.config.PDF.SCHEDULE}`);
        if (!(yield (0, utils_1.confirm)('Do you want to keep the uea backup')))
            (0, fs_1.unlinkSync)(config_1.config.JSONUEA);
    }
    catch (e) {
        console.error(e);
        exit_code = 1;
    }
    finally {
        // Exiting here rather than inside try/catch lets this log line run.
        (0, utils_1.log)(`Done`);
        process.exit(exit_code);
    }
}))(process);