UNPKG

manga-crawl-lib

Version:

A library for scraping manga from various websites.

324 lines (323 loc) 17.3 kB
"use strict"; var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) { function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); } return new (P || (P = Promise))(function (resolve, reject) { function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } } function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } } function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); } step((generator = generator.apply(thisArg, _arguments || [])).next()); }); }; var __importDefault = (this && this.__importDefault) || function (mod) { return (mod && mod.__esModule) ? mod : { "default": mod }; }; Object.defineProperty(exports, "__esModule", { value: true }); exports.Nettruyen = void 0; const puppeteer_1 = __importDefault(require("puppeteer")); const validate_1 = require("../utils/validate"); class Nettruyen { constructor(baseUrl) { this.baseUrl = baseUrl; this.browser = puppeteer_1.default.launch({ headless: 'new', }); this.all_genres = []; } search(keyword, page = 1) { return __awaiter(this, void 0, void 0, function* () { const _page = yield (yield this.browser).newPage(); yield _page.setRequestInterception(true); _page.on('request', (req) => { if (req.resourceType() !== 'document') req.abort(); else req.continue(); }); yield _page.goto(`${this.baseUrl}/tim-truyen?keyword=${keyword}${page > 1 ? `&page=${page}` : ``}`); const element = yield _page.$$('#ctl00_divCenter > div.Module.Module-170 > div > div.items > div > div.item > figure'); const is_multipage = yield _page .$eval('#ctl00_mainContent_ctl01_divPager', () => true) .catch(() => false); const canNext = is_multipage ? yield _page .$eval('#ctl00_mainContent_ctl01_divPager > ul > li > a.next-page', () => true) .catch(() => false) : false; const canPrev = is_multipage ? yield _page .$eval('#ctl00_mainContent_ctl01_divPager > ul > li > a.prev-page', () => true) .catch(() => false) : false; const totalPage = is_multipage ? parseInt((0, validate_1.not_null)(yield _page.$eval('#ctl00_mainContent_ctl01_divPager > ul > li:last-child > a', (el) => el.getAttribute('href'))).split('page=')[1]) : 0; return { totalData: element.length, totalPage, currentPage: page !== undefined ? page : 1, canNext, canPrev, data: yield Promise.all(element.map((e, i) => __awaiter(this, void 0, void 0, function* () { const href = (0, validate_1.not_null)(yield e.$eval('div.image > a', (el) => el.getAttribute('href'))); const title = (0, validate_1.not_null)(yield e.$eval('figcaption > h3 > a', (el) => el.textContent)); const image_thumbnail = (0, validate_1.not_null)(yield e.$eval('div.image > a > img', (el) => el.getAttribute('data-original'))); return { _id: i, title, image_thumbnail: image_thumbnail.startsWith('//') ? `https:${image_thumbnail}` : image_thumbnail, href, }; }))), }; }); } getListByGenre(genre, page, status, sort) { return __awaiter(this, void 0, void 0, function* () { const _page = yield (yield this.browser).newPage(); let path = genre.path; if (sort !== undefined) { path += `?sort=${sort}${status !== undefined ? `&status=${status}` : '&status=-1'}${page !== undefined ? `&page=${page}` : ''}`; } else if (status !== undefined) { path += `?status=${status}${page !== undefined ? `&page=${page}` : ''}`; } else if (page !== undefined) { path += `?page=${page}`; } yield _page.setRequestInterception(true); _page.on('request', (req) => { if (req.resourceType() !== 'document') req.abort(); else req.continue(); }); yield _page.goto(`${this.baseUrl}${path}`); const element = yield _page.$$('#ctl00_divCenter > div.Module.Module-170 > div > div.items > div > div.item > figure'); const canNext = yield _page .$eval('#ctl00_mainContent_ctl01_divPager > ul > li > a.next-page', () => true) .catch(() => false); const canPrev = yield _page .$eval('#ctl00_mainContent_ctl01_divPager > ul > li > a.prev-page', () => true) .catch(() => false); const totalPage = parseInt((0, validate_1.not_null)(yield _page.$eval('#ctl00_mainContent_ctl01_divPager > ul > li:last-child > a', (el) => el.getAttribute('href'))).split('page=')[1]); return { totalData: element.length, totalPage, currentPage: page !== undefined ? page : 1, canNext, canPrev, data: yield Promise.all(element.map((e, i) => __awaiter(this, void 0, void 0, function* () { const href = (0, validate_1.not_null)(yield e.$eval('div.image > a', (el) => el.getAttribute('href'))); const title = (0, validate_1.not_null)(yield e.$eval('figcaption > h3 > a', (el) => el.textContent)); const image_thumbnail = (0, validate_1.not_null)(yield e.$eval('div.image > a > img', (el) => el.getAttribute('data-original'))); return { _id: i, title, image_thumbnail: image_thumbnail.startsWith('//') ? `https:${image_thumbnail}` : image_thumbnail, href, }; }))), }; }); } getDataChapter(url_chapter, url, path, prev_chapter, next_chapter) { return __awaiter(this, void 0, void 0, function* () { url = url !== undefined ? url : ''; path = path !== undefined ? path : ''; const _page = yield (yield this.browser).newPage(); yield _page.setRequestInterception(true); _page.on('request', (req) => { if (req.resourceType() !== 'document') req.abort(); else req.continue(); }); yield _page.goto(url_chapter); const content = yield _page.$('#ctl00_divCenter > div > div.reading-detail.box_doc'); const title = (0, validate_1.not_null)(yield _page.$eval('#ctl00_divCenter > div > div:nth-child(1) > div.top > h1', (el) => el.textContent)); const images = yield Promise.all((yield content.$$('div.page-chapter > img')).map((e, i) => __awaiter(this, void 0, void 0, function* () { const _data_image = yield e.evaluate((el) => { return { src_origin: el.getAttribute('data-original'), src_cdn: el.getAttribute('data-cdn'), alt: el.getAttribute('alt'), }; }); return Object.assign(Object.assign({ _id: i, src_origin: (0, validate_1.not_null)(_data_image.src_origin).startsWith('//') ? `https:${(0, validate_1.not_null)(_data_image.src_origin)}` : (0, validate_1.not_null)(_data_image.src_origin) }, ((0, validate_1.not_null)(_data_image.src_cdn) !== '' ? { src_cdn: (0, validate_1.not_null)(_data_image.src_cdn).startsWith('//') ? `https:${(0, validate_1.not_null)(_data_image.src_cdn)}` : (0, validate_1.not_null)(_data_image.src_cdn), } : {})), { alt: (0, validate_1.not_null)(_data_image.alt) }); }))); const prev = {}; if (prev_chapter === undefined) { const prev_chapter_get = yield _page.$eval('#chapterNav > a.prev.a_prev', (el) => { return { url_chapter: el.getAttribute('href'), }; }); prev.url = (0, validate_1.not_null)(prev_chapter_get.url_chapter); prev.parent_href = url; prev.path = url.substring(`${this.baseUrl}`.length); } const next = {}; if (next_chapter === undefined) { const next_chapter_get = yield _page.$eval('#chapterNav > a.next.a_next', (el) => { return { url_chapter: el.getAttribute('href'), }; }); next.url = (0, validate_1.not_null)(next_chapter_get.url_chapter); next.parent_href = url; next.path = url.substring(`${this.baseUrl}`.length); } return { url, path, chapter_data: images, title, next_chapter: next_chapter !== undefined ? next_chapter : next.url !== '#' ? next : null, prev_chapter: prev_chapter !== undefined ? prev_chapter : prev.url !== '#' ? prev : null, }; }); } getDetailManga(url) { return __awaiter(this, void 0, void 0, function* () { const _page = yield (yield this.browser).newPage(); yield _page.setRequestInterception(true); _page.on('request', (req) => { if (req.resourceType() !== 'document') req.abort(); else req.continue(); }); yield _page.goto(url); const content = yield _page.$('#ctl00_divCenter'); const title = yield content.$eval('article > h1', (el) => el.textContent); const description = (yield content.$eval('#item-detail > div.detail-content > p', (el) => el.textContent)) || ''; const thumbnail = (yield content.$eval('#item-detail > div.detail-info > div > div.col-xs-4.col-image > img', (el) => el.getAttribute('src'))) || ''; const path = url.substring(`${this.baseUrl}`.length); const author = yield content.$eval('#item-detail > div.detail-info > div > div.col-xs-8.col-info > ul > li.author.row > p.col-xs-8', (el) => el.textContent); const status = yield content.$eval('#item-detail > div.detail-info > div > div.col-xs-8.col-info > ul > li.status.row > p.col-xs-8', (el) => el.textContent); const genres = yield Promise.all((yield content.$$('#item-detail > div.detail-info > div > div.col-xs-8.col-info > ul > li.kind.row > p.col-xs-8 > a')).map((e) => __awaiter(this, void 0, void 0, function* () { const data = yield e.evaluate((el) => { return { url: el.getAttribute('href'), path: el.getAttribute('href'), name: el.textContent, }; }); return { url: (0, validate_1.not_null)(data.url), path: (0, validate_1.not_null)(data.path).substring(`${this.baseUrl}`.length), name: (0, validate_1.not_null)(data.name), }; }))); const views = yield content.$eval('#item-detail > div.detail-info > div.row > div.col-xs-8.col-info > ul > li:last-child > p.col-xs-8', (el) => el.textContent); const chapters = yield Promise.all((yield content.$$('.list-chapter > nav > ul > li')).map((e) => __awaiter(this, void 0, void 0, function* () { const chapter_anchor = yield e.$eval('.col-xs-5.chapter > a', (el) => { const data = { title: el.textContent, url: el.getAttribute('href'), }; return { title: data.title, url: data.url, }; }); const last_update = yield e.$eval('.col-xs-4.text-center.no-wrap.small', (el) => el.textContent); const views_chapter = yield e.$eval('.col-xs-3.text-center.small', (el) => el.textContent); return { title: (0, validate_1.not_null)(chapter_anchor.title), url: (0, validate_1.not_null)(chapter_anchor.url), path: (0, validate_1.not_null)(chapter_anchor.url).substring(`${this.baseUrl}`.length), parent_href: url, last_update: (0, validate_1.not_null)(last_update), views: (0, validate_1.not_null)(views_chapter), }; }))); const rate = (0, validate_1.not_null)(yield content.$eval('#item-detail > div.detail-info > div > div.col-xs-8.col-info > div.mrt5.mrb10 > span > span:nth-child(1)', (el) => el.textContent)); const rate_number = (0, validate_1.not_null)(yield content.$eval('#item-detail > div.detail-info > div > div.col-xs-8.col-info > div.mrt5.mrb10 > span > span:nth-child(3)', (el) => el.textContent)); const follows = (0, validate_1.not_null)(yield content.$eval('#item-detail > div.detail-info > div > div.col-xs-8.col-info > div.follow > span > b', (el) => el.textContent)); return { title: (0, validate_1.not_null)(title), path, author: (0, validate_1.not_null)(author), url, status: (0, validate_1.not_null)(status), genres, views: (0, validate_1.not_null)(views), rate, rate_number, follows, chapters, description, thumbnail: thumbnail.startsWith('//') ? `https:${thumbnail}` : thumbnail, }; }); } getListLatestUpdate(page = 1) { return __awaiter(this, void 0, void 0, function* () { const _page = yield (yield this.browser).newPage(); yield _page.setRequestInterception(true); _page.on('request', (req) => { if (req.resourceType() !== 'document') req.abort(); else req.continue(); }); yield _page.goto(`${this.baseUrl}${page > 1 ? `/?page=${page}` : ``}`); const element = yield _page.$$('#ctl00_divCenter > div > div > div.items > div.row > div.item'); const canNext = yield _page .$eval('#ctl00_mainContent_ctl00_divPager > ul > li > a.next-page', () => true) .catch(() => false); const canPrev = yield _page .$eval('#ctl00_mainContent_ctl00_divPager > ul > li > a.prev-page', () => true) .catch(() => false); const totalPage = parseInt((0, validate_1.not_null)(yield _page.$eval('#ctl00_mainContent_ctl00_divPager > ul > li:last-child > a', (el) => el.getAttribute('href'))).split('page=')[1]); return { totalData: element.length, totalPage, currentPage: page, canNext, canPrev, data: yield Promise.all(element.map((e, i) => __awaiter(this, void 0, void 0, function* () { const image_thumbnail = yield e.$eval('.image > a > img', (el) => el.getAttribute('data-original')); const link = yield e.$eval('figure > figcaption > h3 > a', (el) => { return { title: el.textContent, href: el.getAttribute('href'), }; }); return { _id: i, title: (0, validate_1.not_null)(link.title), href: (0, validate_1.not_null)(link.href), image_thumbnail: image_thumbnail.startsWith('//') ? `https:${image_thumbnail}` : image_thumbnail, }; }))), }; }); } } exports.Nettruyen = Nettruyen;