UNPKG

manga-crawl-lib

Version:

A library for scraping manga from various websites.

253 lines (252 loc) 12.4 kB
"use strict"; var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) { function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); } return new (P || (P = Promise))(function (resolve, reject) { function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } } function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } } function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); } step((generator = generator.apply(thisArg, _arguments || [])).next()); }); }; var __importDefault = (this && this.__importDefault) || function (mod) { return (mod && mod.__esModule) ? mod : { "default": mod }; }; Object.defineProperty(exports, "__esModule", { value: true }); exports.AsuraScans = void 0; const puppeteer_1 = __importDefault(require("puppeteer")); const validate_1 = require("../utils/validate"); const getListLatest_1 = require("../hooks/getListLatest"); const getDataChapter_1 = require("../hooks/getDataChapter"); class AsuraScans { constructor(baseUrl) { this.baseUrl = baseUrl; this.browser = puppeteer_1.default.launch({ headless: 'new', }); this.all_genres = []; } search(keyword, page = 1) { return __awaiter(this, void 0, void 0, function* () { const _page = yield (yield this.browser).newPage(); yield _page.setRequestInterception(true); _page.on('request', (req) => { if (req.resourceType() !== 'document') req.abort(); else req.continue(); }); yield _page.goto(`${this.baseUrl}${page > 1 ? `/page/${page}` : ``}/?s=${keyword}`); const paramsSelector = { puppeteer: _page, wrapSelector: 'div.listupd > div.bs > div.bsx', titleSelector: 'a > div.bigor > div.tt', thumbnailSelector: 'a > div.limit > img', thumbnailAttr: 'src', hrefSelector: 'a', }; const data = yield (0, getListLatest_1.useGetDataItemsManga)(paramsSelector); const canNext = yield _page .$eval('div.pagination > a.next.page-numbers', () => true) .catch(() => false); const canPrev = yield _page .$eval('div.pagination > a.prev.page-numbers', () => true) .catch(() => false); const totalPages = yield _page.$$('div.pagination > a.page-numbers:not(.prev):not(.next)'); const totalPage = totalPages !== undefined ? Number(yield totalPages[totalPages.length - 1].evaluate((el) => el.textContent)) : 0; return { totalData: data.length, totalPage, currentPage: page !== undefined ? page : 1, canNext, canPrev, data, }; }); } getListByGenre(genre, page, status, sort) { return __awaiter(this, void 0, void 0, function* () { const _page = yield (yield this.browser).newPage(); const url = `${this.baseUrl}${genre.path}${page !== undefined && page > 1 ? `/page/${page}` : ``}`; yield _page.setRequestInterception(true); _page.on('request', (req) => { if (req.resourceType() !== 'document') req.abort(); else req.continue(); }); yield _page.goto(url); const paramsSelector = { puppeteer: _page, wrapSelector: 'div.listupd > div.bs > div.bsx', titleSelector: 'a > div.bigor > div.tt', thumbnailSelector: 'a > div.limit > img', thumbnailAttr: 'src', hrefSelector: 'a', }; const data = yield (0, getListLatest_1.useGetDataItemsManga)(paramsSelector); const canNext = yield _page .$eval('div.pagination > a.next.page-numbers', () => true) .catch(() => false); const canPrev = yield _page .$eval('div.pagination > a.prev.page-numbers', () => true) .catch(() => false); const totalPages = yield _page.$$('div.pagination > a.page-numbers:not(.prev):not(.next)'); const totalPage = totalPages !== undefined ? Number(yield totalPages[totalPages.length - 1].evaluate((el) => el.textContent)) : 0; return { totalData: data.length, totalPage, currentPage: page !== undefined ? page : 1, canNext, canPrev, data, }; }); } getDataChapter(url_chapter, url, path, prev_chapter, next_chapter) { return __awaiter(this, void 0, void 0, function* () { const _page = yield (yield this.browser).newPage(); yield _page.setRequestInterception(true); _page.on('request', (req) => { if (req.resourceType() !== 'document') req.abort(); else req.continue(); }); yield _page.goto(url_chapter); const paramsSelector = { puppeteer: _page, mainContentSelector: 'div.chapterbody > div.postarea > article', titleSelector: 'div.headpost > h1', imageSelectorAll: 'div#readerarea > p > img', originImageAttr: 'src', prevChapterSelector: '.navlef > .npv.r > div.nextprev > a.ch-prev-btn', nextChapterSelector: '.navlef > .npv.r > div.nextprev > a.ch-next-btn', baseUrl: this.baseUrl, url: url_chapter, }; const data = yield (0, getDataChapter_1.useGetDataChapter)(paramsSelector); const scripts = yield _page.$$('script'); const script = yield scripts[18].evaluate((e) => { return JSON.parse(e.textContent.split('ts_reader.run(')[1].split(');')[0]); }); return Object.assign(Object.assign(Object.assign(Object.assign({}, (url !== undefined ? { url } : {})), (path !== undefined ? { path } : {})), data), { next_chapter: script.nextUrl !== '' ? { url: script.nextUrl, parent_href: url !== undefined ? url : '', path: script.nextUrl.substring(`${this.baseUrl}`.length), } : null, prev_chapter: script.prevUrl !== '' ? { url: script.prevUrl, parent_href: url !== undefined ? url : '', path: script.prevUrl.substring(`${this.baseUrl}`.length), } : null }); }); } getDetailManga(url) { return __awaiter(this, void 0, void 0, function* () { const _page = yield (yield this.browser).newPage(); yield _page.setRequestInterception(true); _page.on('request', (req) => { if (req.resourceType() !== 'document') req.abort(); else req.continue(); }); yield _page.goto(url); const content = yield _page.$('div.postbody'); const title = yield content.$eval('article > div.bixbox.animefull > div.bigcontent.nobigcover > div.infox > h1', (el) => el.textContent); const path = url.substring(`${this.baseUrl}`.length); const author = yield content.$eval('article > div.bixbox.animefull > div.bigcontent.nobigcover > div.infox > div.flex-wrap > div:nth-child(2) > span', (el) => el.textContent); const status = yield content.$eval('article > div.bixbox.animefull > div.bigcontent.nobigcover > div.thumbook > div.rt > div.tsinfo > div:nth-child(1) > i', (el) => el.textContent); const genres = yield Promise.all((yield content.$$('article > div.bixbox.animefull > div.bigcontent.nobigcover > div.infox > div.wd-full > span > a')).map((e) => __awaiter(this, void 0, void 0, function* () { const data = yield e.evaluate((el) => { return { url: el.getAttribute('href'), path: el.getAttribute('href'), name: el.textContent, }; }); return { url: (0, validate_1.not_null)(data.url), path: (0, validate_1.not_null)(data.path).substring(`${this.baseUrl}`.length), name: (0, validate_1.not_null)(data.name), }; }))); const chapters = yield Promise.all((yield content.$$('#chapterlist > ul > li')).map((e) => __awaiter(this, void 0, void 0, function* () { const chapter_anchor = yield e.$eval('a', (el) => { const data = { title: el.children[0].textContent, url: el.getAttribute('href'), }; return { title: data.title, url: data.url, }; }); const last_update = yield e.$eval('a > span.chapterdate', (el) => el.textContent); return { title: (0, validate_1.not_null)(chapter_anchor.title), url: (0, validate_1.not_null)(chapter_anchor.url), path: (0, validate_1.not_null)(chapter_anchor.url).substring(`${this.baseUrl}`.length), parent_href: url, last_update: (0, validate_1.not_null)(last_update), }; }))); const rate = (0, validate_1.not_null)(yield content.$eval('article > div.bixbox.animefull > div.bigcontent.nobigcover > div.thumbook > div.rt > div.rating > div > div.num', (el) => el.textContent)); const follows = (0, validate_1.not_null)(yield content.$eval('article > div.bixbox.animefull > div.bigcontent.nobigcover > div.thumbook > div.rt > div.bmc', (el) => el.textContent)); return { title: (0, validate_1.not_null)(title), path, author: (0, validate_1.not_null)(author).replace(/\n/g, ''), url, status: (0, validate_1.not_null)(status), genres, rate, follows: follows.replace(/\D/g, ''), chapters, }; }); } getListLatestUpdate(page = 1) { return __awaiter(this, void 0, void 0, function* () { const _page = yield (yield this.browser).newPage(); yield _page.setRequestInterception(true); _page.on('request', (req) => { if (req.resourceType() !== 'document') req.abort(); else req.continue(); }); yield _page.goto(`${this.baseUrl}${page !== undefined && page > 1 ? `/page/${page}` : ``}`); const paramsSelector = { puppeteer: _page, wrapSelector: 'div.listupd > div.utao.styletwo', titleSelector: 'div.uta > div.luf > a > h4', thumbnailSelector: 'div.uta > div.imgu > a > img', thumbnailAttr: 'src', hrefSelector: 'div.uta > div.luf > a', }; const data = yield (0, getListLatest_1.useGetDataItemsManga)(paramsSelector); const canNext = yield _page .$eval('div.hpage > a.r', () => true) .catch(() => false); const canPrev = yield _page .$eval('div.hpage > a.l', () => true) .catch(() => false); return { data, totalData: data.length, currentPage: page, canNext, canPrev, }; }); } } exports.AsuraScans = AsuraScans;