manga-crawl-lib
Version:
A library for scraping manga from various websites.
77 lines (76 loc) • 4.24 kB
JavaScript
;
/**
 * Runs a generator function to completion, treating every `yield` as an
 * await point, and returns a promise for the generator's return value.
 *
 * An already-installed `this.__awaiter` is reused when present.
 *
 * @param {*} thisArg - `this` value used when invoking `generator`.
 * @param {ArrayLike<*>} [_arguments] - Arguments passed to `generator`; defaults to none.
 * @param {PromiseConstructor} [P] - Promise implementation; defaults to the global `Promise`.
 * @param {GeneratorFunction} generator - Generator function whose yields are awaited.
 * @returns {Promise<*>} Settles with the generator's return value, or rejects
 *   with whatever the generator throws.
 */
var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
    // Values that are not instances of P are wrapped so `.then` is always available.
    const asPromise = (value) => {
        if (value instanceof P) {
            return value;
        }
        return new P((resolve) => {
            resolve(value);
        });
    };
    if (!P) {
        P = Promise;
    }
    return new P((resolve, reject) => {
        // Either finish with the generator's return value or wait on the yielded one.
        const advance = (result) => {
            if (result.done) {
                resolve(result.value);
                return;
            }
            asPromise(result.value).then(onFulfilled, onRejected);
        };
        // Feed the settled value back into the generator.
        const onFulfilled = (value) => {
            try {
                advance(generator.next(value));
            }
            catch (error) {
                reject(error);
            }
        };
        // Re-raise the rejection inside the generator so its try/catch can handle it.
        const onRejected = (reason) => {
            try {
                advance(generator["throw"](reason));
            }
            catch (error) {
                reject(error);
            }
        };
        // From here on `generator` refers to the iterator, not the generator function.
        generator = generator.apply(thisArg, _arguments || []);
        advance(generator.next());
    });
};
// Define a non-enumerable `__esModule` flag on the CommonJS exports object
// (the marker conventionally emitted for modules transpiled from ES-module syntax).
Object.defineProperty(exports, "__esModule", { value: true });
// Declare the export slot up front as `undefined`; it is assigned the real
// function after `useGetDataChapter` has been defined.
exports.useGetDataChapter = void 0;
const validate_1 = require("../utils/validate");
/**
 * Prefixes a protocol-relative URL (`//host/path`) with `https:`; any other
 * string is returned unchanged.
 *
 * @param {string} src - Raw attribute value.
 * @returns {string} The value with an explicit `https:` scheme when it started with `//`.
 */
const withHttpsScheme = (src) => (src.startsWith('//') ? `https:${src}` : src);

/**
 * Reads the source/alt attributes of one image handle and builds its entry
 * for `chapter_data`.
 *
 * @param {object} handle - Element handle exposing `evaluate(fn, ...args)`.
 * @param {number} index - Position of the image in the selector result; used as `_id`.
 * @param {string} originImageAttr - Attribute holding the origin image URL.
 * @param {string} [cdnImageAttr] - Attribute holding the CDN image URL, if any.
 * @returns {Promise<{_id: number, src_origin: string, src_cdn?: string, alt: string}>}
 */
const readImageData = (handle, index, originImageAttr, cdnImageAttr) => __awaiter(void 0, void 0, void 0, function* () {
    const notNull = validate_1.not_null;
    // The callback is handed to `evaluate`, so it stays self-contained and
    // only uses its own parameters.
    const raw = yield handle.evaluate((el, originAttr, cdnAttr) => {
        const attrs = { src_origin: el.getAttribute(originAttr) };
        if (cdnAttr) {
            attrs.src_cdn = el.getAttribute(cdnAttr);
        }
        attrs.alt = el.getAttribute('alt');
        return attrs;
    }, originImageAttr, cdnImageAttr);
    const image = { _id: index, src_origin: withHttpsScheme(notNull(raw.src_origin)) };
    // The CDN source is optional: it is absent when no `cdnImageAttr` was
    // configured and null when the element lacks the attribute. Previously
    // only '' was skipped, so those cases failed on `.startsWith`.
    const cdn = raw.src_cdn;
    if (cdn !== undefined && cdn !== null && cdn !== '') {
        image.src_cdn = withHttpsScheme(cdn);
    }
    image.alt = notNull(raw.alt);
    return image;
});

/**
 * Reads the `href` of the first element matching `selector` inside `content`
 * and describes it as a neighbouring-chapter link.
 *
 * @param {object} content - Element handle exposing `$eval(selector, fn)`.
 * @param {string} selector - Selector of the prev/next chapter anchor.
 * @param {string} url - URL of the chapter currently being read.
 * @param {string} baseUrl - Prefix whose length is cut off `url` to build `path`.
 * @returns {Promise<{url: string, parent_href: string, path: string} | null>}
 *   The link, or `null` when `$eval` rejects (treated as "no such link").
 */
const readChapterLink = (content, selector, url, baseUrl) => __awaiter(void 0, void 0, void 0, function* () {
    const found = yield content
        .$eval(selector, (el) => ({ url_chapter: el.getAttribute('href') }))
        // Best effort: a rejected lookup means there is no neighbouring chapter.
        .catch(() => null);
    if (found === null) {
        return null;
    }
    return {
        url: (0, validate_1.not_null)(found.url_chapter),
        parent_href: url,
        // NOTE(review): `path` is cut from the current chapter `url`, not from
        // the scraped link; kept as in the original, confirm this is intended.
        path: url.substring(`${baseUrl}`.length),
    };
});

/**
 * Scrapes the title, the page images and the previous/next chapter links of
 * one chapter page.
 *
 * @param {object} params
 * @param {object} params.puppeteer - Object exposing `$(selector)` that resolves to
 *   an element handle (presumably a Puppeteer page; confirm against callers).
 * @param {*} [params.cheerioApi] - Must be `undefined`; any other value rejects
 *   with `Error('not yet define')`.
 * @param {string} params.url - URL of the chapter being scraped.
 * @param {*} [params.prev_chapter] - When not `undefined`, the previous link is
 *   not scraped and `prev_chapter` in the result is `null`.
 * @param {*} [params.next_chapter] - Same as `prev_chapter`, for the next link.
 * @param {string} params.baseUrl - Prefix whose length is cut off `url` for `path`.
 * @param {string} params.mainContentSelector - Selector of the chapter container.
 * @param {string} params.titleSelector - Selector of the title inside the container.
 * @param {string} params.imageSelectorAll - Selector matching every page image.
 * @param {string} params.originImageAttr - Attribute holding the origin image URL.
 * @param {string} [params.cdnImageAttr] - Attribute holding the CDN image URL.
 * @param {string} params.prevChapterSelector - Selector of the previous-chapter anchor.
 * @param {string} params.nextChapterSelector - Selector of the next-chapter anchor.
 * @returns {Promise<{title: string, chapter_data: object[], next_chapter: object|null, prev_chapter: object|null}>}
 * @throws {Error} When `cheerioApi` is provided, when the main content element
 *   is not found, or when `validate_1.not_null` rejects a required value
 *   (its exact behaviour lives in `../utils/validate`).
 */
const useGetDataChapter = (params) => __awaiter(void 0, void 0, void 0, function* () {
    const { puppeteer, cheerioApi, url, prev_chapter, next_chapter, baseUrl, mainContentSelector, titleSelector, imageSelectorAll, originImageAttr, cdnImageAttr, prevChapterSelector, nextChapterSelector, } = params;
    if (cheerioApi !== undefined) {
        throw new Error('not yet define');
    }
    const content = yield puppeteer.$(mainContentSelector);
    if (content === null || content === undefined) {
        // Fail with a message that names the selector instead of a bare
        // TypeError from dereferencing a missing handle.
        throw new Error(`main content not found for selector "${mainContentSelector}"`);
    }
    const title = (0, validate_1.not_null)(yield content.$eval(titleSelector, (el) => el.textContent));
    const handles = yield content.$$(`${imageSelectorAll}`);
    const images = yield Promise.all(handles.map((handle, index) => readImageData(handle, index, originImageAttr, cdnImageAttr)));
    // Neighbouring links are only scraped when the caller did not supply them.
    const prev = prev_chapter === undefined
        ? yield readChapterLink(content, prevChapterSelector, url, baseUrl)
        : null;
    const next = next_chapter === undefined
        ? yield readChapterLink(content, nextChapterSelector, url, baseUrl)
        : null;
    return {
        title,
        chapter_data: images,
        next_chapter: next,
        prev_chapter: prev,
    };
});
// Publish the function as the module's `useGetDataChapter` export.
exports.useGetDataChapter = useGetDataChapter;