UNPKG

@davidpunya/web-scraper

Version:

The library web scraper for Restfull API's

342 lines (337 loc) 14.4 kB
const axios = require("axios"); const cheerio = require("cheerio"); const fs = require("fs"); const qs = require("qs"); const path = require("path"); const util = require("util"); const fetch = (...args) => import('node-fetch').then(({default: fetch}) => fetch(...args)); const Tools = new (require("./tools")); module.exports = class NSFW { nhentaiGet = async (code) => { try { const baseUrl = "https://nhentai.net"; const galleryUrl = `${baseUrl}/g/${code}`; const { headers } = await axios.get(baseUrl); const cookie = headers["set-cookie"].join("; "); const response = await axios.get(galleryUrl, { headers: { Accept: "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7", Cookie: cookie, Referer: baseUrl, "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36" } }); if (response.status === 404) { console.log(JSON.stringify({ status: 404, creator: "David XD", msg: "Cannot Find " + code + " :/" })); return console.log(JSON.stringify({ status: 404, creator: "David XD", msg: "Cannot Find " + code + " :/" })); }; const $ = cheerio.load(response.data); const thumburl = $("#content #cover img").attr("data-src"); const mediaID = thumburl.split("/")[4]; const titleEN = { title: $("#info h1.title").text(), pretty: $("#info h1.title .pretty").text() }; const titleJP = { title: $("#info h2.title").text(), pretty: $("#info h2.title .pretty").text() }; const galleryId = $("#info #gallery_id").text().replace("#", ""); const thumb = { img: thumburl, w: $("#cover img").attr("width"), h: $("#cover img").attr("height") }; const numPages = $("#tags .tag-container").eq(7).find(".tags .name").text(); const uploadTime = $("#tags tag-container").eq(8).find(".tags .time").text(); const result = { status: 200, creator: "David XD", data: { title: { en: titleEN, jp: titleJP }, id: galleryId, media_id: mediaID, count: numPages, upload_at: uploadTime, images: { thumb: thumb, pages: [] }, tags: [] } }; $("#tags .tag-container").each((i, elem) => { const type = $(elem).text().trim().split("\t")[0].split("\n")[0].replace(":", ""); if (type !== "Pages") { $(elem).find(".tags a").each((_, tagElem) => { result.data.tags.push({ type: type, link: $(tagElem).attr("href"), name: $(tagElem).find(".name").text(), count: $(tagElem).find(".count").text() }); }); } }); for (let i = 1; i <= numPages; i++) { const res = await axios.get(`${galleryUrl}/${i}`, { headers: { Accept: "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7", Cookie: cookie, Referer: galleryUrl, "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36" } }); const $$ = cheerio.load(res.data); const img = $$("#image-container img"); result.data.images.pages.push({ img: img.attr("src"), w: img.attr("width"), h: img.attr("height") }); }; //console.log(result); return result; } catch (error) { console.error(error); return JSON.stringify({ status: 500, creator: "David XD", msg: "Something Error :/" }); }; }; nhentaiPDF = async(code) => { try { const puki = await this.nhentaiGet(code); const data = puki.data; if (data === 0) { console.log({ status: 404, crator: "David XD", msg: "Cannot Find Code " + code + " :/" }); return JSON.stringify({ status: 404, crator: "David XD", msg: "Cannot Find Code " + code + " :/" }); }; let title = data.title; let hal = data.images.pages.map(res => ({ img: res.img, w: res.w, h: res.h })); let tag = data.tags.map(e => ({ type: e.type, url: "https://nhentai.net" + e.link, name: e.name, caount: e.count })); let doc = []; data.images.pages.map(page => { doc.push(page.img); }); let pdfBuffer = await (await Tools.toPDFBuffer(doc)).data; let result = { status: 200, cretor: "David XD", result: { title: { en: { title: title.en.title, pretty: title.en.pretty }, jp: { title: title.jp.title, pretty: title.jp.pretty } }, id: data.id, media_id: data.media_id, pages_count: data.count, images: { thumb: { img: data.images.thumb.img, w: data.images.thumb.w, h: data.images.thumb.h }, pages: hal }, tags: tag, result_pdf: pdfBuffer, } }; //console.log(result); return result; } catch (e) { console.log(e); return JSON.stringify({ status: 500, creator: "David XD", msg: util.format(e) }); }; }; nhentaiLatest = async() => { const baseUrl = "https://nhentai.net"; const { headers } = await axios.get(baseUrl); const cookie = headers["set-cookie"].join("; "); let result = { status: 200, creator: "David XD", result: { popular_now: [], new_upload: [] } } await axios({ url: baseUrl, method: "GET", headers: { Accept: "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7", Cookie: cookie, Referer: baseUrl, "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36" } }).then((res) => { const $ = cheerio.load(res.data); $("#content > .container.index-container.index-popular > .gallery").each(function() { result.result.popular_now.push({ title: $(this).find(".caption").text().trim(), url: baseUrl + $(this).find("a").attr("href"), img: { url: $(this).find("a > img").attr("data-src"), w: $(this).find("a > img").attr("width"), h: $(this).find("a > img").attr("height") } }); }); $("#content > .container.index-container > .gallery").each(function() { result.result.new_upload.push({ title: $(this).find(".caption").text().trim(), url: baseUrl + $(this).find("a").attr("href"), img: { url: $(this).find("a > img").attr("data-src"), w: $(this).find("a > img").attr("width"), h: $(this).find("a > img").attr("height") } }); }); }).catch((err) => { console.error(err); return JSON.stringify({ status: 500, creator: "David XD", msg: util.format(err) }); }); //console.log(result); return result; }; nhentaiSearch = async(query, page) => { const baseUrl = "https://nhentai.net"; const { headers } = await axios.get(baseUrl); const cookie = headers["set-cookie"].join("; "); let result = { status: 200, creator: "David XD", result: { count: "", data: [] } }; await axios({ url: "https://nhentai.net/search/?q=" + query + "&page="+ page, method: "GET", headers: { Accept: "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7", "Cookie": cookie, Referer: baseUrl, "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36" } }).then((res) => { const $ = cheerio.load(res.data); result.result.count = $("#content > h1").text().trim(); $("#content > .container.index-container > .gallery").each(function() { result.result.data.push({ title: $(this).find(".caption").text().trim(), url: baseUrl + $(this).find("a").attr("href"), img: { url: $(this).find("a > img").attr("data-src"), w: $(this).find("a > img").attr("width"), h: $(this).find("a > img").attr("height") } }); }); }).catch((err) => { console.error(err); return JSON.stringify({ status: 500, creator: "David XD", msg: util.format(err) }); }); //console.log(result); return result; }; doujindesuLatest = async(page = 1) => { const baseUrl = "https://doujindesu.tv"; const { headers } = await axios.get(baseUrl); const cookie = headers["set-cookie"].join("; "); const result = { status: 200, crator: "David XD", result: {} }; await axios({ url: "https://doujindesu.tv/doujin/page/" + page, method: "GET", headers: { "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7", "Cookie": `PHPSESSID=giogppgp8ejplo0b01lehj0hp3; __cf_bm=K3n8NkZT7thzs9HNy1P2oupLZTWGrMPEOojFXLoRQt0-1719401109-1.0.1.1-vgJUrwX1EyQIyvidhy2ZRh5CHuDp4DLAZwmsEUyld5zUFQmkaWfzy9VDVxKBfong4HCYXRCjYBz48Kcb2ehdtw; cf_clearance=i.xy1Nmf1d0OMk.Y69wag5ZQE5eS01utMSaJpRt1ZtM-1719401110-1.0.1.1-EOrpjRxb2pQkFPnbwdt9Crkkn7bE0iTfJg0FT.vLFm._OsEIkebNFcSzwzvYCVR3TIMatAeYJ72PRt7FlDNk_Q; __PPU_CAIFRQ=ACZLEAAAAAAAAAAB; __PPU_CAIFRT=ACZLEAAAAABmfEkQ`, "Sec-Ch-Ua": `"Chromium";v="124", "Google Chrome";v="124", "Not-A.Brand";v="99"`, "User-Agent": "okhttp/4.9.3" } }).then((res) => { const $ = cheerio.load(res.data); const mg = $("#archives").eq(0).find(".entries > article"); result.result = { latest_manga: $(mg).map(function() { let baru = $(this).find(".metadata > .artists > .dtch > div").text().trim(); if (baru === "NEW") { var isNew = true } return { title: $(this).find("a").attr("title"), type: $(this).find("a > .thumbnail > span").text().trim(), chapter: $(this).find(".metadata > .artists > a > span").text().trim(), new: isNew ? true : baru, url: baseUrl + $(this).find("a").attr("href") }; }).get(), } }).catch((err) => { console.error(err); return { status: 500, creator: "David XD", msg: util.format(err) }; }); //console.log(result); return result; }; };