@davidpunya/web-scraper
Version:
A web scraper library for RESTful APIs
342 lines (337 loc) • 14.4 kB
JavaScript
const axios = require("axios");
const cheerio = require("cheerio");
const fs = require("fs");
const qs = require("qs");
const path = require("path");
const util = require("util");
const fetch = (...args) => import('node-fetch').then(({default: fetch}) => fetch(...args));
const Tools = new (require("./tools"));
module.exports = class NSFW {
nhentaiGet = async (code) => {
try {
const baseUrl = "https://nhentai.net";
const galleryUrl = `${baseUrl}/g/${code}`;
const { headers } = await axios.get(baseUrl);
const cookie = headers["set-cookie"].join("; ");
const response = await axios.get(galleryUrl, {
headers: {
Accept: "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
Cookie: cookie,
Referer: baseUrl,
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36"
}
});
if (response.status === 404) {
console.log(JSON.stringify({
status: 404,
creator: "David XD",
msg: "Cannot Find " + code + " :/"
}));
return console.log(JSON.stringify({
status: 404,
creator: "David XD",
msg: "Cannot Find " + code + " :/"
}));
};
const $ = cheerio.load(response.data);
const thumburl = $("#content #cover img").attr("data-src");
const mediaID = thumburl.split("/")[4];
const titleEN = {
title: $("#info h1.title").text(),
pretty: $("#info h1.title .pretty").text()
};
const titleJP = {
title: $("#info h2.title").text(),
pretty: $("#info h2.title .pretty").text()
};
const galleryId = $("#info #gallery_id").text().replace("#", "");
const thumb = {
img: thumburl,
w: $("#cover img").attr("width"),
h: $("#cover img").attr("height")
};
const numPages = $("#tags .tag-container").eq(7).find(".tags .name").text();
const uploadTime = $("#tags tag-container").eq(8).find(".tags .time").text();
const result = {
status: 200,
creator: "David XD",
data: {
title: {
en: titleEN,
jp: titleJP
},
id: galleryId,
media_id: mediaID,
count: numPages,
upload_at: uploadTime,
images: {
thumb: thumb,
pages: []
},
tags: []
}
};
$("#tags .tag-container").each((i, elem) => {
const type = $(elem).text().trim().split("\t")[0].split("\n")[0].replace(":", "");
if (type !== "Pages") {
$(elem).find(".tags a").each((_, tagElem) => {
result.data.tags.push({
type: type,
link: $(tagElem).attr("href"),
name: $(tagElem).find(".name").text(),
count: $(tagElem).find(".count").text()
});
});
}
});
for (let i = 1; i <= numPages; i++) {
const res = await axios.get(`${galleryUrl}/${i}`, {
headers: {
Accept: "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
Cookie: cookie,
Referer: galleryUrl,
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36"
}
});
const $$ = cheerio.load(res.data);
const img = $$("#image-container img");
result.data.images.pages.push({
img: img.attr("src"),
w: img.attr("width"),
h: img.attr("height")
});
};
//console.log(result);
return result;
} catch (error) {
console.error(error);
return JSON.stringify({
status: 500,
creator: "David XD",
msg: "Something Error :/"
});
};
};
nhentaiPDF = async(code) => {
try {
const puki = await this.nhentaiGet(code);
const data = puki.data;
if (data === 0) {
console.log({
status: 404,
crator: "David XD",
msg: "Cannot Find Code " + code + " :/"
});
return JSON.stringify({
status: 404,
crator: "David XD",
msg: "Cannot Find Code " + code + " :/"
});
};
let title = data.title;
let hal = data.images.pages.map(res => ({
img: res.img,
w: res.w,
h: res.h
}));
let tag = data.tags.map(e => ({
type: e.type,
url: "https://nhentai.net" + e.link,
name: e.name,
caount: e.count
}));
let doc = [];
data.images.pages.map(page => {
doc.push(page.img);
});
let pdfBuffer = await (await Tools.toPDFBuffer(doc)).data;
let result = {
status: 200,
cretor: "David XD",
result: {
title: {
en: {
title: title.en.title,
pretty: title.en.pretty
},
jp: {
title: title.jp.title,
pretty: title.jp.pretty
}
},
id: data.id,
media_id: data.media_id,
pages_count: data.count,
images: {
thumb: {
img: data.images.thumb.img,
w: data.images.thumb.w,
h: data.images.thumb.h
},
pages: hal
},
tags: tag,
result_pdf: pdfBuffer,
}
};
//console.log(result);
return result;
} catch (e) {
console.log(e);
return JSON.stringify({
status: 500,
creator: "David XD",
msg: util.format(e)
});
};
};
nhentaiLatest = async() => {
const baseUrl = "https://nhentai.net";
const { headers } = await axios.get(baseUrl);
const cookie = headers["set-cookie"].join("; ");
let result = {
status: 200,
creator: "David XD",
result: {
popular_now: [],
new_upload: []
}
}
await axios({
url: baseUrl,
method: "GET",
headers: {
Accept: "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
Cookie: cookie,
Referer: baseUrl,
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36"
}
}).then((res) => {
const $ = cheerio.load(res.data);
$("#content > .container.index-container.index-popular > .gallery").each(function() {
result.result.popular_now.push({
title: $(this).find(".caption").text().trim(),
url: baseUrl + $(this).find("a").attr("href"),
img: {
url: $(this).find("a > img").attr("data-src"),
w: $(this).find("a > img").attr("width"),
h: $(this).find("a > img").attr("height")
}
});
});
$("#content > .container.index-container > .gallery").each(function() {
result.result.new_upload.push({
title: $(this).find(".caption").text().trim(),
url: baseUrl + $(this).find("a").attr("href"),
img: {
url: $(this).find("a > img").attr("data-src"),
w: $(this).find("a > img").attr("width"),
h: $(this).find("a > img").attr("height")
}
});
});
}).catch((err) => {
console.error(err);
return JSON.stringify({
status: 500,
creator: "David XD",
msg: util.format(err)
});
});
//console.log(result);
return result;
};
nhentaiSearch = async(query, page) => {
const baseUrl = "https://nhentai.net";
const { headers } = await axios.get(baseUrl);
const cookie = headers["set-cookie"].join("; ");
let result = {
status: 200,
creator: "David XD",
result: {
count: "",
data: []
}
};
await axios({
url: "https://nhentai.net/search/?q=" + query + "&page="+ page,
method: "GET",
headers: {
Accept: "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
"Cookie": cookie,
Referer: baseUrl,
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36"
}
}).then((res) => {
const $ = cheerio.load(res.data);
result.result.count = $("#content > h1").text().trim();
$("#content > .container.index-container > .gallery").each(function() {
result.result.data.push({
title: $(this).find(".caption").text().trim(),
url: baseUrl + $(this).find("a").attr("href"),
img: {
url: $(this).find("a > img").attr("data-src"),
w: $(this).find("a > img").attr("width"),
h: $(this).find("a > img").attr("height")
}
});
});
}).catch((err) => {
console.error(err);
return JSON.stringify({
status: 500,
creator: "David XD",
msg: util.format(err)
});
});
//console.log(result);
return result;
};
doujindesuLatest = async(page = 1) => {
const baseUrl = "https://doujindesu.tv";
const { headers } = await axios.get(baseUrl);
const cookie = headers["set-cookie"].join("; ");
const result = {
status: 200,
crator: "David XD",
result: {}
};
await axios({
url: "https://doujindesu.tv/doujin/page/" + page,
method: "GET",
headers: {
"Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
"Cookie": `PHPSESSID=giogppgp8ejplo0b01lehj0hp3; __cf_bm=K3n8NkZT7thzs9HNy1P2oupLZTWGrMPEOojFXLoRQt0-1719401109-1.0.1.1-vgJUrwX1EyQIyvidhy2ZRh5CHuDp4DLAZwmsEUyld5zUFQmkaWfzy9VDVxKBfong4HCYXRCjYBz48Kcb2ehdtw; cf_clearance=i.xy1Nmf1d0OMk.Y69wag5ZQE5eS01utMSaJpRt1ZtM-1719401110-1.0.1.1-EOrpjRxb2pQkFPnbwdt9Crkkn7bE0iTfJg0FT.vLFm._OsEIkebNFcSzwzvYCVR3TIMatAeYJ72PRt7FlDNk_Q; __PPU_CAIFRQ=ACZLEAAAAAAAAAAB; __PPU_CAIFRT=ACZLEAAAAABmfEkQ`,
"Sec-Ch-Ua": `"Chromium";v="124", "Google Chrome";v="124", "Not-A.Brand";v="99"`,
"User-Agent": "okhttp/4.9.3"
}
}).then((res) => {
const $ = cheerio.load(res.data);
const mg = $("#archives").eq(0).find(".entries > article");
result.result = {
latest_manga: $(mg).map(function() {
let baru = $(this).find(".metadata > .artists > .dtch > div").text().trim();
if (baru === "NEW") {
var isNew = true
}
return {
title: $(this).find("a").attr("title"),
type: $(this).find("a > .thumbnail > span").text().trim(),
chapter: $(this).find(".metadata > .artists > a > span").text().trim(),
new: isNew ? true : baru,
url: baseUrl + $(this).find("a").attr("href")
};
}).get(),
}
}).catch((err) => {
console.error(err);
return {
status: 500,
creator: "David XD",
msg: util.format(err)
};
});
//console.log(result);
return result;
};
};