@davidpunya/web-scraper
Version:
A web scraper library for RESTful APIs
381 lines (373 loc) • 16.8 kB
JavaScript
const axios = require("axios");
const FormData = require("form-data");
const cheerio = require("cheerio");
const util = require("util");
const fetch = (...args) => import('node-fetch').then(({default: fetch}) => fetch(...args));
const google = require("google-it");
const Function = new(require("../dist/functions"));
module.exports = class Search {
/**
 * Searches TikTok videos through the tikwm.com feed-search endpoint.
 *
 * Requests 12 results for the given keywords and reshapes every returned
 * video into `{ videoInfo, author, musicInfo }`.
 *
 * @param {string} query - Keywords to search for.
 * @returns {Promise<object>} `{ status: 200, creator, result: [...] }` on
 *   success, or `{ status: 500, creator, msg }` when the request or the
 *   response parsing fails. (Previously a failure was only logged and a
 *   status-200 object with an empty/partial `result` was returned.)
 */
tiktokSearch = async (query) => {
  const assetHost = "https://www.tikwm.com";
  const form = new FormData();
  form.append("keywords", query);
  form.append("count", 12);
  form.append("web", 1);
  form.append("hd", 1);
  try {
    // NOTE(review): the body is a multipart FormData instance while the
    // Content-Type header declares urlencoded — confirm which one the
    // endpoint actually expects.
    const response = await axios({
      url: "https://tikwm.com/api/feed/search",
      method: "POST",
      headers: {
        "Content-Type": "application/x-www-form-urlencoded; charset=UTF-8",
        "Cookie": "current_language=en; _ga=GA1.1.856946852.1718700350; _gcl_au=1.1.1374100189.1718700350; _ga_5370HT04Z3=GS1.1.1718700350.1.0.1718700355.0.0.0",
        "Origin": "https://tikwm.com",
        "Referer": "https://tikwm.com/",
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36",
        "X-Requested-With": "XMLHttpRequest"
      },
      data: form
    });
    const videos = response.data.data.videos;
    return {
      status: 200,
      creator: "David XD",
      result: videos.map((p) => ({
        videoInfo: {
          id: p.id,
          title: p.title,
          region: p.region,
          video_cover: assetHost + p.cover,
          duration: Function.formatSecond(p.duration) ?? "Unknown",
          // create_time is multiplied by 1000 before formatting
          // (presumably seconds -> milliseconds; verify against the API).
          created_at: Function.formatDate(p.create_time * 1000),
          data: {
            nowm: assetHost + p.play,
            wm: assetHost + p.wmplay
          },
          count: {
            play: Function.formatCount(p.play_count),
            like: Function.formatCount(p.digg_count),
            comment: Function.formatCount(p.comment_count),
            share: Function.formatCount(p.share_count),
            download: Function.formatCount(p.download_count),
          }
        },
        author: {
          id: p.author.id,
          tag: "@" + p.author.unique_id,
          name: p.author.nickname,
          avatar: assetHost + p.author.avatar
        },
        musicInfo: {
          id: p.music_info.id,
          title: p.music_info.title,
          author: p.music_info.author,
          original: p.music_info.original,
          duration: Function.formatSecond(p.music_info.duration),
          album: p.music_info.album ?? "Unknown",
          url: p.music_info.play
        }
      }))
    };
  } catch (err) {
    console.error(err);
    // Return the error payload as an object, matching the other search methods.
    return {
      status: 500,
      creator: "David XD",
      msg: util.format(err)
    };
  }
};
/**
 * Searches gsmarena.com for phones and enriches every hit with the spec
 * summary scraped from the phone's own detail page.
 *
 * Detail pages are fetched concurrently. A detail page that fails to load or
 * parse is logged and its phone is left out of the result; the remaining
 * entries keep the order of the search listing.
 *
 * @param {string} query - Phone name to search for (URL-encoded internally).
 * @returns {Promise<object>} `{ status: 200, creator, data: [...] }` on
 *   success, or `{ status: 500, creator, msg }` if the search page itself
 *   cannot be fetched or parsed.
 */
GSMarenaSearch = async (query) => {
  const baseUrl = "https://www.gsmarena.com/";
  const headers = {
    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
    "User-Agent": "okhttp/4.9.3"
  };
  try {
    const response = await axios({
      url: `${baseUrl}results.php3?sQuickSearch=yes&sName=${encodeURIComponent(query)}`,
      method: "GET",
      headers
    });
    const $ = cheerio.load(response.data);

    // Read everything needed from the listing page up front, so the detail
    // pages can be parsed with their own cheerio instance without shadowing.
    const listing = $("#review-body > .makers > ul > li").get().map((li) => {
      const item = $(li);
      const image = item.find("a > img");
      return {
        // .html() returns null when the span is missing; fall back to "".
        name: (item.find("a > strong > span").html() ?? "").replace(/<br>/g, ' ').trim(),
        url: baseUrl + item.find("a").attr("href"),
        image_url: image.attr("src"),
        desc: image.attr("title")
      };
    });

    const entries = await Promise.all(listing.map(async (phone) => {
      try {
        const page = await axios({
          url: phone.url,
          method: "GET",
          headers
        });
        const detail = cheerio.load(page.data)(".review-header > .article-info");
        const brief = detail.find("ul > .specs-brief.pattern > .specs-brief-accent");
        const briefText = (index) => brief.eq(index).find("span").text().trim();
        return {
          ...phone,
          detail_spec: {
            name: detail.find(".article-info-line.page-specs.light.border-bottom > h1").text(),
            released_at: briefText(0).split("Released ").join(""),
            mass: briefText(1),
            os: briefText(2),
            storage: briefText(3),
            screen_size: detail.find("ul > .help.accented.help-display > .accent > .displaysize-hl").text().trim(),
            screen_resolution: detail.find("ul > .help.accented.help-display > .displayres-hl").text().trim(),
            camera: {
              mp: detail.find("ul > .help.accented.help-camera > .accent.accent-camera > .camerapixels-hl").text().trim() + "MP",
              pixels: detail.find("ul > .help.accented.help-camera > .videopixels-hl").text().trim()
            },
            ram: detail.find("ul > .help.accented.help-expansion > .accent.accent-expansion > .ramsize-hl").text().trim() + "GB",
            chipset: detail.find("ul > .help.accented.help-expansion > .chipset-hl").text().trim(),
            battery_size: detail.find("ul > .help.accented.help-battery > .accent.accent-battery > .batsize-hl").text().trim() + "mAh",
            speed_charge: detail.find("ul > .help.accented.help-battery > .battype-hl").text().trim()
          }
        };
      } catch (err) {
        // Best effort: one broken detail page must not fail the whole search.
        console.error(err);
        return null;
      }
    }));

    return {
      status: 200,
      creator: "David XD",
      data: entries.filter((entry) => entry !== null)
    };
  } catch (err) {
    const failure = {
      status: 500,
      creator: "David XD",
      msg: util.format(err)
    };
    console.error(failure);
    return failure;
  }
};
/**
 * Searches anime-planet.com characters by name and scrapes one page of the
 * result table.
 *
 * @param {string} query - Character name to search for (URL-encoded internally).
 * @param {number} [page=1] - Result page to request.
 * @returns {Promise<object>} `{ status: 200, creator, result: [...] }` where
 *   each entry has `name`, `url`, `image_url` and the link lists `traits`,
 *   `tags` and `anime`; `{ status: 404, creator, msg }` when the site answers
 *   with HTTP 404; `{ status: 500, creator, msg }` on any other failure.
 */
characterSearch = async (query, page = 1) => {
  const baseURL = "https://www.anime-planet.com";
  try {
    const res = await axios({
      url: `${baseURL}/characters/all?name=${encodeURIComponent(query)}&page=${encodeURIComponent(page)}`,
      method: "GET",
      headers: {
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
        "User-Agent": "okhttp/4.9.3"
      }
    });
    const $ = cheerio.load(res.data);
    // Turns a selection of <a> elements into [{ title, url }].
    const toLinks = (anchors) => anchors.map((_, anchor) => ({
      title: $(anchor).text(),
      url: baseURL + $(anchor).attr('href')
    })).get();

    const rows = $("#siteContainer > table > tbody > tr").get();
    return {
      status: 200,
      creator: "David XD",
      result: rows.map((row) => {
        const cells = $(row);
        const tagGroups = cells.find(".tableCharInfo > .tags");
        return {
          name: cells.find(".tableCharInfo > a").text(),
          url: baseURL + cells.find(".tableAvatar > a").attr("href"),
          image_url: cells.find(".tableAvatar > a > img").attr("src"),
          traits: toLinks(tagGroups.eq(0).find("ul > li > a")),
          tags: toLinks(tagGroups.eq(1).find("ul > li > a")),
          anime: toLinks(cells.find(".tableAnime > div > ul > li > a"))
        };
      })
    };
  } catch (er) {
    // axios rejects non-2xx responses by default, so a 404 arrives here
    // (with the response attached to the error), never in the success path.
    const failure = er.response?.status === 404
      ? {
        status: 404,
        creator: "David XD",
        msg: "Query " + query + " Not Found :/"
      }
      : {
        status: 500,
        creator: "David XD",
        msg: util.format(er)
      };
    console.log(failure);
    return failure;
  }
};
/**
 * Searches desktophut.com for live wallpapers and visits every hit's detail
 * page to collect the video URL, file size and view count.
 *
 * Detail pages are fetched one after another, in listing order.
 *
 * @param {string} query - Search keyword (URL-encoded internally).
 * @param {number} [page=1] - Result page to request.
 * @returns {Promise<object>} `{ status: 200, creator, data: [...] }` on
 *   success; `{ status: 404, creator, msg }` when the listing has no items;
 *   `{ status: 500, creator, msg }` when any request or parse step throws.
 */
wallpaper = async (query, page = 1) => {
  const baseUrl = "https://www.desktophut.com/";
  const headers = {
    "User-Agent": "GoogleBot"
  };
  // Short labels for the resolutions the site is known to show.
  const qualityLabels = new Map([
    ["3840x2160", "4K"],
    ["2560x1440", "2K"],
    ["1920x1080", "FHD"],
    ["1280x720", "HD"],
    ["854x480", "SD"],
    ["640x360", "SD"]
  ]);
  try {
    const response = await axios({
      url: `${baseUrl}search/${encodeURIComponent(query)}?page=${encodeURIComponent(page)}`,
      method: "GET",
      headers
    });
    const $ = cheerio.load(response.data);
    const items = $(".main-container > .content-wrapper > .container2.my-3 > .masonry-grid > .masonry-item");
    if (items.length === 0) {
      return {
        status: 404,
        creator: "David XD",
        msg: "Keyword " + query + " Not Found :/"
      };
    }
    const data = [];
    for (const element of items.get()) {
      const item = $(element);
      const resolution = item.find("a > .overlay > .developer").text().trim();
      const label = qualityLabels.get(resolution);
      const pageUrl = baseUrl + item.find("a").attr("href");
      const detailResponse = await axios({
        url: pageUrl,
        method: "GET",
        headers
      });
      const $detail = cheerio.load(detailResponse.data);
      const row = $detail(".main-container > .content-wrapper.mt-1 > .container2.my-3 > .row.custapp");
      const facts = row
        .find(".col-md-6 > .shadow-sm.p-2.app-details.bg-custom.rounded.mb-2")
        .find("#embed-box > .desc > strong");
      data.push({
        title: item.find("a > .overlay > .title > h2").text(),
        // Only append the label when the resolution is a known one,
        // instead of emitting "WxH (undefined)".
        quality: label === undefined ? resolution : resolution + " (" + label + ")",
        size: facts.eq(2).text(),
        upload_at: item.find("a > .overlay > .date").text().trim(),
        views: facts.eq(3).text(),
        url: pageUrl,
        image_url: item.find("a > img").attr("src"),
        video_url: row.find("#video > source").attr("src")
      });
    }
    return {
      status: 200,
      creator: "David XD",
      data
    };
  } catch (error) {
    console.error(error);
    return {
      status: 500,
      creator: "David XD",
      msg: "Something Went Wrong :/"
    };
  }
};
/**
 * Searches Pinterest pins through the site's BaseSearchResource endpoint.
 *
 * @param {string} query - Search keywords.
 * @returns {Promise<object>} `{ status: 200, creator, result: [...] }` where
 *   each entry has `title`, `url`, `created_at`, `id`, `ext` and `image_url`
 *   (each falling back to "Unknown" when the pin lacks the underlying field);
 *   `{ status: 500, creator, msg }` when the request or parsing fails.
 */
pinterest = async (query) => {
  const baseUrl = 'https://www.pinterest.com/resource/BaseSearchResource/get/';
  // source_url is itself a URL, so the query inside it needs its own encoding;
  // searchParams.set() below encodes the outer parameter values.
  const encodedQuery = encodeURIComponent(query);
  const params = {
    source_url: "/search/pins/?rs=ac&len=2&q=" + encodedQuery + "&eq=" + encodedQuery + "&etslf=3896",
    data: JSON.stringify({
      options: {
        isPrefetch: false,
        query,
        scope: 'pins',
        no_fetch_context_on_resource: false
      },
      context: {}
    }),
    // Stringified by URLSearchParams; presumably a cache-buster — verify.
    _: new Date()
  };
  const target = new URL(baseUrl);
  Object.entries(params).forEach(([key, value]) => target.searchParams.set(key, value));
  try {
    const { data } = await axios.get(target.toString());
    const pins = data.resource_response.data.results || [];
    // This is the time of the fetch, not the pin's own creation date.
    const fetchedAt = Function.formatDate(new Date()) ?? "Unknown";
    return {
      status: 200,
      creator: "David XD",
      result: pins.map((p) => {
        const imageUrl = p.images?.orig?.url;
        return {
          title: p.grid_title ?? "Unknown",
          // `+` binds tighter than `??`, so the id must be checked before
          // concatenating or the fallback can never apply.
          url: p.id != null ? "https://www.pinterest.com/pin/" + p.id : "Unknown",
          created_at: fetchedAt,
          id: p.id ?? "Unknown",
          ext: imageUrl != null ? imageUrl.substring(imageUrl.lastIndexOf('.')) : "Unknown",
          image_url: imageUrl ?? "Unknown",
        };
      })
    };
  } catch (e) {
    console.error(e);
    return {
      status: 500,
      creator: "David XD",
      msg: e.message
    };
  }
};
/**
 * Runs a Google search through the `google-it` package and reshapes every
 * hit into `{ title, desc, url }`.
 *
 * @param {string} query - Search terms passed to `google-it`.
 * @returns {Promise<object>} `{ status: 200, creator, data: [...] }` on
 *   success, or `{ status: 500, creator, msg }` when the lookup throws.
 */
googleSearch = async (query) => {
  let entries;
  try {
    const hits = await google({ query: query });
    // Spread first so any iterable is accepted, exactly like a for...of loop.
    entries = [...hits].map((hit) => ({
      title: hit.title,
      desc: hit.snippet,
      url: hit.link
    }));
  } catch (failure) {
    console.error(failure);
    return {
      status: 500,
      creator: "David XD",
      msg: failure.message
    };
  }
  return {
    status: 200,
    creator: "David XD",
    data: entries
  };
};
};