UNPKG

@davidpunya/web-scraper

Version:

A web scraper library for RESTful APIs

381 lines (373 loc) 16.8 kB
const axios = require("axios"); const FormData = require("form-data"); const cheerio = require("cheerio"); const util = require("util"); const fetch = (...args) => import('node-fetch').then(({default: fetch}) => fetch(...args)); const google = require("google-it"); const Function = new(require("../dist/functions")); module.exports = class Search { tiktokSearch = async(query) => { const form = new FormData(); form.append("keywords", query); form.append("count", 12); form.append("web", 1); form.append("hd", 1); let result = { status: 200, creator: "David XD", result: [] } await axios({ url: "https://tikwm.com/api/feed/search", method: "POST", headers: { "Content-Type": "application/x-www-form-urlencoded; charset=UTF-8", "Cookie": "current_language=en; _ga=GA1.1.856946852.1718700350; _gcl_au=1.1.1374100189.1718700350; _ga_5370HT04Z3=GS1.1.1718700350.1.0.1718700355.0.0.0", "Origin": "https://tikwm.com", "Referer": "https://tikwm.com/", "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36", "X-Requested-With": "XMLHttpRequest" }, data: form }).then((re) => { let res = re.data.data; res.videos.map(p => { result.result.push({ videoInfo: { id: p.id, title: p.title, region: p.region, video_cover: "https://www.tikwm.com" + p.cover, duration: Function.formatSecond(p.duration) ?? 
"Unknown", created_at: Function.formatDate(p.create_time * 1000), data: { nowm: "https://www.tikwm.com" + p.play, wm: "https://www.tikwm.com" + p.wmplay }, count: { play: Function.formatCount(p.play_count), like: Function.formatCount(p.digg_count), comment: Function.formatCount(p.comment_count), share: Function.formatCount(p.share_count), download: Function.formatCount(p.download_count), } }, author: { id: p.author.id, tag: "@" + p.author.unique_id, name: p.author.nickname, avatar: "https://www.tikwm.com" + p.author.avatar }, musicInfo: { id: p.music_info.id, title: p.music_info.title, author: p.music_info.author, original: p.music_info.original, duration: Function.formatSecond(p.music_info.duration), album: p.music_info.album ?? "Unknown", url: p.music_info.play } }); }); }).catch((err) => { console.error(err); return JSON.stringify({ status: 500, creator: "David XD", msg: util.format(err) }) }); //console.log(JSON.stringify(result, null, 2)) return result; }; GSMarenaSearch = async (query) => { const result = { status: 200, creator: "David XD", data: [] }; try { const response = await axios({ url: `https://www.gsmarena.com/results.php3?sQuickSearch=yes&sName=${query}`, method: "GET", headers: { "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7", "User-Agent": "okhttp/4.9.3" } }); const $ = cheerio.load(response.data); const ditel = []; $("#review-body > .makers > ul > li").each(function () { const name = $(this).find("a > strong > span").html().replace(/<br>/g, ' ').trim(); const hasil = "https://www.gsmarena.com/" + $(this).find("a").attr("href"); const rilljir = axios({ url: hasil, method: "GET", headers: { "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7", "User-Agent": "okhttp/4.9.3" } }).then((res) => { const $ = cheerio.load(res.data); const detail = 
$(".review-header > .article-info"); result.data.push({ name: name, url: hasil, image_url: $(this).find("a > img").attr("src"), desc: $(this).find("a > img").attr("title"), detail_spec: { name: detail.find(".article-info-line.page-specs.light.border-bottom > h1").text(), released_at: detail.find("ul > .specs-brief.pattern > .specs-brief-accent").eq(0).find("span").text().trim().split("Released ").join(""), mass: detail.find("ul > .specs-brief.pattern > .specs-brief-accent").eq(1).find("span").text().trim(), os: detail.find("ul > .specs-brief.pattern > .specs-brief-accent").eq(2).find("span").text().trim(), storage: detail.find("ul > .specs-brief.pattern > .specs-brief-accent").eq(3).find("span").text().trim(), screen_size: detail.find("ul > .help.accented.help-display > .accent > .displaysize-hl").text().trim(), screen_resolution: detail.find("ul > .help.accented.help-display > .displayres-hl").text().trim(), camera: { mp: detail.find("ul > .help.accented.help-camera > .accent.accent-camera > .camerapixels-hl").text().trim() + "MP", pixels: detail.find("ul > .help.accented.help-camera > .videopixels-hl").text().trim() }, ram: detail.find("ul > .help.accented.help-expansion > .accent.accent-expansion > .ramsize-hl").text().trim() + "GB", chipset: detail.find("ul > .help.accented.help-expansion > .chipset-hl").text().trim(), battery_size: detail.find("ul > .help.accented.help-battery > .accent.accent-battery > .batsize-hl").text().trim() + "mAh", speed_charge: detail.find("ul > .help.accented.help-battery > .battype-hl").text().trim() } }); }).catch((err) => { console.error(err); }); ditel.push(rilljir); }); await Promise.all(ditel); } catch (err) { console.error({ status: 500, creator: "David XD", msg: util.format(err) }); return { status: 500, creator: "David XD", msg: util.format(err) }; } //console.log(result); return result; }; characterSearch = async(query, page = 1) => { const baseURL = "https://www.anime-planet.com"; const result = { status: 200, creator: 
"David XD", result: [] }; await axios({ url: baseURL + "/characters/all?name=" + query + "&page=" + page, method: "GET", headers: { "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7", //"Cookie": "_ga=GA1.1.300976770.1718282576; darkmode=on; cf_clearance=tzlC72vTjWsc0dgrwwsfx4.yUOacdgGFhlYGxKdWCxI-1719025323-1.0.1.1-2_q_Dz5mIIUMmNCcDYyGDiIFSGtIPcHn1r2yHUmOG.jypD9S8JRC2kmDC9U8pXI41UX.iRTn1Tq9hPSiWiy6Hw; _ga_K2VSH7P4VZ=GS1.1.1719036942.3.1.1719037045.0.0.0", "User-Agent": "okhttp/4.9.3" } }).then((res) => { if (res.status === 404) { console.log({ status: 404, creator: "David XD", msg: "Query " + query + " Not Found :/" }); return { status: 404, creator: "David XD", msg: "Query " + query + " Not Found :/" }; }; const $ = cheerio.load(res.data); $("#siteContainer > table > tbody > tr").each(function(i, e) { result.result.push({ name: $(e).find(".tableCharInfo > a").text(), url: baseURL + $(e).find(".tableAvatar > a").attr("href"), image_url: $(e).find(".tableAvatar > a > img").attr("src"), traits: $(e).find(".tableCharInfo > .tags").eq(0).find("ul > li > a").map(function() { return { title: $(this).text(), url: baseURL + $(this).attr('href') }; }).get(), tags: $(e).find(".tableCharInfo > .tags").eq(1).find("ul > li > a").map(function() { return { title: $(this).text(), url: baseURL + $(this).attr('href') }; }).get(), anime: $(e).find(".tableAnime > div > ul > li > a").map(function() { return { title: $(this).text(), url: baseURL + $(this).attr('href') }; }).get() }) }); }).catch((er) => { console.log({ status: 500, creator: "David XD", msg: util.format(er) }); return { status: 500, creator: "David XD", msg: util.format(er) }; }); //console.log(result); return result; }; wallpaper = async (query, page) => { try { const result = { status: 200, creator: "David XD", data: [] }; const response = await axios({ url: "https://www.desktophut.com/search/" + query + "?page=" + page, 
method: "GET", headers: { "User-Agent": "GoogleBot" } }); const $ = cheerio.load(response.data); const items = $(".main-container > .content-wrapper > .container2.my-3 > .masonry-grid > .masonry-item"); if (items.length === 0) return { status: 404, creator: "David XD", msg: "Keyword " + query + " Not Found :/" } for (let i = 0; i < items.length; i++) { const item = $(items[i]); let qua = item.find("a > .overlay > .developer").text().trim(); let res; if (qua === "3840x2160") { res = "4K"; } else if (qua === "2560x1440") { res = "2K"; } else if (qua === "1920x1080") { res = "FHD"; } else if (qua === "1280x720") { res = "HD"; } else if (qua === "854x480") { res = "SD"; } else if (qua === "640x360") { res = "SD"; } const vid = "https://www.desktophut.com/" + item.find("a").attr("href"); const respon = await axios({ url: vid, method: "GET", headers: { "User-Agent": "GoogleBot" } }); const $$ = cheerio.load(respon.data); const __$ = $$(".main-container > .content-wrapper.mt-1 > .container2.my-3 > .row.custapp"); const _$ = __$.find(".col-md-6 > .shadow-sm.p-2.app-details.bg-custom.rounded.mb-2"); const vid_url = __$.find("#video > source").attr("src"); const ukuran = _$.find("#embed-box > .desc > strong").eq(2).text(); const view = _$.find("#embed-box > .desc > strong").eq(3).text(); // Push result data result.data.push({ title: item.find("a > .overlay > .title > h2").text(), quality: qua + " (" + res + ")", size: ukuran, upload_at: item.find("a > .overlay > .date").text().trim(), views: view, url: "https://www.desktophut.com/" + item.find("a").attr("href"), image_url: item.find("a > img").attr("src"), video_url: vid_url }); } return result; } catch (error) { console.error(error); return { status: 500, creator: "David XD", msg: "Something Went Wrong :/" } } } pinterest = async (query) => { const baseUrl = 'https://www.pinterest.com/resource/BaseSearchResource/get/'; const params = { source_url: "/search/pins/?rs=ac&len=2&q=" + query + "&eq=" + query + "&etslf=3896", 
data: JSON.stringify({ options: { isPrefetch: false, query, scope: 'pins', no_fetch_context_on_resource: false }, context: {} }), _: new Date() }; const target = new URL(baseUrl); Object.entries(params).forEach(([key, value]) => target.searchParams.set(key, value)); try { const { data } = await axios.get(target.toString()); const src = data.resource_response.data.results ? data.resource_response.data.results : []; const ext = (url) => { return url.substring(url.lastIndexOf('.')); }; const result = { status: 200, creator: "David XD", result: src.map(p => ({ title: p.grid_title ?? "Unknown", url: "https://www.pinterest.com/pin/" + p.id ?? "Unknown", created_at: Function.formatDate(new Date()) ?? "Unknown", id: p.id ?? "Unknown", ext: ext(p.images['orig'].url) ?? "Unknown", image_url: p.images['orig'].url ?? "Unknown", })) }; //console.log(result); return result; } catch (e) { console.error(e); return { status: 500, creator: "David XD", msg: e.message }; } }; googleSearch = async(query) => { const result = { status: 200, creator: "David XD", data: [] }; try { let a = await google({ query: query }); for (let b of a) { result.data.push({ title: b.title, desc: b.snippet, url: b.link }); }; } catch (e) { console.error(e); return { status: 500, creator: "David XD", msg: e.message }; }; return result }; };