UNPKG

@davidpunya/web-scraper

Version:

A web scraper library for RESTful APIs

110 lines (106 loc) 6.46 kB
const axios = require("axios"); const FormData = require("form-data"); const cheerio = require("cheerio"); const fs = require("fs"); const util = require("util"); const fetch = (...args) => import('node-fetch').then(({default: fetch}) => fetch(...args)); module.exports = class Information { nasaGetNews = async () => { let result = { status: 200, creator: "David XD", result: [] }; await axios("https://www.nasa.gov/news/all-news", { method: "GET", headers: { Accept: "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7", "Cookie": "_gid=GA1.2.1041500301.1718689119; _ga_3MLXXCVWWY=GS1.1.1718689318.1.0.1718689321.0.0.0; _parsely_session={%22sid%22:4%2C%22surl%22:%22https://www.nasa.gov/news/all-news/%22%2C%22sref%22:%22%22%2C%22sts%22:1718774199682%2C%22slts%22:1718694988570}; _parsely_visitor={%22id%22:%22pid=796aa82b-30b7-4311-87fa-d9e3a997f3dc%22%2C%22session_count%22:4%2C%22last_session_ts%22:1718774199682}; _ga_CSLL4ZEK4L=GS1.1.1718774198.6.1.1718774208.0.0.0; _ga=GA1.1.1898039358.1718543656", "Referer": "https://www.nasa.gov/", "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36" } }).then((res) => { const $ = cheerio.load(res.data); $("#primary > article > .entry-content > .hds-content-lists-inner > .hds-content-lists > .grid-container > .hds-content-items > .hds-content-item").each(function (a, b) { result.result.push({ title: $(b).find(".hds-content-item-inner > a > div").text(), reading: $(b).find(".hds-content-item-inner > .hds-content-item-readtime").text().trim(), desc: $(b).find(".hds-content-item-inner > .margin-top-0").text(), type: $(b).find(".hds-content-item-inner > .display-flex > span").text(), url: $(b).find("a").attr("href"), image_url: $(b).find("a > figure > img").attr("src") }); }); }).catch((err) => { console.error(err); return JSON.stringify({ status: 500, creator: "David XD", msg: 
util.format(err) }); }); //console.log(result); return result; }; GSMarenaNews = async () => { const result = { status: 200, creator: "David XD", data: [] }; const url = "https://www.gsmarena.com/"; try { const res = await axios.get(url, { headers: { "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7", "Cookie": `_lr_env_src_ats=false; panoramaId_expiry=1719571932058; _cc_id=255febc7bca745f60d1ae6458c48a78c; panoramaId=3b5b25076b43d47b1473ffe2765116d53938e564499236d382883d568ea26e96; _au_1d=AU1D-0100-001718967090-KH6HDTK6-VVH9; _gid=GA1.2.2053274941.1718967091; connectId={"ttl":86400000,"lastUsed":1718967090698,"lastSynced":1718967090698}; _lr_geo_location=ID; lpe=648; keyw=Samsung; _ga_FVWZ0RM4DH=GS1.1.1718980996.2.1.1718983943.60.0.0; _lr_retry_request=true; __gads=ID=1de07897a7652ab6:T=1718967134:RT=1719017685:S=ALNI_Mbqb0-l46vQylZiqXii3u30U2xrcw; __gpi=UID=00000e59cf4d8fe4:T=1718967134:RT=1719017685:S=ALNI_MZScg6DsCeXULyVh1CjRDMKqsXhvA; __eoi=ID=00138c4b50706d9d:T=1718967134:RT=1719017685:S=AA-Afja6mH36gkgySJHKqk33O2fO; _ga_WECNNBCHQE=GS1.1.1719017638.4.1.1719017725.0.0.0; cto_bundle=gqJdlF8xeDMlMkJMOWk4azVPcFptQ2I3dThKS2hyRWpVNFdCTXh4T2RINGdNVVoybDc4NmNnN0oxaWtvcWdReWFEajBRWXZNOUVGSEN4amtBeWlpSnk1NGpEYmsxMzJlSW1PRUhXSE15akNNa1pxbjhjVVhzZmh2M2k0MkVvS1h3WGhuJTJCJTJCdTVIR2pHb3Bna2lCQmZnUXV0QUttZGclM0QlM0Q; cto_bidid=-nG6IF9vTmlyWVc3TmdDUEdRJTJCUTBnWEJHQyUyRjBidm9TTjN2Ymw1UVNtbkxNbkhCaXNURm5PdndQVzN2JTJCODJ1OGFzVmxLQXVXdHhlNFlNOW1PVFV4ZktBbVpuYmhjSVFIa1ZVY1A0YnJ3Y1hnaXloMCUzRA; __qca=I0-1578833655-1719017729874; _ga=GA1.2.406614757.1718967085`, "Host": "www.gsmarena.com", "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36" } }); const $ = cheerio.load(res.data); $(".news-item").each((i, e) => { const title = $(e).find("a > h3").text().trim(); const link = url + $(e).find("a").attr("href"); const 
imageSrc = $(e).find("a > .news-item-media-wrap.left > img").attr("src"); const uploadAt = $(e).find(".meta-line > .meta-item-time").text().trim(); const commentCount = $(e).find(".meta-line > .meta-item-comments").text().trim(); const description = $(e).find("a > p").text().trim(); result.data.push({ title, url: link, image_url: imageSrc, upload_at: uploadAt, comment: commentCount, desc: description }); }); } catch (err) { console.log({ status: 500, creator: "David XD", msg: err.message }); return { status: 500, creator: "David XD", msg: err.message }; }; //console.log(result); return result; }; jarak = async(dari, ke) => { let html = await (await axios(`https://www.google.com/search?q=${encodeURIComponent('jarak ' + dari + ' ke ' + ke)}&hl=id`)).data let $ = cheerio.load(html); let res = { status: 200, creator: "David XD", result: {} }; let img = html.split("var s=\'")?.[1]?.split("\'")?.[0] res.result.image_url = /^data:.*?\/.*?;base64,/i.test(img) ? Buffer.from(img.split`,` [1], 'base64') : '' res.result.desc = $('div.BNeawe.deIvCb.AP7Wnd').text()?.trim() return res }; }