@davidpunya/web-scraper
Version:
A web scraper library for RESTful APIs
110 lines (106 loc) • 6.46 kB
JavaScript
const axios = require("axios");
const FormData = require("form-data");
const cheerio = require("cheerio");
const fs = require("fs");
const util = require("util");
const fetch = (...args) => import('node-fetch').then(({default: fetch}) => fetch(...args));
module.exports = class Information {
nasaGetNews = async () => {
let result = {
status: 200,
creator: "David XD",
result: []
};
await axios("https://www.nasa.gov/news/all-news", {
method: "GET",
headers: {
Accept: "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
"Cookie": "_gid=GA1.2.1041500301.1718689119; _ga_3MLXXCVWWY=GS1.1.1718689318.1.0.1718689321.0.0.0; _parsely_session={%22sid%22:4%2C%22surl%22:%22https://www.nasa.gov/news/all-news/%22%2C%22sref%22:%22%22%2C%22sts%22:1718774199682%2C%22slts%22:1718694988570}; _parsely_visitor={%22id%22:%22pid=796aa82b-30b7-4311-87fa-d9e3a997f3dc%22%2C%22session_count%22:4%2C%22last_session_ts%22:1718774199682}; _ga_CSLL4ZEK4L=GS1.1.1718774198.6.1.1718774208.0.0.0; _ga=GA1.1.1898039358.1718543656",
"Referer": "https://www.nasa.gov/",
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36"
}
}).then((res) => {
const $ = cheerio.load(res.data);
$("#primary > article > .entry-content > .hds-content-lists-inner > .hds-content-lists > .grid-container > .hds-content-items > .hds-content-item").each(function (a, b) {
result.result.push({
title: $(b).find(".hds-content-item-inner > a > div").text(),
reading: $(b).find(".hds-content-item-inner > .hds-content-item-readtime").text().trim(),
desc: $(b).find(".hds-content-item-inner > .margin-top-0").text(),
type: $(b).find(".hds-content-item-inner > .display-flex > span").text(),
url: $(b).find("a").attr("href"),
image_url: $(b).find("a > figure > img").attr("src")
});
});
}).catch((err) => {
console.error(err);
return JSON.stringify({
status: 500,
creator: "David XD",
msg: util.format(err)
});
});
//console.log(result);
return result;
};
GSMarenaNews = async () => {
const result = {
status: 200,
creator: "David XD",
data: []
};
const url = "https://www.gsmarena.com/";
try {
const res = await axios.get(url, {
headers: {
"Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
"Cookie": `_lr_env_src_ats=false; panoramaId_expiry=1719571932058; _cc_id=255febc7bca745f60d1ae6458c48a78c; panoramaId=3b5b25076b43d47b1473ffe2765116d53938e564499236d382883d568ea26e96; _au_1d=AU1D-0100-001718967090-KH6HDTK6-VVH9; _gid=GA1.2.2053274941.1718967091; connectId={"ttl":86400000,"lastUsed":1718967090698,"lastSynced":1718967090698}; _lr_geo_location=ID; lpe=648; keyw=Samsung; _ga_FVWZ0RM4DH=GS1.1.1718980996.2.1.1718983943.60.0.0; _lr_retry_request=true; __gads=ID=1de07897a7652ab6:T=1718967134:RT=1719017685:S=ALNI_Mbqb0-l46vQylZiqXii3u30U2xrcw; __gpi=UID=00000e59cf4d8fe4:T=1718967134:RT=1719017685:S=ALNI_MZScg6DsCeXULyVh1CjRDMKqsXhvA; __eoi=ID=00138c4b50706d9d:T=1718967134:RT=1719017685:S=AA-Afja6mH36gkgySJHKqk33O2fO; _ga_WECNNBCHQE=GS1.1.1719017638.4.1.1719017725.0.0.0; cto_bundle=gqJdlF8xeDMlMkJMOWk4azVPcFptQ2I3dThKS2hyRWpVNFdCTXh4T2RINGdNVVoybDc4NmNnN0oxaWtvcWdReWFEajBRWXZNOUVGSEN4amtBeWlpSnk1NGpEYmsxMzJlSW1PRUhXSE15akNNa1pxbjhjVVhzZmh2M2k0MkVvS1h3WGhuJTJCJTJCdTVIR2pHb3Bna2lCQmZnUXV0QUttZGclM0QlM0Q; cto_bidid=-nG6IF9vTmlyWVc3TmdDUEdRJTJCUTBnWEJHQyUyRjBidm9TTjN2Ymw1UVNtbkxNbkhCaXNURm5PdndQVzN2JTJCODJ1OGFzVmxLQXVXdHhlNFlNOW1PVFV4ZktBbVpuYmhjSVFIa1ZVY1A0YnJ3Y1hnaXloMCUzRA; __qca=I0-1578833655-1719017729874; _ga=GA1.2.406614757.1718967085`,
"Host": "www.gsmarena.com",
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36"
}
});
const $ = cheerio.load(res.data);
$(".news-item").each((i, e) => {
const title = $(e).find("a > h3").text().trim();
const link = url + $(e).find("a").attr("href");
const imageSrc = $(e).find("a > .news-item-media-wrap.left > img").attr("src");
const uploadAt = $(e).find(".meta-line > .meta-item-time").text().trim();
const commentCount = $(e).find(".meta-line > .meta-item-comments").text().trim();
const description = $(e).find("a > p").text().trim();
result.data.push({
title,
url: link,
image_url: imageSrc,
upload_at: uploadAt,
comment: commentCount,
desc: description
});
});
} catch (err) {
console.log({
status: 500,
creator: "David XD",
msg: err.message
});
return {
status: 500,
creator: "David XD",
msg: err.message
};
};
//console.log(result);
return result;
};
jarak = async(dari, ke) => {
let html = await (await axios(`https://www.google.com/search?q=${encodeURIComponent('jarak ' + dari + ' ke ' + ke)}&hl=id`)).data
let $ = cheerio.load(html);
let res = {
status: 200,
creator: "David XD",
result: {}
};
let img = html.split("var s=\'")?.[1]?.split("\'")?.[0]
res.result.image_url = /^data:.*?\/.*?;base64,/i.test(img) ? Buffer.from(img.split`,` [1], 'base64') : ''
res.result.desc = $('div.BNeawe.deIvCb.AP7Wnd').text()?.trim()
return res
};
}