indo-news-scraper
Version:
A news scraper for JavaScript that helps scrape news from Indonesian news portals.
73 lines (61 loc) • 2.69 kB
JavaScript
// Strict-mode directive. The spelling must be exactly 'use strict' (with a
// space); any other string literal is a plain no-op expression statement and
// leaves the script running in sloppy mode.
'use strict';
const cheerio = require('cheerio');
const puppeteer = require('puppeteer');
const moment = require('moment');
/**
 * Scraper for the Antara news portal.
 *
 * The constructor takes no arguments and sets no instance state; all
 * configuration lives on the prototype and is shared by every instance.
 */
function Antara() {}

// Shared defaults read by the scraper methods via `Antara.prototype`.
Object.assign(Antara.prototype, {
  // Label identifying this news source.
  source: 'Antara',
  // Search endpoint; the keyword is appended as the `q` query parameter.
  baseUrl: 'https://www.antaranews.com/search',
  // Passed to `puppeteer.launch` as its `headless` option.
  headless: true,
});
/**
 * Search Antara News for a keyword and scrape the rendered result list.
 *
 * Launches a puppeteer browser, opens `baseUrl?q=<keyword>`, reads the page
 * HTML and extracts one entry per `.simple-big` element. The browser is
 * always closed again, whether or not the page load succeeded.
 *
 * @param {string} query - Search keyword. It is URL-encoded before being
 *   placed in the query string.
 * @returns {Promise<Array<{title: *, url: *, img: *, date: *}>|Error>}
 *   Resolves with the scraped entries (`date` is whatever
 *   `Antara.prototype.convertDate` returns). NOTE: asynchronous failures do
 *   NOT reject; the promise resolves with an `Error` wrapping the failure, so
 *   callers must check `result instanceof Error`. This is kept for backward
 *   compatibility.
 * @throws {Error} Synchronously, when `query` is falsy.
 */
Antara.prototype.scrap = (query = null) => {
  if (!query) {
    throw new Error('Please provide a keyword!');
  }

  // Encode the keyword so spaces, `&`, `#`, `+` etc. cannot corrupt the URL.
  const url = `${Antara.prototype.baseUrl}?q=${encodeURIComponent(query)}`;

  return puppeteer
    .launch({ headless: Antara.prototype.headless })
    .then(browser => {
      // A failure while closing must not mask the scrape result or the
      // original error, so it is deliberately ignored here.
      const closeBrowser = () => browser.close().catch(() => {});

      return browser
        .newPage()
        .then(page => page.goto(url).then(() => page.content()))
        .then(
          // Manual "finally": release the browser on success and on failure,
          // otherwise every call leaks a Chromium process.
          html => closeBrowser().then(() => html),
          err => closeBrowser().then(() => { throw err; })
        );
    })
    .then(html => {
      const $ = cheerio.load(html);
      const newsData = [];

      $('.simple-big').each((index, el) => {
        const header = $(el).find('header');
        const dateNode = header.find('.simple-share').children().next();

        // Strip <i> children from the date node before reading its HTML.
        dateNode.children().remove('i');

        newsData.push({
          title: header.children().children().html(),
          url: header.find('h3').children().attr('href'),
          img: $(el).find('.simple-thumb').find('picture').find('img').data('src'),
          // substring(1) drops the first character of the date HTML
          // (presumably a separator left after the icon; verify against the
          // live markup).
          date: Antara.prototype.convertDate(dateNode.html().substring(1))
        });
      });

      return newsData;
    })
    // Backward-compatible contract: failures are returned as a resolved Error
    // value rather than a rejection (see @returns).
    .catch(err => new Error(err));
};
// Indonesian relative-time units ("<n> <unit> lalu" = "<n> <unit> ago")
// mapped to the unit names accepted by moment.
const RELATIVE_UNITS = {
  detik: 'seconds',
  menit: 'minutes',
  jam: 'hours',
  hari: 'days'
};
const RELATIVE_PATTERN = /(\d+)\s*(detik|menit|jam|hari)\s+lalu/i;

// Only the Indonesian month names that differ from English need translating
// (April, September and November are spelled identically).
const MONTH_TRANSLATIONS = {
  Januari: 'January',
  Februari: 'February',
  Maret: 'March',
  Mei: 'May',
  Juni: 'June',
  Juli: 'July',
  Agustus: 'August',
  Oktober: 'October',
  Desember: 'December'
};
const MONTH_PATTERN = /Januari|Februari|Maret|Mei|Juni|Juli|Agustus|Oktober|Desember/g;

/**
 * Convert a date string in Indonesian into an ISO 8601 timestamp.
 *
 * Two input shapes are recognised:
 *  - relative: "<n> detik|menit|jam|hari lalu" (seconds/minutes/hours/days
 *    ago, matched case-insensitively), which is resolved by subtracting the
 *    amount from the current time;
 *  - absolute: "DD MMMM YYYY HH:mm" with an Indonesian month name, which is
 *    translated to English and parsed by moment with that format.
 *
 * @param {string} dateString - Date text to convert.
 * @returns {string|null} Result of moment's `toISOString()`; moment returns
 *   `null` when the input cannot be parsed into a valid date.
 * @throws {TypeError} When `dateString` is not a string (e.g. null/undefined).
 */
Antara.prototype.convertDate = (dateString) => {
  const relative = dateString.match(RELATIVE_PATTERN);
  if (relative) {
    const amount = parseInt(relative[1], 10);
    const unit = RELATIVE_UNITS[relative[2].toLowerCase()];
    // "lalu" means "ago": the timestamp lies in the past, so subtract.
    return moment().subtract(amount, unit).toISOString();
  }

  const englishDate = dateString.replace(MONTH_PATTERN, name => MONTH_TRANSLATIONS[name]);
  return moment(englishDate, 'DD MMMM YYYY HH:mm').toISOString();
};
// The module's public API is a single Antara instance created at load time,
// so consumers call `require(...).scrap(keyword)` directly.
const antaraScraper = new Antara();
module.exports = antaraScraper;