/*
 * go-image-go
 * Version:
 * A powerful package to get images from Google.
 * 155 lines (116 loc) • 5.55 kB
 * JavaScript
 */
const Axios = require('axios');
const Utils = require('./utils');
const Parser = require('./parser');
const Cheerio = require('cheerio');
const Constants = require('./constants');
// Domains excluded from image search by default. `search` combines this list
// with caller-supplied domains and skips result URLs containing the first entry.
let filtered_domains = ['gstatic.com'];
/**
 * Fetches a Google results page (or a search-by-image page) and returns the
 * sections extracted from it by `Parser`.
 *
 * @param {string} search_query - Search text, or the image URL when `options.ris` is set.
 * @param {Object} [options]
 * @param {boolean} [options.ris] - When truthy, requests `searchbyimage` with
 *   `search_query` as the `image_url` parameter.
 * @param {number} [options.page] - Zero-based page; multiplied by 10 for the `start` parameter.
 * @param {boolean} [options.safe] - Adds `safe=active` to the request when truthy.
 * @param {boolean} [options.match_all_images] - Accepted for compatibility; not read here.
 * @param {?Object} [options.additional_params] - Forwarded to Axios as `params`.
 * @returns {Promise<Object>} Always contains `results`, `knowledge_panel`,
 *   `featured_snippet`, `top_stories`, `people_also_ask` and
 *   `people_also_search_for`; `did_you_mean`, `unit_converter`, `weather`,
 *   `current_time`, `location`, `dictionary` and `translation` are present only
 *   when the page yielded a truthy value for them.
 * @throws {Error} When the HTTP request fails.
 */
async function query(search_query, options = { ris: false, page: 0, match_all_images: false, additional_params: null }) {
  // A missing or non-numeric page yields NaN, which falls back to the first page.
  const start = options.page * 10 || 0;

  // Percent-encode every word on its own: encodeURI() on the whole URL leaves
  // `&`, `#`, `+`, `?` and `=` untouched, which splits or alters the query.
  const terms = search_query
    .trim()
    .toLowerCase()
    .split(/ +/)
    .map((word) => encodeURIComponent(word))
    .join('+');

  let url;
  if (options.ris) {
    // The image URL is a parameter value, so its own `?`/`&`/`=` must be escaped.
    url = `${Constants.URLS.W_GOOGLE}searchbyimage?image_url=${encodeURIComponent(search_query)}`;
  } else {
    url = `${Constants.URLS.GOOGLE}search?q=${terms}&aqs=chrome..69i57.1685j0j4&client=ms-android-motorola-rev2&sourceid=chrome-mobile&ie=UTF-8&aomd=1${options.safe ? '&safe=active' : ''}&start=${start}`;
  }

  const response = await Axios.get(url, { params: options.additional_params, headers: Utils.getHeaders(true) }).catch((error) => error);
  if (response instanceof Error) {
    throw new Error(`Could not search on Google: ${response.message}`);
  }

  const $ = Cheerio.load(Utils.formatHtml(response.data));
  const parser = new Parser($, response.data);

  // Key order and parser call order are kept as they were.
  const final_data = {
    results: parser.getOrganicResults(),
    did_you_mean: '',
    knowledge_panel: parser.getKnowledgeGraph(),
    featured_snippet: parser.getFeaturedSnippet(),
    top_stories: [],
    people_also_ask: [],
    people_also_search_for: []
  };

  const did_you_mean = $(Constants.SELECTORS.DID_YOU_MEAN).text();
  if (did_you_mean) {
    final_data.did_you_mean = did_you_mean;
  } else {
    delete final_data.did_you_mean;
  }

  // Optional sections are attached only when the parser returned something truthy.
  const unit_converter = parser.getConverters();
  if (unit_converter) {
    final_data.unit_converter = unit_converter;
  }

  const weather_forecast = parser.getWeather();
  if (weather_forecast) {
    final_data.weather = weather_forecast;
  }

  const time = parser.getTime();
  if (time) {
    final_data.current_time = time;
  }

  const location = parser.getLocation(time);
  if (location) {
    final_data.location = location;
  }

  const dictionary = parser.getDictionary();
  if (dictionary) {
    final_data.dictionary = dictionary;
  }

  const translation = parser.getTranslation();
  if (translation) {
    final_data.translation = translation;
  }

  final_data.top_stories = parser.getTopStories();
  final_data.people_also_ask = parser.getPaa();
  final_data.people_also_search_for = parser.getPas();

  return final_data;
}
/**
 * Runs a Google Images search and extracts the images found in the response body.
 *
 * @param {string} query - Search text; trimmed, lower-cased and split on spaces.
 * @param {Object} [options]
 * @param {boolean} [options.safe] - Adds `safe=active` to the request when truthy.
 * @param {string[]} [options.exclude_domains] - Extra domains to exclude with
 *   `-site:` operators. The operators (built-in domains included) are only
 *   appended when this option is truthy.
 * @param {?Object} [options.additional_params] - Forwarded to Axios as `params`.
 * @returns {Promise<Array<{url: string, width: string, height: string, origin: (Object|undefined)}>>}
 *   At most 200 images; `width` and `height` are the raw captured strings and
 *   `origin` is the entry of `parseImageOriginData` at the same position.
 * @throws {Error} When the HTTP request fails.
 */
async function search(query, options = { safe: false, exclude_domains: [], additional_params: null }) {
  const max_results = 200;

  // Percent-encode every word on its own: encodeURI() on the whole URL leaves
  // `&`, `#` and `+` untouched, which splits or alters the query.
  const terms = query
    .trim()
    .toLowerCase()
    .split(/ +/)
    .map((word) => encodeURIComponent(word));

  // Build the exclusion list per call. Reassigning the module-level list made
  // it grow between calls and stored `undefined` when the option was missing.
  const excluded_domains = filtered_domains.concat(options.exclude_domains || []);
  if (options.exclude_domains) {
    // Each operator is its own search term; joined without a separator they
    // collapse into a single meaningless token.
    terms.push(...excluded_domains.map((site) => encodeURIComponent(`-site:${site}`)));
  }

  const formatted_search_url = `${Constants.URLS.GIS}search?tbm=isch${options.safe ? '&safe=active' : ''}&q=${terms.join('+')}`;

  const response = await Axios.get(formatted_search_url, { params: options.additional_params, headers: Utils.getHeaders(false) }).catch((error) => error);
  if (response instanceof Error) {
    throw new Error(`Could not search on Google: ${response.message}`);
  }

  // Use a private copy of the pattern: the shared regex keeps `lastIndex`
  // when the loop stops at the cap, so the next call would start mid-document.
  const image_search_regex = new RegExp(Constants.REGEX.IMAGE_SEARCH);
  const origin = parseImageOriginData(response.data);
  const final_data = [];

  let parsed_data = image_search_regex.exec(response.data);
  while (parsed_data !== null && final_data.length < max_results) {
    const [, url, width, height] = parsed_data;
    // Skipped matches do not advance the origin index, only kept images do.
    if (!url.includes(filtered_domains[0])) {
      final_data.push({ url, width, height, origin: origin[final_data.length] });
    }
    parsed_data = image_search_regex.exec(response.data);
  }

  return final_data;
}
/**
 * Collects the origin (page title and website) of every image described in
 * the raw response text.
 *
 * @param {string} data - Response body to scan with `Constants.REGEX.IMAGE_ORIGIN`.
 * @returns {Array<{title: string, website: string}>} One entry per match, in
 *   match order; `title` is capture group 4 and `website` is capture group 3.
 */
function parseImageOriginData(data) {
  const pattern = Constants.REGEX.IMAGE_ORIGIN;
  const origins = [];

  // Keep calling exec() until the pattern stops matching.
  for (let match = pattern.exec(data); match !== null; match = pattern.exec(data)) {
    const [, , , website, title] = match;
    origins.push({ title, website });
  }

  return origins;
}
/**
 * Downloads the Google News top-stories page and lists its headline stories.
 *
 * @returns {Promise<{headline_stories: Array<{title: string, url: string, image: string, published: string, by: string}>}>}
 *   One entry per headline anchor. `image`, `published` and `by` are taken from
 *   the image, time and publisher elements at the same position.
 * @throws {Error} When the HTTP request fails.
 */
async function news() {
  const top_stories_url = `${Constants.URLS.GOOGLE_NEWS}topstories?tab=in&hl=en-US&gl=US&ceid=US:en`;
  const response = await Axios.get(top_stories_url, { headers: Utils.getHeaders(true) }).catch((error) => error);
  if (response instanceof Error) {
    throw new Error(`Could not get top news: ${response.message}`);
  }

  const $ = Cheerio.load(response.data);

  // Reads one value from every element matching `selector`, in document order.
  const collect = (selector, read) => $(selector).map((i, el) => read($(el))).get();

  const publishers = collect(Constants.SELECTORS.PUBLISHER, (node) => node.text());
  const images = collect(Constants.SELECTORS.STORY_IMG, (node) => node.attr('src'));
  const times = collect(Constants.SELECTORS.STORY_TIME, (node) => node.text());

  const headline_stories = [];
  $('a[class="DY5T1d RZIKme"]').each((i, el) => {
    const anchor = $(el);
    const title = anchor.text();
    const href = anchor.attr('href');
    headline_stories.push({
      title,
      // The first two characters of the href are dropped before prefixing the news base URL.
      url: `${Constants.URLS.GOOGLE_NEWS}${href.slice(2)}`,
      image: images[i],
      published: times[i],
      by: publishers[i]
    });
  });

  return { headline_stories };
}
// Public API: top news (`news`), image search (`search`) and web search (`query`).
module.exports = { news, search, query };