UNPKG

go-image-go

Version:

A powerful package to get images from Google.

300 lines (256 loc) 12 kB
'use strict';

const Utils = require('./utils');
const Constants = require('./constants');
const NormalizeText = require('replace-special-characters');

// Hostname used for favicon links when a result has no usable URL.
const FALLBACK_HOSTNAME = 'google.com';

/**
 * Returns the hostname of `url` without ever throwing.
 *
 * `new URL()` raises a TypeError for relative or malformed input; in that
 * case (and when `url` is empty/undefined) the fallback hostname is returned
 * so that a single bad link cannot abort parsing of a whole result page.
 *
 * @param {string|undefined} url - Absolute URL to inspect.
 * @returns {string} The URL's hostname, or `google.com` when unavailable.
 */
function getHostname(url) {
  if (!url) return FALLBACK_HOSTNAME;

  try {
    return new URL(url).hostname;
  } catch (err) {
    return FALLBACK_HOSTNAME;
  }
}

/**
 * Extracts structured data from a loaded search results page.
 *
 * Every selector comes from `Constants.SELECTORS`; this class only applies
 * them and shapes the output.
 */
class Parser {
  /**
   * @param {Function} $ - Selector function used to query the page; it is
   *   called as `$(selector)` and the result is used with methods such as
   *   `.map()`, `.text()` and `.attr()` (presumably a cheerio instance —
   *   confirm against the caller).
   * @param {string} data - Raw page source, scanned by `getKnowledgeGraph`.
   */
  constructor($, data) {
    this.$ = $;
    this.data = data;
  }

  /**
   * Collects the regular (non-ad) results.
   *
   * @returns {Array<{title: string, description: string, url: string,
   *   favicons: {high_res: string, low_res: string}}>} One entry per title;
   *   missing fields are reported as `'N/A'`.
   */
  getOrganicResults() {
    const titles = this.$(Constants.SELECTORS.TITLE).map((i, el) => {
      // Ad titles are recognised by their parent's inline style; returning
      // undefined makes `.map()` drop the element.
      if (el.parent.attribs.style === '-webkit-line-clamp:2') return undefined;
      return this.$(el.children).text().trim();
    }).get();

    const descriptions = this.$(Constants.SELECTORS.DESCRIPTION).map((i, el) => {
      // Ad descriptions are recognised by their parent's class.
      if (el.parent.attribs.class === 'w1C3Le') return undefined;
      return this.$(el).text().trim();
    }).get();

    const urls = this.$(Constants.SELECTORS.URL)
      .map((i, el) => this.$(el).attr('href'))
      .get();

    this.correctFuzzyData(titles, descriptions, urls);

    return titles.map((title, index) => {
      const url = urls[index];
      // Resolved once and guarded: an unparseable href falls back to
      // google.com instead of throwing out of the whole method.
      const hostname = getHostname(url);

      return {
        title: title || 'N/A',
        description: descriptions[index] || 'N/A',
        url: url || 'N/A',
        favicons: {
          high_res: `https://api.faviconkit.com/${hostname}/192`,
          low_res: `https://www.google.com/s2/favicons?sz=64&domain_url=${hostname}`
        }
      };
    });
  }

  /**
   * Builds the knowledge panel object.
   *
   * `title`, `description` and `url` are always present (`'N/A'` when not
   * found). Metadata rows are added under keys derived from their labels.
   * `type`, `books`, `tv_shows_and_movies`, `lyrics`, `ratings`,
   * `available_on`, `images` and `demonstration` appear only when the page
   * provides them.
   *
   * @returns {Object} The knowledge panel.
   */
  getKnowledgeGraph() {
    const knowledge_panel = {};

    knowledge_panel.title =
      this.$(Constants.SELECTORS.KNO_PANEL_TITLE[0]).first().text() ||
      this.$(Constants.SELECTORS.KNO_PANEL_TITLE[1]).text() ||
      'N/A';
    knowledge_panel.description =
      this.$(Constants.SELECTORS.KNO_PANEL_DESCRIPTION).first().text() || 'N/A';
    knowledge_panel.url =
      this.$(Constants.SELECTORS.KNO_PANEL_URL).attr('href') || 'N/A';

    // Extracts metadata from the knowledge graph.
    this.$(Constants.SELECTORS.KNO_PANEL_METADATA).each((i, el) => {
      // The label's last character is dropped (presumably a trailing colon).
      const key = this.$(el).first().text().trim().slice(0, -1);
      const value = this.$(el).next().text().trim();

      if (value.length > 0) {
        knowledge_panel[NormalizeText(key.toLowerCase().replace(/ /g, '_'))] = value;
      }
    });

    const knowledge_panel_type = this.$(Constants.SELECTORS.KNO_PANEL_TYPE).last().text();
    if (knowledge_panel_type && knowledge_panel_type !== knowledge_panel.title) {
      knowledge_panel.type = knowledge_panel_type;
    }

    // Shared by books and TV shows/movies: each matched element holds a title
    // and its next sibling holds the year; entries without a year are skipped.
    const collectTitleYearPairs = (selector) => this.$(selector).map((i, el) => {
      const year = this.$(el).next().text().trim();
      if (year === '') return undefined;
      return { title: this.$(el).first().text().trim(), year };
    }).get();

    // Lists are assigned first and deleted when empty so that a same-named
    // metadata key set above is always overwritten or removed.
    knowledge_panel.books = collectTitleYearPairs(Constants.SELECTORS.KNO_PANEL_BOOKS);
    if (knowledge_panel.books.length === 0) {
      delete knowledge_panel.books;
    }

    knowledge_panel.tv_shows_and_movies =
      collectTitleYearPairs(Constants.SELECTORS.KNO_PANEL_TV_SHOWS_AND_MOVIES);
    if (knowledge_panel.tv_shows_and_movies.length === 0) {
      delete knowledge_panel.tv_shows_and_movies;
    }

    // Stanza boundaries and <br> tags are turned into newlines before the
    // remaining markup is stripped by re-parsing and calling .text().
    const song_lyrics = this.$(Constants.SELECTORS.KNO_PANEL_SONG_LYRICS).map((i, el) => {
      const html = this.$(el).html()
        .replace(/<\/span><\/div><div jsname="u8s5sf" class="ujudub"><span jsname="ys01ge">/g, '\n\n')
        .replace(/<br>/g, '\n');
      return this.$(html).text();
    }).get();
    if (song_lyrics.length > 0) {
      knowledge_panel.lyrics = song_lyrics.join('\n\n');
    }

    const google_users_rating = this.$(Constants.SELECTORS.KNO_PANEL_FILM_GOOGLEUSERS_RATING)[0];
    if (google_users_rating) {
      // Walk two levels down defensively: a missing child yields 'N/A'
      // instead of a TypeError.
      const outer = google_users_rating.children && google_users_rating.children[0];
      const inner = outer && outer.children && outer.children[0];
      const rating = this.$(inner).text() || 'N/A';

      knowledge_panel.ratings = [];
      knowledge_panel.ratings.push({ name: 'Google Users', rating: rating });
    }

    // Rating values and their source names come from two parallel selections
    // matched up by index; the name selection is queried once, up front.
    const rating_sources = this.$(Constants.SELECTORS.KNO_PANEL_FILM_RATINGS[1]);
    this.$(Constants.SELECTORS.KNO_PANEL_FILM_RATINGS[0]).each((i, el) => {
      knowledge_panel.ratings = knowledge_panel.ratings || [];

      const name = this.$(rating_sources[i]).attr('title');
      const rating = this.$(el).text();

      knowledge_panel.ratings.push({ name: name, rating: rating });
    });

    knowledge_panel.available_on = this.$(Constants.SELECTORS.KNO_PANEL_AVAILABLE_ON)
      .map((i, el) => this.$(el).text())
      .get();
    if (knowledge_panel.available_on.length === 0) {
      delete knowledge_panel.available_on;
    }

    knowledge_panel.images = this.$(Constants.SELECTORS.KNO_PANEL_IMAGES).map((i, elem) => {
      return {
        url: this.$(elem).attr('data-src'),
        source: this.$(elem).parent().parent().parent().attr('data-lpage')
      };
    }).get().filter((img) => img.url !== undefined);
    if (knowledge_panel.images.length === 0) {
      delete knowledge_panel.images;
    }

    // The preview video URL is read from the raw page source rather than the
    // DOM; the search markers are escaped exactly as they occur in that text.
    const animal_preview = Utils.getStringBetweenStrings(this.data, 'source src\\x3d\\x22', '.mp4');
    if (animal_preview) {
      knowledge_panel.demonstration = animal_preview + '.mp4';
    }

    return knowledge_panel;
  }

  /**
   * Reads the featured snippet.
   *
   * @returns {{title: string, description: string, url: string}} Fields not
   *   found on the page are `'N/A'`.
   */
  getFeaturedSnippet() {
    const featured_snippet_title =
      this.$(Constants.SELECTORS.FEATURED_SNIPPET_TITLE[0]).text() ||
      this.$(Constants.SELECTORS.FEATURED_SNIPPET_TITLE[1]).text() ||
      this.$(Constants.SELECTORS.FEATURED_SNIPPET_TITLE[2]).text() ||
      undefined;

    const featured_snippet_url = this.$(Constants.SELECTORS.FEATURED_SNIPPET_URL)
      .map((i, el) => this.$(el).attr('href'))
      .get()[0];

    // The third description selector is read as plain text; the others are
    // read as HTML so list items can be turned into separate lines.
    const plain_text_selector = Constants.SELECTORS.FEATURED_SNIPPET_DESC[2];

    const featured_snippet = Constants.SELECTORS.FEATURED_SNIPPET_DESC
      .map((selector) => {
        if (selector === plain_text_selector) {
          return this.$(selector).text();
        }
        if (!this.$(selector)[0]) {
          return undefined;
        }
        return this.$(selector).html()
          .replace(/<\/li>|<\/b>|<b>/g, '')
          .replace(/&amp;/g, '&')
          .split('<li class="TrT0Xe">')
          .join('\n')
          .trim();
      })
      .find((text) => text !== undefined && text.length !== 0);

    return {
      title: featured_snippet_title || 'N/A',
      description: featured_snippet || 'N/A',
      url: featured_snippet_url || 'N/A'
    };
  }

  /**
   * Collects the "top stories" entries.
   *
   * Note: this removes some nodes from the loaded document before reading it.
   *
   * @returns {Array<{description: string|undefined, url: string}>} One entry
   *   per story URL, or an empty array when no description selector matches.
   */
  getTopStories() {
    // Removes unnecessary text from the short description.
    this.$(`${Constants.SELECTORS.TOP_STORIES_DESCRIPTION[0]} > div.CEMjEf`).remove();
    this.$(`${Constants.SELECTORS.TOP_STORIES_DESCRIPTION[0]} > div > p`).remove();

    // Use the first selector that yields any descriptions; the first
    // character of each description is dropped.
    const top_stories_descriptions = Constants.SELECTORS.TOP_STORIES_DESCRIPTION
      .map((selector) => this.$(selector).map((i, el) => this.$(el).text().slice(1)).get())
      .find((descs) => descs.length > 0);

    if (!top_stories_descriptions) return [];

    const top_stories_urls = this.$(Constants.SELECTORS.TOP_STORIES_URL)
      .map((i, el) => this.$(el).attr('href'))
      .get();

    return top_stories_urls.map((url, i) => ({
      description: top_stories_descriptions[i],
      url
    }));
  }

  /**
   * Collects the "people also ask" texts.
   *
   * @returns {string[]} Matched texts, minus the first match, which is
   *   always discarded.
   */
  getPaa() {
    const people_also_ask = [];

    Constants.SELECTORS.PAA.forEach((selector) => {
      this.$(selector).each((i, el) => {
        people_also_ask.push(this.$(el).text());
      });
    });

    people_also_ask.shift();
    return people_also_ask;
  }

  /**
   * Collects the "people also search for" items.
   *
   * @returns {Array<{title: string|undefined, thumbnail: string}>} Elements
   *   without a `data-src` attribute are skipped.
   */
  getPas() {
    return this.$(Constants.SELECTORS.PASF).map((i, el) => {
      const source = this.$(el).attr('data-src');
      if (!source) return undefined;

      return {
        title: this.$(el).attr('alt'),
        thumbnail: `https:${source}`
      };
    }).get();
  }

  /**
   * Reads the current-time widget.
   *
   * @returns {{hours: string, date: string}|undefined} `undefined` when the
   *   date selector has no second match.
   */
  getTime() {
    const hours = this.$(Constants.SELECTORS.CURRENT_TIME_HOUR).text();
    // The date is taken from the second element matched by the selector.
    const date = this.$(Constants.SELECTORS.CURRENT_TIME_DATE)
      .map((i, el) => this.$(el).text())
      .get()[1];

    if (!date) return undefined;

    return { hours: hours.trim(), date: date.trim() };
  }

  /**
   * Reads the weather widget.
   *
   * @returns {{location: string, forecast: string, precipitation: string,
   *   humidity: string, temperature: string, wind: string}|undefined}
   *   `undefined` unless both a location and a forecast are present.
   */
  getWeather() {
    const weather_location = this.$(Constants.SELECTORS.WEATHER_LOCATION).text();
    const weather_forecast = this.$(Constants.SELECTORS.WEATHER_FORECAST).text();
    const precipitation = this.$(Constants.SELECTORS.PRECIPITATION).text();
    const air_humidity = this.$(Constants.SELECTORS.AIR_HUMIDITY).text();
    const temperature = this.$(Constants.SELECTORS.TEMPERATURE).text();
    const wind_speed = this.$(Constants.SELECTORS.WIND_SPEED).text();

    if (!weather_location || !weather_forecast) return undefined;

    return {
      location: weather_location,
      forecast: weather_forecast,
      precipitation,
      humidity: air_humidity,
      temperature,
      wind: wind_speed
    };
  }

  /**
   * Reads the location widget.
   *
   * @param {*} date - When truthy, no location is reported (callers
   *   presumably pass the `getTime()` result here — confirm).
   * @returns {{title: string, distance: string, map: string}|undefined}
   *   `map` is `'N/A'` when the image has no `src`.
   */
  getLocation(date) {
    const location_title = this.$(Constants.SELECTORS.LOCATION_TITLE).text();
    const location_distance = this.$(Constants.SELECTORS.LOCATION_DISTANCE).text();
    const location_image = this.$(Constants.SELECTORS.LOCATION_IMAGE).attr('src');

    if (!location_title || !location_distance || date) return undefined;

    return {
      title: location_title,
      distance: location_distance,
      // Without the guard a missing src produced "https://google.comundefined".
      map: location_image ? `https://google.com${location_image}` : 'N/A'
    };
  }

  /**
   * Reads the translation widget.
   *
   * @returns {{source_language: string, target_language: string,
   *   source_text: string, target_text: string}|undefined} `undefined` when
   *   there is no source text.
   */
  getTranslation() {
    const source_language = this.$(Constants.SELECTORS.TR_SOURCE_LANGUAGE).text();
    const target_language = this.$(Constants.SELECTORS.TR_TARGET_LANGUAGE).text();
    const source_text = this.$(Constants.SELECTORS.TR_SOURCE_TEXT).text();
    const target_text = this.$(Constants.SELECTORS.TR_TARGET_TEXT).text();

    if (source_text.length === 0) return undefined;

    return { source_language, target_language, source_text, target_text };
  }

  /**
   * Reads the dictionary widget.
   *
   * @returns {{word: string, phonetic: string, audio: string,
   *   definitions: string[], examples: string[]}|undefined} `undefined` when
   *   no word is found; `phonetic` and `audio` fall back to `'N/A'`.
   */
  getDictionary() {
    const word = this.$(Constants.SELECTORS.GD_WORD).text();
    const phonetic = this.$(Constants.SELECTORS.GD_PHONETIC).text();
    const audio = this.$(Constants.SELECTORS.GD_AUDIO).attr('src');

    if (!word) return undefined;

    return {
      word: word || 'N/A',
      phonetic: phonetic || 'N/A',
      audio: audio ? `https:${audio}` : 'N/A',
      definitions: this.$(Constants.SELECTORS.GD_DEFINITIONS).map((i, el) => this.$(el).text()).get(),
      examples: this.$(Constants.SELECTORS.GD_EXAMPLES).map((i, el) => this.$(el).text()).get()
    };
  }

  /**
   * Reads the unit converter or, failing that, the currency converter.
   *
   * @returns {{input: string, output: string, formula: string}|
   *   {input: {name: string|undefined, value: string},
   *    output: {name: string|undefined, value: string}}|undefined}
   *   The unit-converter shape takes precedence; `undefined` when neither
   *   widget has both an input and an output.
   */
  getConverters() {
    const unit_converter_input = this.$(Constants.SELECTORS.UNIT_CONVERTER_INPUT).attr('value');
    const unit_converter_output = this.$(Constants.SELECTORS.UNIT_CONVERTER_OUTPUT).attr('value');
    const unit_converter_formula = this.$(Constants.SELECTORS.UNIT_CONVERTER_FORMULA).text();

    const input_currency_name = this.$(Constants.SELECTORS.INPUT_CURRENCY_NAME).attr('data-name');
    const output_currency_name = this.$(Constants.SELECTORS.OUTPUT_CURRENCY_NAME).attr('data-name');
    const currency_converter_input = this.$(Constants.SELECTORS.CURRENCY_CONVERTER_INPUT).text();
    const currency_converter_output = this.$(Constants.SELECTORS.CURRENCY_CONVERTER_OUTPUT).text();

    if (unit_converter_input && unit_converter_output) {
      return {
        input: unit_converter_input,
        output: unit_converter_output,
        formula: unit_converter_formula
      };
    }

    if (currency_converter_input && currency_converter_output) {
      return {
        input: { name: input_currency_name, value: currency_converter_input },
        output: { name: output_currency_name, value: currency_converter_output }
      };
    }

    return undefined;
  }

  /**
   * Re-aligns the parallel `titles` / `descriptions` / `urls` arrays in place.
   *
   * Corrects wrongly parsed data, this doesn't often happen though.
   *
   * @param {string[]} titles - Result titles; may have entries removed.
   * @param {string[]} descriptions - Result descriptions; only read.
   * @param {string[]} urls - Result URLs; may have entries removed.
   * @returns {void}
   */
  correctFuzzyData(titles, descriptions, urls) {
    // Drop leading surplus URLs (at most two) so urls lines up with titles.
    if (titles.length < urls.length && titles.length < descriptions.length) {
      urls.shift();
    }
    if (urls.length > titles.length) {
      urls.shift();
    }

    // Only prune when there are not more descriptions than URLs after the first.
    const inaccurate_data = !(descriptions.length > urls.slice(1).length);
    if (!inaccurate_data) return;

    // Why YouTube? Because video results usually don't have a description.
    //
    // Iterate from the end: splicing while walking forward shifts the next
    // element into the current slot, where it would never be examined, so
    // consecutive YouTube entries used to be skipped.
    for (let index = urls.length - 1; index >= 0; index--) {
      // NOTE(review): this guard reads Constants.URLS, not the local `urls`
      // array — confirm that is intended.
      if (urls[index].includes('m.youtube.com') && Constants.URLS.length > 1) {
        urls.splice(index, 1);
        titles.splice(index, 1);
      }
    }
  }
}

module.exports = Parser;