UNPKG

google-sr

Version:

JavaScript / TypeScript tools to fetch Google search results without an API key.

498 lines (485 loc) 18.2 kB
const __defProp = Object.defineProperty;
/**
 * Sets a configurable `name` property on `target` and returns it.
 * Used below to give every function and parser a stable name.
 */
const __name = (target, value) =>
  __defProp(target, "name", { value, configurable: true });

// src/constants.ts
/** Values placed in the `type` field of the objects returned by the parsers. */
const ResultTypes = {
  OrganicResult: "ORGANIC",
  TranslateResult: "TRANSLATE",
  DictionaryResult: "DICTIONARY",
  TimeResult: "TIME",
  UnitConversionResult: "CONVERSION",
  KnowledgePanelResult: "KNOWLEDGE_PANEL",
  NewsResult: "NEWS",
  RelatedSearchesResult: "RELATED_SEARCHES"
};
/** Captures (group 1) the text between the first pair of double quotes. */
const TranslateSourceTextRegex = /"(.+?)"/;
/** Captures (group 2) the value of the first `q`, `imgurl` or `url` query parameter. */
const GOOGLE_REDIRECT_URL_PARAM_REGEX = /[?&](q|imgurl|url)=([^&]+)/;
const GOOGLE_SEARCH_URL = "https://www.google.com/search";

// src/results/conversion.ts
import { GeneralSelector, UnitConversionSelector } from "google-sr-selectors";

// src/utils.ts
/** Default request headers; caller-supplied headers are merged over these. */
const baseHeaders = {
  Accept: "text/html, application/xhtml+xml, */*",
  "Accept-Encoding": "gzip, deflate",
  "Accept-Language": "en-US,en;q=0.5",
  // Use a legacy Internet Explorer user agent so that Google serves the
  // variant of the results page that does not require JavaScript.
  "User-Agent": "Mozilla/5.0 (MSIE 10.0; Windows NT 6.1; Trident/5.0)",
  Connection: "Keep-Alive",
  Referer: "https://www.google.com/"
};

/**
 * Performs a GET request with `fetch` and rejects on non-OK responses.
 *
 * @param options - Request options; must contain `url`, may contain
 *   `queryParams` (anything with a `toString()` that yields a query string)
 *   plus any other `fetch` init fields.
 * @returns The `Response` when `response.ok` is true.
 * @throws {TypeError} When `options.url` is missing.
 * @throws {Error} When the response status is not OK.
 */
async function safeGetFetch(options) {
  if (!options.url) {
    throw new TypeError("Request options must contain a valid URL.");
  }
  // Force GET on a copy so the caller's options object is left untouched.
  const requestInit = { ...options, method: "GET" };
  const queryParams = options.queryParams?.toString();
  const url = `${options.url}${queryParams ? `?${queryParams}` : ""}`;
  const response = await fetch(url, requestInit);
  if (!response.ok) {
    throw new Error(
      `Failed to fetch ${url}: ${response.status} ${response.statusText}`
    );
  }
  return response;
}
__name(safeGetFetch, "safeGetFetch");

/**
 * Extracts and URI-decodes the `q`, `imgurl` or `url` parameter of a link.
 *
 * @param googleLink - The raw href (may be empty or undefined).
 * @returns The decoded parameter value, or `null` when the link is empty,
 *   has no such parameter, or the value cannot be decoded.
 */
function extractUrlFromGoogleLink(googleLink) {
  if (!googleLink) return null;
  const match = googleLink.match(GOOGLE_REDIRECT_URL_PARAM_REGEX);
  if (!match?.[2]) return null;
  try {
    return decodeURIComponent(match[2]);
  } catch {
    // Malformed percent-encoding: treat the link as unusable.
    return null;
  }
}
__name(extractUrlFromGoogleLink, "extractUrlFromGoogleLink");

/**
 * Validates the search options and builds the request configuration.
 *
 * The returned object is a shallow copy of `opts.requestConfig` with fresh
 * `headers` and `queryParams` objects, so later changes to it (for example
 * the `start` parameter set while paging) never leak into caller-owned objects.
 *
 * @param opts - Search options; `query` must be a string, `requestConfig`
 *   is optional and must be an object when given.
 * @returns Request config with `url`, merged `headers` and a
 *   `URLSearchParams` carrying `q` and `ie=UTF-8`.
 * @throws {TypeError} When `query` or `requestConfig` has the wrong type.
 */
function prepareRequestConfig(opts) {
  if (typeof opts.query !== "string") {
    throw new TypeError(
      `Search query must be a string, received ${typeof opts.query} instead.`
    );
  }
  if (opts.requestConfig && typeof opts.requestConfig !== "object") {
    throw new TypeError(
      `Request config must be an object if specified, received ${typeof opts.requestConfig}.`
    );
  }
  const requestConfig = Object.assign({}, opts.requestConfig);
  // Always merge into a new object so the shared defaults are never handed out.
  requestConfig.headers = Object.assign({}, baseHeaders, requestConfig.headers);
  // Always copy: the URLSearchParams constructor clones an existing instance
  // and accepts strings, records and undefined as well.
  requestConfig.queryParams = new URLSearchParams(requestConfig.queryParams);
  requestConfig.queryParams.set("q", opts.query);
  requestConfig.queryParams.set("ie", "UTF-8");
  requestConfig.url = GOOGLE_SEARCH_URL;
  return requestConfig;
}
__name(prepareRequestConfig, "prepareRequestConfig");

/**
 * Throws the error used by every parser when it is called without a
 * Cheerio instance.
 *
 * @param resultParserName - Parser name interpolated into the message.
 * @throws {TypeError} Always.
 */
function throwNoCheerioError(resultParserName) {
  throw new TypeError(
    `CheerioAPI instance is missing, if using as a parser make sure to pass the raw function and not the result of calling it. (ex: [${resultParserName}] instead of [${resultParserName}()])`
  );
}
__name(throwNoCheerioError, "throwNoCheerioError");

/**
 * Returns `value` when it is a non-empty string, otherwise `undefined`.
 */
function coerceToStringOrUndefined(value) {
  if (typeof value !== "string") return void 0;
  return value === "" ? void 0 : value;
}
__name(coerceToStringOrUndefined, "coerceToStringOrUndefined");

// src/results/conversion.ts
/**
 * Parser for the unit-conversion result, read from the first element
 * matching `GeneralSelector.block`.
 *
 * @param $ - Cheerio instance for the results page.
 * @param noPartialResults - When truthy, return `null` unless both `from`
 *   and `to` are present.
 * @returns `{ type, from, to }`, or `null` when no block exists.
 */
const UnitConversionResult = /* @__PURE__ */ __name(($, noPartialResults) => {
  if (!$) throwNoCheerioError("UnitConversionResult");
  const block = $(GeneralSelector.block).first();
  if (!block.length) return null;
  const from = coerceToStringOrUndefined(
    // Drops the first "=" from the text before trimming.
    block.find(UnitConversionSelector.from).text().replace("=", "").trim()
  );
  if (noPartialResults && !from) return null;
  const to = coerceToStringOrUndefined(
    block.find(UnitConversionSelector.to).text().trim()
  );
  if (noPartialResults && !to) return null;
  return {
    type: ResultTypes.UnitConversionResult,
    from,
    to
  };
}, "UnitConversionResult");

// src/results/dictionary.ts
import { DictionarySearchSelector, GeneralSelector as GeneralSelector2 } from "google-sr-selectors";

/**
 * Reads one definition out of a definition element.
 *
 * The elements matching `DictionarySearchSelector.definitionTextBlock` are
 * read positionally: index 0 is the definition text, index 1 the example,
 * index 2 the comma-separated synonyms (with a "synonyms: " prefix removed).
 *
 * @param definitionBlock - Cheerio selection wrapping the definition element.
 * @returns `{ definition, example?, synonyms? }`, or `null` when the
 *   definition text is empty.
 */
const parseDefinitionBlock = /* @__PURE__ */ __name((definitionBlock) => {
  const definitionTextBlock = definitionBlock.find(
    DictionarySearchSelector.definitionTextBlock
  );
  const definitionText = definitionTextBlock.eq(0).text().trim();
  const example = definitionTextBlock.eq(1).text().trim();
  const synonyms = definitionTextBlock
    .eq(2)
    .text()
    .trim()
    .replace("synonyms: ", "")
    .split(", ")
    .filter((s) => s !== "");
  if (!definitionText) return null;
  const definition = {
    definition: definitionText
  };
  if (example && example !== "") definition.example = example;
  if (synonyms && synonyms.length > 0) definition.synonyms = synonyms;
  return definition;
}, "parseDefinitionBlock");

/**
 * Parser for the dictionary result, read from the first element matching
 * `GeneralSelector.block`.
 *
 * @param $ - Cheerio instance for the results page.
 * @param noPartialResults - When truthy, return `null` if the phonetic, the
 *   word, or all meanings are missing.
 * @returns `{ type, phonetic, word, meanings }`, or `null` when the block or
 *   its definitions container is missing.
 */
const DictionaryResult = /* @__PURE__ */ __name(($, noPartialResults) => {
  if (!$) throwNoCheerioError("DictionaryResult");
  const dictionaryBlock = $(GeneralSelector2.block).first();
  if (!dictionaryBlock.length) return null;
  const phonetic = coerceToStringOrUndefined(
    dictionaryBlock.find(DictionarySearchSelector.phonetic).first().text()
  );
  if (noPartialResults && !phonetic) return null;
  const word = coerceToStringOrUndefined(
    dictionaryBlock.find(DictionarySearchSelector.word).text()
  );
  if (noPartialResults && !word) return null;
  const definitionContainer = dictionaryBlock
    .find(DictionarySearchSelector.definitionsContainer)
    .first();
  if (!definitionContainer.length) return null;
  const definitionBlocks = definitionContainer
    .find(DictionarySearchSelector.definitionsBlock)
    .toArray();
  const meanings = [];
  // The blocks are consumed as a two-state sequence: while no part of speech
  // is pending, a block is read as a part-of-speech header; otherwise it is
  // read as the definitions belonging to the pending part of speech.
  let partOfSpeech;
  for (const definitionBlock of definitionBlocks) {
    const $definitionBlock = $(definitionBlock);
    if (!partOfSpeech) {
      partOfSpeech = coerceToStringOrUndefined(
        $definitionBlock
          .find(DictionarySearchSelector.definitionPartOfSpeech)
          .first()
          .text()
      );
    } else {
      const definitionLists = $definitionBlock
        .find(DictionarySearchSelector.definitionList)
        .toArray();
      let definitions;
      if (definitionLists.length > 0) {
        definitions = definitionLists
          .map((item) => parseDefinitionBlock($(item)))
          .filter((d) => d !== null);
      } else {
        // No list items: treat the block itself as a single definition.
        const definition = parseDefinitionBlock($definitionBlock);
        if (definition) definitions = [definition];
      }
      if (definitions && definitions.length > 0) {
        meanings.push({
          partOfSpeech,
          definitions
        });
      }
      partOfSpeech = void 0;
    }
  }
  if (noPartialResults && meanings.length === 0) return null;
  return {
    type: ResultTypes.DictionaryResult,
    phonetic,
    word,
    meanings
  };
}, "DictionaryResult");

// src/results/knowledge-panel.ts
import { GeneralSelector as GeneralSelector3, KnowledgePanelSelector } from "google-sr-selectors";

/**
 * Parser for the knowledge panel. Scans the leading elements matching
 * `GeneralSelector.block` and returns the first one that has both a header
 * block and a description block.
 *
 * @param $ - Cheerio instance for the results page.
 * @param noPartialResults - When truthy, the scan stops and `null` is
 *   returned as soon as a candidate lacks a title, label or description.
 * @returns `{ type, title, label, description, sourceLink, imageLink,
 *   metadata }`, or `null` when nothing matched.
 */
const KnowledgePanelResult = /* @__PURE__ */ __name(($, noPartialResults) => {
  if (!$) throwNoCheerioError("KnowledgePanelResult");
  const blocks = $(GeneralSelector3.block);
  let blocksSearched = 0;
  for (const block of blocks) {
    // Only the first few blocks are inspected (the counter allows six).
    if (blocksSearched > 5) break;
    blocksSearched++;
    const $el = $(block);
    const headerBlock = $el.find(KnowledgePanelSelector.headerBlock).first();
    if (!headerBlock.length) continue;
    const title = coerceToStringOrUndefined(
      headerBlock.find(KnowledgePanelSelector.title).text()
    );
    if (noPartialResults && !title) break;
    const label = coerceToStringOrUndefined(
      headerBlock.find(KnowledgePanelSelector.label).text().trim()
    );
    if (noPartialResults && !label) break;
    const descriptionBlock = $el.find(KnowledgePanelSelector.descriptionBlock);
    if (!descriptionBlock.length) continue;
    const description = coerceToStringOrUndefined(
      descriptionBlock.find("span").first().text()
    );
    if (noPartialResults && !description) break;
    const sourceLink = descriptionBlock.find("a").attr("href");
    const cleanSourceLink = coerceToStringOrUndefined(
      extractUrlFromGoogleLink(sourceLink)
    );
    const metadataBlocks = $el
      .find(KnowledgePanelSelector.metadataBlock)
      .toArray();
    const metadata = [];
    for (const metadataContainerElement of metadataBlocks) {
      const metadataContainer = $(metadataContainerElement);
      const label2 = metadataContainer
        .find(KnowledgePanelSelector.metadataLabel)
        .first()
        .text();
      if (label2 === "") continue;
      const value = metadataContainer
        .find(KnowledgePanelSelector.metadataValue)
        .text();
      if (value === "") continue;
      metadata.push({
        label: label2,
        value
      });
    }
    // The image is looked up in the sibling that follows the header block.
    const imageLink = coerceToStringOrUndefined(
      headerBlock.next().find(KnowledgePanelSelector.imageUrl).attr("src")
    );
    return {
      type: ResultTypes.KnowledgePanelResult,
      title,
      label,
      description,
      sourceLink: cleanSourceLink,
      imageLink,
      metadata
    };
  }
  return null;
}, "KnowledgePanelResult");

// src/results/news.ts
import { GeneralSelector as GeneralSelector4, NewsSearchSelector } from "google-sr-selectors";

/**
 * Parser for news results; produces one entry per element matching
 * `GeneralSelector.block` that contains a news link.
 *
 * @param $ - Cheerio instance for the results page.
 * @param noPartialResults - When truthy, skip entries missing the link,
 *   title, description, source or published date.
 * @returns Array of `{ type, link, title, description, source,
 *   published_date, thumbnail_image }` (possibly empty).
 */
const NewsResult = /* @__PURE__ */ __name(($, noPartialResults) => {
  if (!$) throwNoCheerioError("NewsResult");
  const parsedResults = [];
  const newsSearchBlocks = $(GeneralSelector4.block).toArray();
  for (const element of newsSearchBlocks) {
    const $el = $(element);
    const rawLink = $el.find(NewsSearchSelector.link).attr("href");
    // Blocks without a link element are not news entries.
    if (typeof rawLink !== "string") continue;
    const link = coerceToStringOrUndefined(extractUrlFromGoogleLink(rawLink));
    if (noPartialResults && !link) continue;
    const title = coerceToStringOrUndefined(
      $el.find(NewsSearchSelector.title).text()
    );
    if (noPartialResults && !title) continue;
    const description = coerceToStringOrUndefined(
      $el.find(NewsSearchSelector.description).text()
    );
    if (noPartialResults && !description) continue;
    const source = coerceToStringOrUndefined(
      $el.find(NewsSearchSelector.source).text()
    );
    if (noPartialResults && !source) continue;
    const published_date = coerceToStringOrUndefined(
      $el.find(NewsSearchSelector.published_date).text()
    );
    if (noPartialResults && !published_date) continue;
    const thumbnail_image = coerceToStringOrUndefined(
      $el.find(NewsSearchSelector.thumbnail_image).attr("src")
    );
    parsedResults.push({
      type: ResultTypes.NewsResult,
      link,
      title,
      description,
      source,
      published_date,
      thumbnail_image
    });
  }
  return parsedResults;
}, "NewsResult");

// src/results/organic.ts
import { GeneralSelector as GeneralSelector5, OrganicSearchSelector } from "google-sr-selectors";

/**
 * Parser for organic results; produces one entry per element matching
 * `GeneralSelector.block` that contains an organic link.
 *
 * @param $ - Cheerio instance for the results page.
 * @param noPartialResults - When truthy, skip entries missing the link,
 *   title or source.
 * @returns Array of `{ type, link, description, title, source, isAd }`
 *   (possibly empty).
 */
const OrganicResult = /* @__PURE__ */ __name(($, noPartialResults) => {
  if (!$) throwNoCheerioError("OrganicResult");
  const parsedResults = [];
  const organicSearchBlocks = $(GeneralSelector5.block).get();
  for (const element of organicSearchBlocks) {
    const $el = $(element);
    const result_href_link = $el.find(OrganicSearchSelector.link).attr("href");
    // Blocks without a link element are not organic entries.
    if (typeof result_href_link !== "string") continue;
    const link = coerceToStringOrUndefined(
      extractUrlFromGoogleLink(result_href_link)
    );
    if (noPartialResults && !link) continue;
    const title = coerceToStringOrUndefined(
      $el.find(OrganicSearchSelector.title).text()
    );
    if (noPartialResults && !title) continue;
    const metaContainer = $el.find(OrganicSearchSelector.metaContainer);
    const metaSource = coerceToStringOrUndefined(
      metaContainer.find(OrganicSearchSelector.metaSource).text()
    );
    if (noPartialResults && !metaSource) continue;
    const metaAd = coerceToStringOrUndefined(
      metaContainer.find(OrganicSearchSelector.metaAd).text()
    );
    const description = coerceToStringOrUndefined(
      $el.find(OrganicSearchSelector.description).text()
    );
    parsedResults.push({
      type: ResultTypes.OrganicResult,
      link,
      description,
      title,
      source: metaSource,
      // An empty value was coerced to undefined above, so a plain
      // truthiness check tells whether the ad marker text is present.
      isAd: Boolean(metaAd)
    });
  }
  return parsedResults;
}, "OrganicResult");

// src/results/related-searches.ts
import { GeneralSelector as GeneralSelector6, RelatedSearchesSelector } from "google-sr-selectors";

/**
 * Parser for related searches, read from the last element matching
 * `GeneralSelector.block`.
 *
 * @param $ - Cheerio instance for the results page.
 * @param noPartialResults - When truthy, return `null` if no query was found.
 * @returns `{ type, queries }` where `queries` holds the non-empty texts.
 */
const RelatedSearchesResult = /* @__PURE__ */ __name(($, noPartialResults) => {
  if (!$) throwNoCheerioError("RelatedSearchesResult");
  const $relatedBlock = $(GeneralSelector6.block).last();
  const $relatedQueries = $relatedBlock.find(
    RelatedSearchesSelector.query_item
  );
  const queries = [];
  for (const query of $relatedQueries) {
    const $el = $(query);
    const text = coerceToStringOrUndefined(
      $el.find(RelatedSearchesSelector.text).text()
    );
    if (text) {
      queries.push(text);
    }
  }
  if (noPartialResults && !queries.length) return null;
  return {
    type: ResultTypes.RelatedSearchesResult,
    queries
  };
}, "RelatedSearchesResult");

// src/results/time.ts
import { TimeSearchSelector } from "google-sr-selectors";

/**
 * Parser for the time result, read from the first element matching
 * `TimeSearchSelector.block`.
 *
 * @param $ - Cheerio instance for the results page.
 * @param noPartialResults - When truthy, return `null` if the location,
 *   the time or the time-in-words text is missing.
 * @returns `{ type, location, time, timeInWords }`, or `null` when the time
 *   layout table is not present.
 */
const TimeResult = /* @__PURE__ */ __name(($, noPartialResults) => {
  if (!$) throwNoCheerioError("TimeResult");
  const block = $(TimeSearchSelector.block).first();
  const location = coerceToStringOrUndefined(
    block.find(TimeSearchSelector.location).text()
  );
  if (noPartialResults && !location) return null;
  const layoutTable = block.find(TimeSearchSelector.timeLayoutTable).first();
  // A Cheerio selection is an object and therefore always truthy; an empty
  // match has to be detected through its length.
  if (!layoutTable.length) return null;
  const time = coerceToStringOrUndefined(
    layoutTable.find(TimeSearchSelector.time).text()
  );
  if (noPartialResults && !time) return null;
  const timeInWords = coerceToStringOrUndefined(
    layoutTable.find(TimeSearchSelector.timeInWords).text()
  );
  if (noPartialResults && !timeInWords) return null;
  return {
    type: ResultTypes.TimeResult,
    location,
    time,
    timeInWords
  };
}, "TimeResult");

// src/results/translate.ts
import { GeneralSelector as GeneralSelector7, TranslateSearchSelector } from "google-sr-selectors";

/**
 * Parser for the translation result, read from the first element matching
 * `GeneralSelector.block`.
 *
 * @param $ - Cheerio instance for the results page.
 * @param noPartialResults - When truthy, return `null` if any of the
 *   languages, the source text or the translated text is missing.
 * @returns `{ type, sourceLanguage, translationLanguage, sourceText,
 *   translatedText }`, or `null` when the block is missing or the
 *   "X to Y" label does not split into exactly two parts.
 */
const TranslateResult = /* @__PURE__ */ __name(($, noPartialResults) => {
  if (!$) throwNoCheerioError("TranslateResult");
  const translateBlock = $(GeneralSelector7.block).first();
  if (!translateBlock.length) return null;
  const translatedFromTo = coerceToStringOrUndefined(
    translateBlock.find(TranslateSearchSelector.translateFromTo).text()
  );
  if (noPartialResults && !translatedFromTo) return null;
  let sourceLanguage;
  let translationLanguage;
  if (translatedFromTo) {
    const fromTo = translatedFromTo.split(" to ");
    if (fromTo.length !== 2) return null;
    sourceLanguage = coerceToStringOrUndefined(fromTo[0].trimEnd());
    translationLanguage = coerceToStringOrUndefined(fromTo[1].trim());
  }
  if (noPartialResults && (!sourceLanguage || !translationLanguage)) {
    return null;
  }
  const sourceTextBlock = coerceToStringOrUndefined(
    translateBlock.find(TranslateSearchSelector.sourceText).text()
  );
  if (noPartialResults && !sourceTextBlock) return null;
  let sourceText;
  if (sourceTextBlock) {
    // Keep only the part enclosed in double quotes.
    sourceText = sourceTextBlock.match(TranslateSourceTextRegex)?.[1];
  }
  if (noPartialResults && !sourceText) return null;
  const translatedText = coerceToStringOrUndefined(
    translateBlock.find(TranslateSearchSelector.translatedText).text()
  );
  if (noPartialResults && !translatedText) return null;
  return {
    type: ResultTypes.TranslateResult,
    sourceLanguage,
    translationLanguage,
    sourceText,
    translatedText
  };
}, "TranslateResult");

// src/search.ts
import { load } from "cheerio";

/**
 * Fetches one results page and runs every parser over it.
 *
 * @param searchOptions - `{ query, parsers?, noPartialResults?,
 *   requestConfig? }`; `parsers` defaults to `[OrganicResult]`.
 * @returns A flat array with the results of all parsers; parsers that
 *   return a falsy value contribute nothing.
 * @throws {TypeError} When the options are missing or invalid.
 * @throws {Error} When the HTTP response is not OK.
 */
async function search(searchOptions) {
  if (!searchOptions || typeof searchOptions !== "object") {
    throw new TypeError(
      `Search options must be provided. Received ${typeof searchOptions}`
    );
  }
  const requestConfig = prepareRequestConfig(searchOptions);
  const response = await safeGetFetch(requestConfig);
  const data = await response.text();
  const cheerioApi = load(data);
  const parsers = searchOptions.parsers || [OrganicResult];
  let searchResults = [];
  for (const parser of parsers) {
    const result = parser(
      cheerioApi,
      Boolean(searchOptions.noPartialResults)
    );
    // concat flattens array results and appends single-object results.
    if (result) searchResults = searchResults.concat(result);
  }
  return searchResults;
}
__name(search, "search");

/**
 * Fetches several results pages sequentially and parses each of them.
 *
 * @param options - Same fields as `search`, plus:
 *   `pages` - either a number of pages (converted to the `start` offsets
 *   0, 10, 20, ...) or an array of explicit `start` offsets;
 *   `delay` - milliseconds to wait between requests (default 1000).
 * @returns An array with one flat result array per requested page.
 * @throws {TypeError} When the options or `pages` are missing or invalid.
 * @throws {Error} When an HTTP response is not OK.
 */
async function searchWithPages(options) {
  if (!options) {
    throw new TypeError(
      `Search options must be provided. Received ${typeof options}`
    );
  }
  if (typeof options.pages !== "number" && !Array.isArray(options.pages)) {
    throw new TypeError(
      `Page must be a number or an array of numbers. Received ${typeof options.pages}`
    );
  }
  const searchResults = [];
  const pages = Array.isArray(options.pages)
    ? options.pages
    : Array.from({ length: options.pages }, (_, i) => i * 10);
  const baseRequestConfig = prepareRequestConfig(options);
  const parsers = options.parsers || [OrganicResult];
  const delay = options.delay ?? 1e3;
  for (const [index, page] of pages.entries()) {
    baseRequestConfig.queryParams.set("start", String(page));
    const response = await safeGetFetch(baseRequestConfig);
    const data = await response.text();
    const cheerioApi = load(data);
    let pageResults = [];
    for (const parser of parsers) {
      const result = parser(
        cheerioApi,
        Boolean(options.noPartialResults)
      );
      if (result) pageResults = pageResults.concat(result);
    }
    searchResults.push(pageResults);
    // Compare positions, not values: a caller-supplied list may repeat the
    // offset of its last entry earlier on, and that request still needs the
    // pause before the next one.
    const isLastPage = index === pages.length - 1;
    if (!isLastPage && delay > 0) {
      await new Promise((resolve) => setTimeout(resolve, delay));
    }
  }
  return searchResults;
}
__name(searchWithPages, "searchWithPages");

export {
  DictionaryResult,
  GOOGLE_REDIRECT_URL_PARAM_REGEX,
  GOOGLE_SEARCH_URL,
  KnowledgePanelResult,
  NewsResult,
  OrganicResult,
  RelatedSearchesResult,
  ResultTypes,
  TimeResult,
  TranslateResult,
  TranslateSourceTextRegex,
  UnitConversionResult,
  search,
  searchWithPages
};