google-sr
Version:
JavaScript / TypeScript tools to fetch Google search results without an API key.
498 lines (485 loc) • 18.2 kB
JavaScript
// Alias for Object.defineProperty used by the `__name` helper below.
var __defProp = Object.defineProperty;
/**
 * Defines a configurable `name` property with the given value on `target`
 * and returns `target`, so it can wrap a function expression in place.
 */
var __name = (target, value) => {
  const nameDescriptor = { value, configurable: true };
  return __defProp(target, "name", nameDescriptor);
};
// src/constants.ts
/**
 * Discriminator strings stored in the `type` field of every parsed result.
 */
var ResultTypes = {
  OrganicResult: "ORGANIC",
  TranslateResult: "TRANSLATE",
  DictionaryResult: "DICTIONARY",
  TimeResult: "TIME",
  UnitConversionResult: "CONVERSION",
  KnowledgePanelResult: "KNOWLEDGE_PANEL",
  NewsResult: "NEWS",
  RelatedSearchesResult: "RELATED_SEARCHES"
};

/** Captures (lazily) the text between the first pair of double quotes. */
var TranslateSourceTextRegex = /"(.+?)"/;

/**
 * Matches the first `q`, `imgurl` or `url` query parameter of a link.
 * Group 1 is the parameter name, group 2 its raw (still encoded) value.
 */
var GOOGLE_REDIRECT_URL_PARAM_REGEX = /[?&](q|imgurl|url)=([^&]+)/;

/** Endpoint every search request is sent to. */
var GOOGLE_SEARCH_URL = "https://www.google.com/search";
// src/results/conversion.ts
import { GeneralSelector, UnitConversionSelector } from "google-sr-selectors";
// src/utils.ts
/**
 * Default request headers; `prepareRequestConfig` layers caller-supplied
 * headers on top of these.
 */
var baseHeaders = {
  "Accept": "text/html, application/xhtml+xml, */*",
  "Accept-Encoding": "gzip, deflate",
  "Accept-Language": "en-US,en;q=0.5",
  // Present a legacy Internet Explorer user agent so that Google does not
  // require JavaScript to render the results page.
  "User-Agent": "Mozilla/5.0 (MSIE 10.0; Windows NT 6.1; Trident/5.0)",
  "Connection": "Keep-Alive",
  "Referer": "https://www.google.com/"
};
/**
 * Performs a GET request with the global `fetch` and rejects on non-2xx responses.
 *
 * @param {object} options - Request options. `url` is required; `queryParams`
 *   (if present) is stringified and appended after a `?`. The remaining
 *   properties are forwarded to `fetch` as its init object.
 * @returns {Promise<Response>} The response, guaranteed to have `ok === true`.
 * @throws {TypeError} If `options` is missing or has no `url`.
 * @throws {Error} If the response status is not OK.
 */
async function safeGetFetch(options) {
  // Validate before doing anything else so a missing options object produces
  // the descriptive error instead of a property-access failure.
  if (!options?.url) {
    throw new TypeError("Request options must contain a valid URL.");
  }
  const queryString = options.queryParams?.toString();
  const url = `${options.url}${queryString ? `?${queryString}` : ""}`;
  // Force the method on a shallow copy: the caller's object is never mutated
  // (and may therefore be frozen or reused for other requests).
  const response = await fetch(url, { ...options, method: "GET" });
  if (!response.ok) {
    throw new Error(
      `Failed to fetch ${url}: ${response.status} ${response.statusText}`
    );
  }
  return response;
}
__name(safeGetFetch, "safeGetFetch");
/**
 * Pulls the target URL out of a Google redirect link.
 *
 * @param {string | null | undefined} googleLink - Link whose `q`, `imgurl`
 *   or `url` query parameter holds the encoded target.
 * @returns {string | null} The decoded parameter value, or `null` when the
 *   link is empty, has no such parameter, or the value cannot be decoded.
 */
function extractUrlFromGoogleLink(googleLink) {
  const encodedTarget = googleLink
    ? googleLink.match(GOOGLE_REDIRECT_URL_PARAM_REGEX)?.[2]
    : undefined;
  if (!encodedTarget) {
    return null;
  }
  try {
    return decodeURIComponent(encodedTarget);
  } catch {
    // Malformed percent-encoding: treat the link as not extractable.
    return null;
  }
}
__name(extractUrlFromGoogleLink, "extractUrlFromGoogleLink");
/**
 * Builds the request configuration for a Google search.
 *
 * The returned object is a fresh copy: neither `opts.requestConfig`, its
 * `queryParams`, nor the module-level `baseHeaders` are shared with or
 * mutated through the result.
 *
 * @param {object} opts - Search options.
 * @param {string} opts.query - Search query, sent as the `q` parameter.
 * @param {object} [opts.requestConfig] - Extra request options. `headers` are
 *   merged over the defaults; `queryParams` may be a `URLSearchParams` or
 *   anything its constructor accepts.
 * @returns {object} Config with `headers`, `queryParams` (a `URLSearchParams`
 *   containing `q` and `ie=UTF-8`) and `url` set to the search endpoint.
 * @throws {TypeError} If `query` is not a string or `requestConfig` is a
 *   truthy non-object.
 */
function prepareRequestConfig(opts) {
  if (typeof opts.query !== "string")
    throw new TypeError(
      `Search query must be a string, received ${typeof opts.query} instead.`
    );
  if (opts.requestConfig && typeof opts.requestConfig !== "object")
    throw new TypeError(
      `Request config must be an object if specified, received ${typeof opts.requestConfig}.`
    );
  const requestConfig = { ...opts.requestConfig };
  // Always hand out a new headers object; returning `baseHeaders` itself would
  // let one caller's mutation leak into every later request.
  requestConfig.headers = { ...baseHeaders, ...requestConfig.headers };
  // Copy the params as well (the constructor clones an existing instance) so
  // the `set` calls below, and any later ones, never touch the caller's object.
  requestConfig.queryParams = new URLSearchParams(requestConfig.queryParams);
  requestConfig.queryParams.set("q", opts.query);
  requestConfig.queryParams.set("ie", "UTF-8");
  requestConfig.url = GOOGLE_SEARCH_URL;
  return requestConfig;
}
__name(prepareRequestConfig, "prepareRequestConfig");
/**
 * Always throws: signals that a result parser was invoked without its
 * CheerioAPI argument.
 *
 * @param {string} resultParserName - Parser name shown in the usage hint.
 * @throws {TypeError} Unconditionally.
 */
function throwNoCheerioError(resultParserName) {
  const usageHint = `(ex: [${resultParserName}] instead of [${resultParserName}()])`;
  const message = `CheerioAPI instance is missing, if using as a parser make sure to pass the raw function and not the result of calling it. ${usageHint}`;
  throw new TypeError(message);
}
__name(throwNoCheerioError, "throwNoCheerioError");
/**
 * Normalizes "no value" to `undefined`.
 *
 * @param {unknown} value - Any value.
 * @returns {string | undefined} `value` itself when it is a non-empty string,
 *   otherwise `undefined`.
 */
function coerceToStringOrUndefined(value) {
  const isNonEmptyString = typeof value === "string" && value !== "";
  return isNonEmptyString ? value : undefined;
}
__name(coerceToStringOrUndefined, "coerceToStringOrUndefined");
// src/results/conversion.ts
/**
 * Parses a unit-conversion answer from the first general result block.
 *
 * @param $ - CheerioAPI instance for the results page.
 * @param {boolean} noPartialResults - When truthy, return `null` unless both
 *   `from` and `to` were found.
 * @returns {{type: string, from?: string, to?: string} | null} The parsed
 *   conversion, or `null` when there is no block (or it is incomplete in
 *   strict mode).
 * @throws {TypeError} If `$` is missing.
 */
var UnitConversionResult = /* @__PURE__ */ __name(($, noPartialResults) => {
  if (!$) throwNoCheerioError("UnitConversionResult");

  const $conversion = $(GeneralSelector.block).first();
  if (!$conversion.length) {
    return null;
  }
  const textOf = (selector) => $conversion.find(selector).text();

  // The "from" text carries the "=" sign of the equation; drop its first occurrence.
  const fromText = textOf(UnitConversionSelector.from).replace("=", "").trim();
  const from = coerceToStringOrUndefined(fromText);
  if (noPartialResults && !from) {
    return null;
  }

  const toText = textOf(UnitConversionSelector.to).trim();
  const to = coerceToStringOrUndefined(toText);
  if (noPartialResults && !to) {
    return null;
  }

  return { type: ResultTypes.UnitConversionResult, from, to };
}, "UnitConversionResult");
// src/results/dictionary.ts
import { DictionarySearchSelector, GeneralSelector as GeneralSelector2 } from "google-sr-selectors";
var parseDefinitionBlock = /* @__PURE__ */ __name((definitionBlock) => {
const definitionTextBlock = definitionBlock.find(
DictionarySearchSelector.definitionTextBlock
);
const definitionText = definitionTextBlock.eq(0).text().trim();
const example = definitionTextBlock.eq(1).text().trim();
const synonyms = definitionTextBlock.eq(2).text().trim().replace("synonyms: ", "").split(", ").filter((s) => s !== "");
if (!definitionText) return null;
const definition = {
definition: definitionText
};
if (example && example !== "") definition.example = example;
if (synonyms && synonyms.length > 0) definition.synonyms = synonyms;
return definition;
}, "parseDefinitionBlock");
/**
 * Parses a dictionary answer from the first general result block.
 *
 * Definition blocks are consumed as alternating pairs: a block that supplies
 * a part of speech, followed by a block that supplies its definitions. A
 * block with no part-of-speech text is skipped and the next block is tried
 * again as a part-of-speech block.
 *
 * @param $ - CheerioAPI instance for the results page.
 * @param {boolean} noPartialResults - When truthy, return `null` unless the
 *   phonetic, the word and at least one meaning were found.
 * @returns {{type: string, phonetic?: string, word?: string, meanings: object[]} | null}
 *   The parsed entry, or `null` when the block or its definitions container
 *   is absent (or the entry is incomplete in strict mode).
 * @throws {TypeError} If `$` is missing.
 */
var DictionaryResult = /* @__PURE__ */ __name(($, noPartialResults) => {
  if (!$) throwNoCheerioError("DictionaryResult");

  const $dictionary = $(GeneralSelector2.block).first();
  if (!$dictionary.length) {
    return null;
  }

  const phoneticText = $dictionary.find(DictionarySearchSelector.phonetic).first().text();
  const phonetic = coerceToStringOrUndefined(phoneticText);
  if (noPartialResults && !phonetic) {
    return null;
  }

  const wordText = $dictionary.find(DictionarySearchSelector.word).text();
  const word = coerceToStringOrUndefined(wordText);
  if (noPartialResults && !word) {
    return null;
  }

  const $container = $dictionary.find(DictionarySearchSelector.definitionsContainer).first();
  if (!$container.length) {
    return null;
  }

  const meanings = [];
  // Part of speech read from the previous block, waiting for its definitions.
  let pendingPartOfSpeech;
  const rawBlocks = $container.find(DictionarySearchSelector.definitionsBlock).toArray();
  for (const rawBlock of rawBlocks) {
    const $entry = $(rawBlock);

    if (!pendingPartOfSpeech) {
      const partOfSpeechText = $entry
        .find(DictionarySearchSelector.definitionPartOfSpeech)
        .first()
        .text();
      pendingPartOfSpeech = coerceToStringOrUndefined(partOfSpeechText);
      continue;
    }

    // Either a list of definitions, or the block itself is the only definition.
    const listItems = $entry.find(DictionarySearchSelector.definitionList).toArray();
    const candidates =
      listItems.length > 0
        ? listItems.map((item) => parseDefinitionBlock($(item)))
        : [parseDefinitionBlock($entry)];
    const definitions = candidates.filter((candidate) => candidate !== null);

    if (definitions.length > 0) {
      meanings.push({ partOfSpeech: pendingPartOfSpeech, definitions });
    }
    pendingPartOfSpeech = undefined;
  }

  if (noPartialResults && meanings.length === 0) {
    return null;
  }
  return { type: ResultTypes.DictionaryResult, phonetic, word, meanings };
}, "DictionaryResult");
// src/results/knowledge-panel.ts
import { GeneralSelector as GeneralSelector3, KnowledgePanelSelector } from "google-sr-selectors";
/**
 * Parses a knowledge panel from the general result blocks.
 *
 * Only the first six blocks are inspected. Blocks lacking a header or a
 * description element are skipped; the first block that has both produces
 * the result.
 *
 * @param $ - CheerioAPI instance for the results page.
 * @param {boolean} noPartialResults - When truthy, the search stops and
 *   `null` is returned as soon as a candidate panel is missing its title,
 *   label or description.
 * @returns {object | null} `{ type, title, label, description, sourceLink,
 *   imageLink, metadata }`, or `null` when no panel was found.
 * @throws {TypeError} If `$` is missing.
 */
var KnowledgePanelResult = /* @__PURE__ */ __name(($, noPartialResults) => {
  if (!$) throwNoCheerioError("KnowledgePanelResult");

  const candidates = $(GeneralSelector3.block).toArray().slice(0, 6);
  for (const candidate of candidates) {
    const $panel = $(candidate);

    const $header = $panel.find(KnowledgePanelSelector.headerBlock).first();
    if (!$header.length) continue;

    const title = coerceToStringOrUndefined(
      $header.find(KnowledgePanelSelector.title).text()
    );
    if (noPartialResults && !title) return null;

    const label = coerceToStringOrUndefined(
      $header.find(KnowledgePanelSelector.label).text().trim()
    );
    if (noPartialResults && !label) return null;

    const $description = $panel.find(KnowledgePanelSelector.descriptionBlock);
    if (!$description.length) continue;

    const description = coerceToStringOrUndefined(
      $description.find("span").first().text()
    );
    if (noPartialResults && !description) return null;

    const rawSourceLink = $description.find("a").attr("href");
    const sourceLink = coerceToStringOrUndefined(
      extractUrlFromGoogleLink(rawSourceLink)
    );

    // Keep only metadata rows that have both a label and a value.
    const metadata = [];
    $panel
      .find(KnowledgePanelSelector.metadataBlock)
      .toArray()
      .forEach((rawRow) => {
        const $row = $(rawRow);
        const rowLabel = $row.find(KnowledgePanelSelector.metadataLabel).first().text();
        if (rowLabel === "") return;
        const value = $row.find(KnowledgePanelSelector.metadataValue).text();
        if (value === "") return;
        metadata.push({ label: rowLabel, value });
      });

    // The image is looked up in the element that follows the header.
    const imageLink = coerceToStringOrUndefined(
      $header.next().find(KnowledgePanelSelector.imageUrl).attr("src")
    );

    return {
      type: ResultTypes.KnowledgePanelResult,
      title,
      label,
      description,
      sourceLink,
      imageLink,
      metadata
    };
  }
  return null;
}, "KnowledgePanelResult");
// src/results/news.ts
import { GeneralSelector as GeneralSelector4, NewsSearchSelector } from "google-sr-selectors";
/**
 * Parses news results, one per general result block that contains a link.
 *
 * @param $ - CheerioAPI instance for the results page.
 * @param {boolean} noPartialResults - When truthy, a block is dropped unless
 *   its link, title, description, source and published date were all found
 *   (the thumbnail is never required).
 * @returns {object[]} Parsed results in page order; empty when none matched.
 * @throws {TypeError} If `$` is missing.
 */
var NewsResult = /* @__PURE__ */ __name(($, noPartialResults) => {
  if (!$) throwNoCheerioError("NewsResult");

  const collected = [];
  $(GeneralSelector4.block)
    .toArray()
    .forEach((rawBlock) => {
      const $article = $(rawBlock);
      const textOf = (selector) =>
        coerceToStringOrUndefined($article.find(selector).text());

      // Blocks without a link element are not news entries at all.
      const href = $article.find(NewsSearchSelector.link).attr("href");
      if (typeof href !== "string") return;

      const link = coerceToStringOrUndefined(extractUrlFromGoogleLink(href));
      if (noPartialResults && !link) return;

      const title = textOf(NewsSearchSelector.title);
      if (noPartialResults && !title) return;

      const description = textOf(NewsSearchSelector.description);
      if (noPartialResults && !description) return;

      const source = textOf(NewsSearchSelector.source);
      if (noPartialResults && !source) return;

      const published_date = textOf(NewsSearchSelector.published_date);
      if (noPartialResults && !published_date) return;

      const thumbnail_image = coerceToStringOrUndefined(
        $article.find(NewsSearchSelector.thumbnail_image).attr("src")
      );

      collected.push({
        type: ResultTypes.NewsResult,
        link,
        title,
        description,
        source,
        published_date,
        thumbnail_image
      });
    });
  return collected;
}, "NewsResult");
// src/results/organic.ts
import { GeneralSelector as GeneralSelector5, OrganicSearchSelector } from "google-sr-selectors";
/**
 * Parses organic search results, one per general result block that contains
 * a link.
 *
 * @param $ - CheerioAPI instance for the results page.
 * @param {boolean} noPartialResults - When truthy, a block is dropped unless
 *   its link, title and source were all found (the description is never
 *   required).
 * @returns {object[]} Parsed results in page order; empty when none matched.
 * @throws {TypeError} If `$` is missing.
 */
var OrganicResult = /* @__PURE__ */ __name(($, noPartialResults) => {
  if (!$) throwNoCheerioError("OrganicResult");

  const collected = [];
  $(GeneralSelector5.block)
    .get()
    .forEach((rawBlock) => {
      const $result = $(rawBlock);

      // Blocks without a link element are not organic results at all.
      const href = $result.find(OrganicSearchSelector.link).attr("href");
      if (typeof href !== "string") return;

      const link = coerceToStringOrUndefined(extractUrlFromGoogleLink(href));
      if (noPartialResults && !link) return;

      const title = coerceToStringOrUndefined(
        $result.find(OrganicSearchSelector.title).text()
      );
      if (noPartialResults && !title) return;

      const $meta = $result.find(OrganicSearchSelector.metaContainer);
      const source = coerceToStringOrUndefined(
        $meta.find(OrganicSearchSelector.metaSource).text()
      );
      if (noPartialResults && !source) return;

      const adMarker = coerceToStringOrUndefined(
        $meta.find(OrganicSearchSelector.metaAd).text()
      );
      const description = coerceToStringOrUndefined(
        $result.find(OrganicSearchSelector.description).text()
      );

      collected.push({
        type: ResultTypes.OrganicResult,
        link,
        description,
        title,
        source,
        // An empty marker was coerced to undefined above, so any remaining
        // value means the ad marker element had text.
        isAd: Boolean(adMarker)
      });
    });
  return collected;
}, "OrganicResult");
// src/results/related-searches.ts
import { GeneralSelector as GeneralSelector6, RelatedSearchesSelector } from "google-sr-selectors";
/**
 * Parses the related-search suggestions from the last general result block.
 *
 * @param $ - CheerioAPI instance for the results page.
 * @param {boolean} noPartialResults - When truthy, return `null` instead of
 *   a result with an empty `queries` list.
 * @returns {{type: string, queries: string[]} | null} The non-empty
 *   suggestion texts in page order.
 * @throws {TypeError} If `$` is missing.
 */
var RelatedSearchesResult = /* @__PURE__ */ __name(($, noPartialResults) => {
  if (!$) throwNoCheerioError("RelatedSearchesResult");

  const $lastBlock = $(GeneralSelector6.block).last();
  const queries = $lastBlock
    .find(RelatedSearchesSelector.query_item)
    .toArray()
    .map((rawItem) =>
      coerceToStringOrUndefined(
        $(rawItem).find(RelatedSearchesSelector.text).text()
      )
    )
    // Items without text were coerced to undefined; drop them.
    .filter(Boolean);

  if (noPartialResults && queries.length === 0) {
    return null;
  }
  return { type: ResultTypes.RelatedSearchesResult, queries };
}, "RelatedSearchesResult");
// src/results/time.ts
import { TimeSearchSelector } from "google-sr-selectors";
/**
 * Parses a "current time" answer from the first time block.
 *
 * @param $ - CheerioAPI instance for the results page.
 * @param {boolean} noPartialResults - When truthy, return `null` unless the
 *   location, the time and the time-in-words were all found.
 * @returns {{type: string, location?: string, time?: string, timeInWords?: string} | null}
 *   The parsed time, or `null` when the page has no time block, the block
 *   has no layout table, or the result is incomplete in strict mode.
 * @throws {TypeError} If `$` is missing.
 */
var TimeResult = /* @__PURE__ */ __name(($, noPartialResults) => {
  if (!$) throwNoCheerioError("TimeResult");

  const block = $(TimeSearchSelector.block).first();
  // Without this guard every page, time-related or not, would yield a
  // TIME result made only of undefined fields when partial results are allowed.
  if (!block.length) return null;

  const location = coerceToStringOrUndefined(
    block.find(TimeSearchSelector.location).text()
  );
  if (noPartialResults && !location) return null;

  const layoutTable = block.find(TimeSearchSelector.timeLayoutTable).first();
  // A selection object is always truthy; emptiness must be tested via `length`.
  if (!layoutTable.length) return null;

  const time = coerceToStringOrUndefined(
    layoutTable.find(TimeSearchSelector.time).text()
  );
  if (noPartialResults && !time) return null;

  const timeInWords = coerceToStringOrUndefined(
    layoutTable.find(TimeSearchSelector.timeInWords).text()
  );
  if (noPartialResults && !timeInWords) return null;

  return {
    type: ResultTypes.TimeResult,
    location,
    time,
    timeInWords
  };
}, "TimeResult");
// src/results/translate.ts
import { GeneralSelector as GeneralSelector7, TranslateSearchSelector } from "google-sr-selectors";
/**
 * Parses a translation answer from the first general result block.
 *
 * The language pair is read from a "<source> to <target>" label and the
 * source text from the first double-quoted span of the source-text element.
 *
 * @param $ - CheerioAPI instance for the results page.
 * @param {boolean} noPartialResults - When truthy, return `null` unless both
 *   languages, the source text and the translated text were found.
 * @returns {object | null} `{ type, sourceLanguage, translationLanguage,
 *   sourceText, translatedText }`, or `null` when there is no block, the
 *   language label does not split into exactly two parts, or the result is
 *   incomplete in strict mode.
 * @throws {TypeError} If `$` is missing.
 */
var TranslateResult = /* @__PURE__ */ __name(($, noPartialResults) => {
  if (!$) throwNoCheerioError("TranslateResult");

  const $translation = $(GeneralSelector7.block).first();
  if (!$translation.length) {
    return null;
  }
  const textOf = (selector) =>
    coerceToStringOrUndefined($translation.find(selector).text());

  const languagePair = textOf(TranslateSearchSelector.translateFromTo);
  if (noPartialResults && !languagePair) {
    return null;
  }

  let sourceLanguage;
  let translationLanguage;
  if (languagePair) {
    const languages = languagePair.split(" to ");
    if (languages.length !== 2) {
      return null;
    }
    const [rawSource, rawTarget] = languages;
    sourceLanguage = coerceToStringOrUndefined(rawSource.trimEnd());
    translationLanguage = coerceToStringOrUndefined(rawTarget.trim());
  }
  const hasBothLanguages = Boolean(sourceLanguage) && Boolean(translationLanguage);
  if (noPartialResults && !hasBothLanguages) {
    return null;
  }

  const quotedSource = textOf(TranslateSearchSelector.sourceText);
  if (noPartialResults && !quotedSource) {
    return null;
  }
  const sourceText = quotedSource
    ? quotedSource.match(TranslateSourceTextRegex)?.[1]
    : undefined;
  if (noPartialResults && !sourceText) {
    return null;
  }

  const translatedText = textOf(TranslateSearchSelector.translatedText);
  if (noPartialResults && !translatedText) {
    return null;
  }

  return {
    type: ResultTypes.TranslateResult,
    sourceLanguage,
    translationLanguage,
    sourceText,
    translatedText
  };
}, "TranslateResult");
// src/search.ts
import { load } from "cheerio";
/**
 * Runs a single Google search and parses the returned page.
 *
 * @param {object} searchOptions - Search options: `query`, plus optional
 *   `requestConfig`, `parsers` (defaults to `[OrganicResult]`) and
 *   `noPartialResults`.
 * @returns {Promise<Array>} Every truthy parser output concatenated in
 *   parser order (array outputs are flattened by one level).
 * @throws {TypeError} If `searchOptions` is missing or not an object.
 */
async function search(searchOptions) {
  const isOptionsObject = Boolean(searchOptions) && typeof searchOptions === "object";
  if (!isOptionsObject) {
    throw new TypeError(
      `Search options must be provided. Received ${typeof searchOptions}`
    );
  }

  const response = await safeGetFetch(prepareRequestConfig(searchOptions));
  const $page = load(await response.text());

  const activeParsers = searchOptions.parsers || [OrganicResult];
  let collected = [];
  for (const parse of activeParsers) {
    const parsed = parse($page, Boolean(searchOptions.noPartialResults));
    // Parsers return null when they found nothing.
    if (!parsed) continue;
    collected = collected.concat(parsed);
  }
  return collected;
}
__name(search, "search");
/**
 * Runs the same search over several result pages, sequentially.
 *
 * @param {object} options - Search options: `query`, plus optional
 *   `requestConfig`, `parsers` (defaults to `[OrganicResult]`) and
 *   `noPartialResults`.
 * @param {number | number[]} options.pages - A page count `n`, expanded to
 *   the `start` offsets `0, 10, ..., (n - 1) * 10`, or an explicit array of
 *   `start` offsets.
 * @param {number} [options.delay=1000] - Milliseconds to wait between two
 *   consecutive page requests; values `<= 0` disable the wait.
 * @returns {Promise<Array[]>} One array of parser outputs per requested page,
 *   in request order.
 * @throws {TypeError} If `options` is missing or `pages` is neither a number
 *   nor an array.
 */
async function searchWithPages(options) {
  if (!options)
    throw new TypeError(
      `Search options must be provided. Received ${typeof options}`
    );
  if (typeof options.pages !== "number" && !Array.isArray(options.pages))
    throw new TypeError(
      `Page must be a number or an array of numbers. Received ${typeof options.pages}`
    );

  const pages = Array.isArray(options.pages)
    ? options.pages
    : Array.from({ length: options.pages }, (_, i) => i * 10);
  const baseRequestConfig = prepareRequestConfig(options);
  const parsers = options.parsers || [OrganicResult];
  const delay = options.delay ?? 1e3;
  const noPartialResults = Boolean(options.noPartialResults);
  const lastIndex = pages.length - 1;

  const searchResults = [];
  for (const [index, page] of pages.entries()) {
    baseRequestConfig.queryParams.set("start", String(page));
    const response = await safeGetFetch(baseRequestConfig);
    const cheerioApi = load(await response.text());

    let pageResults = [];
    for (const parser of parsers) {
      const result = parser(cheerioApi, noPartialResults);
      if (result) pageResults = pageResults.concat(result);
    }
    searchResults.push(pageResults);

    // Decide "is this the last request" by position, not by value: an earlier
    // page whose offset equals the final one must still be followed by the delay.
    if (index < lastIndex && delay > 0) {
      await new Promise((resolve) => setTimeout(resolve, delay));
    }
  }
  return searchResults;
}
__name(searchWithPages, "searchWithPages");
export {
DictionaryResult,
GOOGLE_REDIRECT_URL_PARAM_REGEX,
GOOGLE_SEARCH_URL,
KnowledgePanelResult,
NewsResult,
OrganicResult,
RelatedSearchesResult,
ResultTypes,
TimeResult,
TranslateResult,
TranslateSourceTextRegex,
UnitConversionResult,
search,
searchWithPages
};