// google-sr
// Version:
// Fast and efficient package for scraping Google search results without the need for an API key.
// 338 lines (331 loc) • 12.4 kB
// JavaScript
// Helpers used throughout this bundle to pin a function's `name` property.
var __defProp = Object.defineProperty;
/**
 * Redefines `target.name` as `value` (keeping the property configurable) and
 * returns `target`, so the call can wrap a function expression in place.
 *
 * @param {Function} target - Function whose `name` is overwritten.
 * @param {string} value - Name to assign.
 * @returns {Function} The same `target` object.
 */
var __name = (target, value) => {
  const nameDescriptor = { value, configurable: true };
  return __defProp(target, "name", nameDescriptor);
};
// src/search.ts
import axios from "axios";
import { load } from "cheerio";
// src/results.ts
import {
CurrencyConvertSelector,
DictionarySearchSelector,
GeneralSelector,
KnowledgePanelSelector,
OrganicSearchSelector,
TimeSearchSelector,
TranslateSearchSelector
} from "google-sr-selectors";
// src/constants.ts
// Discriminator strings stored in the `type` field of every result object
// produced by the parsers in this file (one entry per parser).
var ResultTypes = {
OrganicResult: "ORGANIC",
TranslateResult: "TRANSLATE",
DictionaryResult: "DICTIONARY",
TimeResult: "TIME",
CurrencyResult: "CURRENCY",
KnowledgePanelResult: "KNOWLEDGE_PANEL"
};
// Lazily captures a non-empty run of characters enclosed in double quotes.
// TranslateResult applies it to the source-text block and keeps capture group 1.
var TranslateSourceTextRegex = /"(.+?)"/;
// src/utils.ts
// Default headers for every search request. prepareRequestConfig layers any
// caller-supplied headers on top of these, so caller values win on conflict.
var baseHeaders = {
Accept: "text/html",
"Accept-Encoding": "gzip, deflate",
"Accept-Language": "en-US,en",
Referer: "https://www.google.com/",
"upgrade-insecure-requests": 1,
// NOTE: this string identifies as the Links text-mode browser on Linux, not a
// desktop Chrome build. Presumably it is paired with the `gbv=1` query
// parameter to receive the basic HTML results page - confirm before changing.
"User-Agent": "Links (2.29; Linux 6.11.0-13-generic x86_64; GNU C 13.2; text)"
};
/**
 * Pulls the real destination out of a Google redirect link.
 *
 * The destination is read from the first `q` or `imgurl` query parameter and
 * percent-decoded.
 *
 * @param {string | null | undefined} googleLink - Raw `href` taken from the results page.
 * @returns {string | null} The decoded target, or `null` when the link is
 *   empty, carries neither parameter, or cannot be percent-decoded.
 */
function extractUrlFromGoogleLink(googleLink) {
  if (!googleLink) return null;

  const encodedTarget = googleLink.match(/[?&](q|imgurl)=([^&]+)/)?.[2];
  if (!encodedTarget) return null;

  try {
    return decodeURIComponent(encodedTarget);
  } catch {
    // Malformed percent-escapes are treated the same as "no link found".
    return null;
  }
}
__name(extractUrlFromGoogleLink, "extractUrlFromGoogleLink");
/**
 * Builds the axios request configuration for a Google search.
 *
 * The caller's `opts.requestConfig` is treated as read-only: the returned
 * object, its `headers` and its `params` are all fresh copies. This keeps the
 * `q`, `gbv` and (in paginated searches) `start` parameters from leaking into
 * the caller's objects and from carrying over into later searches that reuse
 * the same config. It also keeps the shared default headers from being handed
 * out by reference.
 *
 * @param {object} opts - Search options.
 * @param {string} opts.query - Search query; written to the `q` parameter.
 * @param {object} [opts.requestConfig] - Extra axios request options. `headers`
 *   are merged over the defaults, `url` defaults to the Google search endpoint,
 *   and `params` may be a `URLSearchParams` or anything its constructor accepts.
 * @returns {object} A new request config whose `params` is a `URLSearchParams`
 *   with `q` and `gbv=1` set, and whose `responseType` is `"text"`.
 * @throws {TypeError} If `opts.query` is not a string, or `opts.requestConfig`
 *   is given but is not an object.
 */
function prepareRequestConfig(opts) {
  if (typeof opts.query !== "string")
    throw new TypeError(
      `Search query must be a string, received ${typeof opts.query} instead.`
    );

  const userConfig = opts.requestConfig ?? {};
  if (typeof userConfig !== "object")
    throw new TypeError(
      `Request config must be an object if specified, received ${typeof userConfig}.`
    );

  // The URLSearchParams constructor copies an existing instance, so the
  // caller's params object is never written to.
  const params = new URLSearchParams(userConfig.params);
  params.set("q", opts.query);
  params.set("gbv", "1");

  return {
    ...userConfig,
    // Caller headers win over the defaults; the defaults are always copied.
    headers: { ...baseHeaders, ...userConfig.headers },
    url: userConfig.url ?? "https://www.google.com/search",
    params,
    responseType: "text"
  };
}
__name(prepareRequestConfig, "prepareRequestConfig");
/**
 * Always throws: reports that a result parser was invoked without a
 * CheerioAPI instance, and hints that the parser function itself (not the
 * result of calling it) should be passed as a selector.
 *
 * @param {string} resultParserName - Parser name to show in the hint.
 * @throws {TypeError} Unconditionally.
 */
function throwNoCheerioError(resultParserName) {
  const usageHint = `(ex: [${resultParserName}] instead of [${resultParserName}()])`;
  const message = `CheerioAPI instance is missing, if using as a selector make sure to pass the raw function and not the result of calling it. ${usageHint}`;
  throw new TypeError(message);
}
__name(throwNoCheerioError, "throwNoCheerioError");
/**
 * Decides whether a set of scraped values counts as "empty".
 *
 * A value is blank when it is `""`, `undefined` or `null`.
 *
 * @param {boolean} strictSelector - When truthy, a single blank value is
 *   enough; otherwise every value has to be blank.
 * @param {...*} values - Values to inspect.
 * @returns {boolean} `true` when the values are considered empty under the chosen mode.
 */
function isEmpty(strictSelector, ...values) {
  const isBlank = (candidate) =>
    candidate === "" || candidate === void 0 || candidate === null;

  return strictSelector
    ? values.some((candidate) => isBlank(candidate))
    : values.every((candidate) => isBlank(candidate));
}
__name(isEmpty, "isEmpty");
// src/results.ts
/**
 * Parses the regular ("organic") search results from a loaded results page.
 *
 * Blocks whose link cannot be resolved to a string are skipped, as are blocks
 * whose title/description are considered empty by `isEmpty`.
 *
 * @param $ - CheerioAPI instance for the page; a TypeError is thrown when missing.
 * @param {boolean} strictSelector - Passed to `isEmpty`: when truthy both title
 *   and description are required, otherwise only one of them.
 * @returns {Array<object>} One `{ type, link, description, title }` object per accepted block.
 */
var OrganicResult = /* @__PURE__ */ __name(($, strictSelector) => {
  if (!$) throwNoCheerioError("OrganicResult");

  return $(GeneralSelector.block)
    .toArray()
    .flatMap((blockElement) => {
      const block = $(blockElement);

      const rawHref = block.find(OrganicSearchSelector.link).attr("href") ?? null;
      const link = extractUrlFromGoogleLink(rawHref);
      if (typeof link !== "string") return [];

      const description = block.find(OrganicSearchSelector.description).text();
      const title = block.find(OrganicSearchSelector.title).text();
      if (isEmpty(strictSelector, description, title)) return [];

      return [
        {
          type: ResultTypes.OrganicResult,
          link,
          description,
          title
        }
      ];
    });
}, "OrganicResult");
/**
 * Parses the translation card from a loaded results page.
 *
 * Only the first general block on the page is inspected. Its heading text is
 * expected to have the form "<source language> to <target language>"; anything
 * that does not split into exactly two parts on " to " is rejected.
 *
 * @param $ - CheerioAPI instance for the page; a TypeError is thrown when missing.
 * @param {boolean} strictSelector - Passed to `isEmpty`: when truthy all four
 *   fields must be non-empty, otherwise the card is only rejected when all are empty.
 * @returns {object | null} `{ type, sourceLanguage, translationLanguage,
 *   sourceText, translatedText }`, or `null` when no translation card is found.
 */
var TranslateResult = /* @__PURE__ */ __name(($, strictSelector) => {
  if (!$) throwNoCheerioError("TranslateResult");
  const translateBlock = $(GeneralSelector.block).first();
  // A Cheerio selection is an object and therefore always truthy, so an
  // unmatched selector has to be detected through `length`.
  if (translateBlock.length === 0) return null;

  const translatedFromTo = translateBlock.find(TranslateSearchSelector.translateFromTo).text();
  const fromTo = translatedFromTo.split(" to ");
  if (fromTo.length !== 2) return null;
  const sourceLanguage = fromTo[0].trim();
  const translationLanguage = fromTo[1].trim();

  // The source text is the quoted portion of its block; no quotes means no source text.
  const sourceTextBlock = translateBlock.find(TranslateSearchSelector.sourceText).text().trim();
  const sourceText = sourceTextBlock.match(TranslateSourceTextRegex)?.[1] ?? "";
  const translatedText = translateBlock.find(TranslateSearchSelector.translatedText).text().trim();

  if (isEmpty(
    strictSelector,
    sourceLanguage,
    translationLanguage,
    sourceText,
    translatedText
  ))
    return null;

  return {
    type: ResultTypes.TranslateResult,
    sourceLanguage,
    translationLanguage,
    sourceText,
    translatedText
  };
}, "TranslateResult");
/**
 * Reads one dictionary definition out of a Cheerio selection.
 *
 * The text blocks found under the selection are read positionally:
 * index 0 is the definition, index 1 the example, index 2 the synonym list
 * (a "synonyms: " prefix is removed and the rest is split on ", ").
 *
 * @param definitionBlock - Cheerio selection wrapping a single definition.
 * @returns {object | null} `{ definition, example?, synonyms? }`, or `null`
 *   when the definition text is empty. `example` and `synonyms` are only
 *   present when non-empty.
 */
var parseDefinitionBlock = /* @__PURE__ */ __name((definitionBlock) => {
  const textBlocks = definitionBlock.find(
    DictionarySearchSelector.definitionTextBlock
  );
  const textAt = (position) => textBlocks.eq(position).text().trim();

  const definitionText = textAt(0);
  if (!definitionText) return null;

  const parsed = { definition: definitionText };

  const example = textAt(1);
  if (example) parsed.example = example;

  const synonyms = textAt(2)
    .replace("synonyms: ", "")
    .split(", ")
    .filter((entry) => entry !== "");
  if (synonyms.length > 0) parsed.synonyms = synonyms;

  return parsed;
}, "parseDefinitionBlock");
/**
 * Parses the dictionary card from a loaded results page.
 *
 * Only the first general block on the page is inspected. Inside its first
 * definitions container the definition blocks are consumed in pairs: a block
 * that supplies the part of speech, followed by a block holding the
 * definitions for it (either as a list of items or as a single definition).
 *
 * @param $ - CheerioAPI instance for the page; a TypeError is thrown when missing.
 * @param {boolean} strictSelector - Passed to `isEmpty`: when truthy both
 *   `phonetic` and `word` are required, otherwise only one of them.
 * @returns {object | null} `{ type, phonetic, word, meanings }`, where each
 *   meaning is `{ partOfSpeech, definitions }`; `null` when no dictionary card is found.
 */
var DictionaryResult = /* @__PURE__ */ __name(($, strictSelector) => {
  if (!$) throwNoCheerioError("DictionaryResult");
  const dictionaryBlock = $(GeneralSelector.block).first();
  // A Cheerio selection is an object and therefore always truthy, so an
  // unmatched selector has to be detected through `length`.
  if (dictionaryBlock.length === 0) return null;

  const phonetic = dictionaryBlock.find(DictionarySearchSelector.phonetic).first().text().trim();
  const word = dictionaryBlock.find(DictionarySearchSelector.word).text().trim();

  // No guard on the container: if it is not found, no definition blocks are
  // found either and `meanings` simply stays empty. The truthiness check that
  // used to sit here could never fire, so this preserves existing results.
  const definitionContainer = dictionaryBlock.find(DictionarySearchSelector.definitionsContainer).first();
  const definitionBlocks = definitionContainer.find(DictionarySearchSelector.definitionsBlock).toArray();

  const meanings = [];
  let partOfSpeech = null;
  for (const definitionBlock of definitionBlocks) {
    const block = $(definitionBlock);

    if (!partOfSpeech) {
      // Still looking for a part of speech; an empty string keeps us in this state.
      partOfSpeech = block.find(DictionarySearchSelector.definitionPartOfSpeech).first().text().trim();
      continue;
    }

    // This block belongs to the part of speech read from the previous one.
    const definitionLists = block.find(DictionarySearchSelector.definitionList).toArray();
    const candidates = definitionLists.length > 0 ? definitionLists.map((item) => $(item)) : [block];
    const definitions = candidates
      .map((candidate) => parseDefinitionBlock(candidate))
      .filter((definition) => definition !== null);

    if (definitions.length > 0) {
      meanings.push({
        partOfSpeech,
        definitions
      });
    }
    partOfSpeech = null;
  }

  if (isEmpty(strictSelector, phonetic, word)) return null;
  return {
    type: ResultTypes.DictionaryResult,
    phonetic,
    word,
    meanings
  };
}, "DictionaryResult");
/**
 * Parses the "current time in <location>" card from a loaded results page.
 *
 * @param $ - CheerioAPI instance for the page; a TypeError is thrown when missing.
 * @param {boolean} strictSelector - Passed to `isEmpty`: when truthy both
 *   `time` and `timeInWords` are required, otherwise only one of them.
 * @returns {object | null} `{ type, location, time, timeInWords }`, or `null`
 *   when the card, its location or its time layout is not found.
 */
var TimeResult = /* @__PURE__ */ __name(($, strictSelector) => {
  if (!$) throwNoCheerioError("TimeResult");
  const block = $(TimeSearchSelector.block).first();

  // Location is mandatory regardless of `strictSelector`.
  const location = block.find(TimeSearchSelector.location).text();
  if (location === "") return null;

  const layoutTable = block.find(TimeSearchSelector.timeLayoutTable).first();
  // A Cheerio selection is an object and therefore always truthy, so an
  // unmatched selector has to be detected through `length`.
  if (layoutTable.length === 0) return null;

  const time = layoutTable.find(TimeSearchSelector.time).text();
  const timeInWords = layoutTable.find(TimeSearchSelector.timeInWords).text();
  if (isEmpty(strictSelector, time, timeInWords)) return null;

  return {
    type: ResultTypes.TimeResult,
    location,
    time,
    timeInWords
  };
}, "TimeResult");
/**
 * Parses the currency-conversion card from a loaded results page.
 *
 * Only the first general block on the page is inspected. The first "=" found
 * in the "from" text is removed before trimming.
 *
 * @param $ - CheerioAPI instance for the page; a TypeError is thrown when missing.
 * @param {boolean} strictSelector - Passed to `isEmpty`: when truthy both
 *   sides are required, otherwise only one of them.
 * @returns {object | null} `{ type, from, to }`, or `null` when the values are considered empty.
 */
var CurrencyResult = /* @__PURE__ */ __name(($, strictSelector) => {
  if (!$) throwNoCheerioError("CurrencyResult");

  const conversionBlock = $(GeneralSelector.block).first();
  const textOf = (selector) => conversionBlock.find(selector).text();

  const from = textOf(CurrencyConvertSelector.from).replace("=", "").trim();
  const to = textOf(CurrencyConvertSelector.to).trim();

  return isEmpty(strictSelector, from, to)
    ? null
    : {
        type: ResultTypes.CurrencyResult,
        from,
        to
      };
}, "CurrencyResult");
/**
 * Parses the knowledge panel from a loaded results page.
 *
 * Only the first six general blocks are examined. The first block that has a
 * header with both a title and a label is treated as the panel and ends the
 * scan, whether or not it finally passes the `isEmpty` check.
 *
 * @param $ - CheerioAPI instance for the page; a TypeError is thrown when missing.
 * @param {boolean} strictSelector - Passed to `isEmpty` together with title,
 *   description and label: when truthy the description is required as well.
 * @returns {object | null} `{ type, title, label, description, sourceLink,
 *   imageLink, metadata }`, or `null` when no panel is found. `sourceLink` and
 *   `imageLink` are `null` when unavailable; `metadata` is a list of
 *   `{ label, value }` pairs with neither side empty.
 */
var KnowledgePanelResult = /* @__PURE__ */ __name(($, strictSelector) => {
  if (!$) throwNoCheerioError("KnowledgePanelResult");
  const blocks = $(GeneralSelector.block);
  let knowledgePanel = null;

  blocks.each((index, element) => {
    // Returning false from an `each` callback stops the iteration.
    if (index > 5) return false;

    const block = $(element);
    const headerBlock = block.find(KnowledgePanelSelector.headerBlock).first();
    // A Cheerio selection is always truthy, so test `length`, and do it
    // before the header is used for anything else.
    if (headerBlock.length === 0) return;

    const title = headerBlock.find(KnowledgePanelSelector.title).text().trim();
    const label = headerBlock.find(KnowledgePanelSelector.label).text().trim();
    if (title === "" || label === "") return;

    // The image lives in the element that follows the header.
    const imageLink = headerBlock.next().find(KnowledgePanelSelector.imageUrl).attr("src");

    const descriptionBlock = block.find(
      KnowledgePanelSelector.descriptionBlock
    );
    const description = descriptionBlock.find("span").first().text().trim();
    const sourceLink = descriptionBlock.find("a").attr("href");
    const cleanSourceLink = extractUrlFromGoogleLink(sourceLink ?? null);

    const metadata = [];
    for (const metadataContainerElement of block.find(KnowledgePanelSelector.metadataBlock).toArray()) {
      const metadataContainer = $(metadataContainerElement);
      const metadataLabel = metadataContainer.find(KnowledgePanelSelector.metadataLabel).first().text().trim();
      if (metadataLabel === "") continue;
      const value = metadataContainer.find(KnowledgePanelSelector.metadataValue).text().trim();
      if (value === "") continue;
      metadata.push({
        label: metadataLabel,
        value
      });
    }

    if (!isEmpty(strictSelector, title, description, label))
      knowledgePanel = {
        type: ResultTypes.KnowledgePanelResult,
        title,
        label,
        description,
        sourceLink: cleanSourceLink,
        imageLink: imageLink ?? null,
        metadata
      };

    // A titled, labelled header was found: stop scanning further blocks.
    return false;
  });

  return knowledgePanel;
}, "KnowledgePanelResult");
// src/search.ts
/**
 * Runs a single Google search and parses the response.
 *
 * The page is fetched once, loaded once, and handed to every parser in
 * `options.resultTypes` (defaulting to `[OrganicResult]`). Parser output is
 * flattened into one list; parsers returning a falsy value contribute nothing.
 *
 * @param {object} options - Search options; also forwarded to `prepareRequestConfig`.
 * @param {Array<Function>} [options.resultTypes] - Parsers called as `(cheerioApi, strict)`.
 * @param {*} [options.strictSelector] - Coerced to boolean and passed to each parser.
 * @returns {Promise<Array>} Combined results of all parsers, in parser order.
 * @throws {TypeError} If `options` is missing.
 */
async function search(options) {
  if (!options) {
    throw new TypeError(
      `Search options must be provided. Received ${typeof options}`
    );
  }

  const response = await axios(prepareRequestConfig(options));
  const page = load(response.data);

  const parsers = options.resultTypes || [OrganicResult];
  const strict = Boolean(options.strictSelector);

  let collected = [];
  for (const parse of parsers) {
    const parsed = parse(page, strict);
    if (!parsed) continue;
    // `concat` appends a single object or spreads an array of them.
    collected = collected.concat(parsed);
  }
  return collected;
}
__name(search, "search");
/**
 * Runs the same search over several result pages, one request after another.
 *
 * `options.pages` is either a page count `n` (offsets 0, 10, ... for `n` pages)
 * or an explicit array of `start` offsets. A single request config is prepared
 * up front and its `start` parameter is rewritten before each request.
 *
 * @param {object} options - Search options; also forwarded to `prepareRequestConfig`.
 * @param {number | number[]} options.pages - Page count or list of `start` offsets.
 * @param {Array<Function>} [options.resultTypes] - Parsers called as
 *   `(cheerioApi, strict)`; defaults to `[OrganicResult]`.
 * @param {*} [options.strictSelector] - Coerced to boolean and passed to each parser.
 * @returns {Promise<Array<Array>>} One list of parser results per requested page, in request order.
 * @throws {TypeError} If `options` is missing or `options.pages` is neither a number nor an array.
 */
async function searchWithPages(options) {
  if (!options) {
    throw new TypeError(
      `Search options must be provided. Received ${typeof options}`
    );
  }

  const { pages } = options;
  const pagesIsList = Array.isArray(pages);
  if (typeof pages !== "number" && !pagesIsList) {
    throw new TypeError(
      `Page must be a number or an array of numbers. Received ${typeof pages}`
    );
  }

  const startOffsets = pagesIsList
    ? pages
    : Array.from({ length: pages }, (_, pageIndex) => pageIndex * 10);

  const requestConfig = prepareRequestConfig(options);
  const parsers = options.resultTypes || [OrganicResult];
  const strict = Boolean(options.strictSelector);

  const resultsByPage = [];
  for (const offset of startOffsets) {
    requestConfig.params.set("start", String(offset));
    const response = await axios(requestConfig);
    const page = load(response.data);

    let pageResults = [];
    for (const parse of parsers) {
      const parsed = parse(page, strict);
      if (!parsed) continue;
      pageResults = pageResults.concat(parsed);
    }
    resultsByPage.push(pageResults);
  }
  return resultsByPage;
}
__name(searchWithPages, "searchWithPages");
export {
CurrencyResult,
DictionaryResult,
KnowledgePanelResult,
OrganicResult,
ResultTypes,
TimeResult,
TranslateResult,
TranslateSourceTextRegex,
search,
searchWithPages
};