google-sr
Version:
Fast and efficient package for scraping Google search results without the need for an API key.
374 lines (366 loc) • 15.2 kB
JavaScript
;
// ---------------------------------------------------------------------------
// Bundler runtime helpers: short aliases for the Object reflection built-ins
// plus the small ESM <-> CommonJS interop functions used by the rest of the file.
// ---------------------------------------------------------------------------
var __create = Object.create;
var __defProp = Object.defineProperty;
var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
var __getOwnPropNames = Object.getOwnPropertyNames;
var __getProtoOf = Object.getPrototypeOf;
var __hasOwnProp = Object.prototype.hasOwnProperty;
// Defines a configurable "name" property on `target` holding `value`; returns `target`
// (Object.defineProperty returns the object it was given).
var __name = (target, value) => __defProp(target, "name", { value, configurable: true });
// For every key of `all`, defines an enumerable accessor on `target` whose getter is
// all[key]; the value is therefore produced only when the property is read.
var __export = (target, all) => {
for (var name in all)
__defProp(target, name, { get: all[name], enumerable: true });
};
// Mirrors the own properties of `from` onto `to` as getters that read from[key] on access.
// Keys that `to` already owns, and the key equal to `except`, are skipped.
// Enumerability follows the source descriptor (enumerable when no descriptor is found).
// Does nothing when `from` is neither an object nor a function. Returns `to`.
var __copyProps = (to, from, except, desc) => {
if (from && typeof from === "object" || typeof from === "function") {
for (let key of __getOwnPropNames(from))
if (!__hasOwnProp.call(to, key) && key !== except)
__defProp(to, key, { get: () => from[key], enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable });
}
return to;
};
// Wraps a required module so it can be consumed like an ES module namespace.
// `target` is created with the prototype of `mod` (or is a plain object when `mod` is
// null/undefined); the properties of `mod` are then copied onto it via __copyProps.
var __toESM = (mod, isNodeMode, target) => (target = mod != null ? __create(__getProtoOf(mod)) : {}, __copyProps(
// If the importer is in node compatibility mode or this is not an ESM
// file that has been converted to a CommonJS file using a Babel-
// compatible transform (i.e. "__esModule" has not been set), then set
// "default" to the CommonJS "module.exports" for node compatibility.
isNodeMode || !mod || !mod.__esModule ? __defProp(target, "default", { value: mod, enumerable: true }) : target,
mod
));
// Builds the CommonJS export object: a fresh object flagged with `__esModule: true`
// that receives getter copies of every property of `mod`.
var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);
// src/index.ts
// Public export table. Each entry is registered as a getter (see __export), so the
// bindings declared further down the file are only read when a consumer accesses them.
var src_exports = {};
__export(src_exports, {
CurrencyResult: () => CurrencyResult,
DictionaryResult: () => DictionaryResult,
KnowledgePanelResult: () => KnowledgePanelResult,
OrganicResult: () => OrganicResult,
ResultTypes: () => ResultTypes,
TimeResult: () => TimeResult,
TranslateResult: () => TranslateResult,
TranslateSourceTextRegex: () => TranslateSourceTextRegex,
search: () => search,
searchWithPages: () => searchWithPages
});
// Expose the table as the CommonJS module, flagged with `__esModule: true`.
module.exports = __toCommonJS(src_exports);
// src/search.ts
var import_axios = __toESM(require("axios"));
var import_cheerio = require("cheerio");
// src/results.ts
var import_google_sr_selectors = require("google-sr-selectors");
// src/constants.ts
// String tags written to the `type` field of every parsed result, one per result
// parser defined in this file.
var ResultTypes = {
OrganicResult: "ORGANIC",
TranslateResult: "TRANSLATE",
DictionaryResult: "DICTIONARY",
TimeResult: "TIME",
CurrencyResult: "CURRENCY",
KnowledgePanelResult: "KNOWLEDGE_PANEL"
};
// Lazily captures (group 1) the text between the first pair of double quotes.
// TranslateResult applies it to the source-text block to recover the original phrase.
var TranslateSourceTextRegex = /"(.+?)"/;
// src/utils.ts
// Default HTTP headers for search requests. prepareRequestConfig uses them as-is or
// merges caller-supplied headers on top of them.
var baseHeaders = {
Accept: "text/html",
"Accept-Encoding": "gzip, deflate",
"Accept-Language": "en-US,en",
Referer: "https://www.google.com/",
"upgrade-insecure-requests": 1,
// This user agent identifies as the Links text-mode browser (2.29 on Linux), not a Chrome build.
// NOTE(review): presumably chosen so the server returns plain, script-free HTML that the
// selectors expect (requests also set gbv=1) — confirm before changing it.
"User-Agent": "Links (2.29; Linux 6.11.0-13-generic x86_64; GNU C 13.2; text)"
};
/**
 * Recovers the destination URL embedded in a Google redirect-style link.
 *
 * The link's query string is searched for the first `q` or `imgurl` parameter and
 * its value is percent-decoded.
 *
 * @param {string | null | undefined} googleLink - Raw href taken from the results page.
 * @returns {string | null} The decoded parameter value, or `null` when the link is
 *   empty, carries neither parameter, or the value is not valid percent-encoding.
 */
function extractUrlFromGoogleLink(googleLink) {
  if (!googleLink) return null;

  const encodedTarget = googleLink.match(/[?&](q|imgurl)=([^&]+)/)?.[2];
  if (!encodedTarget) return null;

  try {
    return decodeURIComponent(encodedTarget);
  } catch {
    // Malformed escape sequence: treat the link as unusable instead of throwing.
    return null;
  }
}
__name(extractUrlFromGoogleLink, "extractUrlFromGoogleLink");
/**
 * Builds the axios request configuration for a search.
 *
 * The caller's `opts.requestConfig`, its headers and its params are never modified:
 * the returned config, its `headers` object and its `params` instance are fresh
 * copies. This matters because callers of this function keep mutating the returned
 * params (e.g. to set the `start` offset for each page), and because the module-level
 * default headers must not be handed out by reference.
 *
 * @param {object} opts - Search options.
 * @param {string} opts.query - Search query; sent as the `q` parameter.
 * @param {object} [opts.requestConfig] - Optional axios config to extend. Its `headers`
 *   are merged over the defaults, its `url` replaces the default search URL, and its
 *   `params` (URLSearchParams, plain object, or string) are kept alongside `q`/`gbv`.
 * @returns {object} A new config with `headers`, `url`, `params` (URLSearchParams
 *   containing `q` and `gbv=1`) and `responseType: "text"` set.
 * @throws {TypeError} If `opts.query` is not a string or `opts.requestConfig` is
 *   provided but is not an object.
 */
function prepareRequestConfig(opts) {
  const userConfig = opts.requestConfig ?? {};
  if (typeof opts.query !== "string")
    throw new TypeError(
      `Search query must be a string, received ${typeof opts.query} instead.`
    );
  if (typeof userConfig !== "object")
    throw new TypeError(
      `Request config must be an object if specified, received ${typeof userConfig}.`
    );

  // Shallow copy so the caller's config object is left untouched.
  const requestConfig = { ...userConfig };

  // Always a fresh object: user headers win over the defaults, and the shared
  // default header object is never exposed to later mutation.
  requestConfig.headers = Object.assign({}, baseHeaders, userConfig.headers);
  requestConfig.url = userConfig.url ?? "https://www.google.com/search";

  // Copying also covers the case where the caller passed a URLSearchParams instance;
  // the constructor accepts an existing instance, a plain object, a string, or nothing.
  const params = new URLSearchParams(userConfig.params);
  params.set("q", opts.query);
  params.set("gbv", "1");
  requestConfig.params = params;

  requestConfig.responseType = "text";
  return requestConfig;
}
__name(prepareRequestConfig, "prepareRequestConfig");
/**
 * Raises the error used when a result parser is invoked without a Cheerio instance,
 * which typically means the parser was called instead of being passed as a reference.
 *
 * @param {string} resultParserName - Name of the parser, echoed in the usage hint.
 * @throws {TypeError} Always.
 */
function throwNoCheerioError(resultParserName) {
  const correctUsage = `[${resultParserName}]`;
  const wrongUsage = `[${resultParserName}()]`;
  throw new TypeError(
    `CheerioAPI instance is missing, if using as a selector make sure to pass the raw function and not the result of calling it. (ex: ${correctUsage} instead of ${wrongUsage})`
  );
}
__name(throwNoCheerioError, "throwNoCheerioError");
/**
 * Decides whether a parsed result should be considered empty.
 *
 * A value counts as blank when it is `""`, `undefined` or `null`.
 *
 * @param {boolean} strictSelector - When truthy, a single blank value is enough;
 *   otherwise every value has to be blank.
 * @param {...*} values - Extracted fields to inspect.
 * @returns {boolean} `true` when the values are empty under the selected mode.
 */
function isEmpty(strictSelector, ...values) {
  const isBlank = (candidate) =>
    candidate === "" || candidate === undefined || candidate === null;
  return strictSelector ? values.some(isBlank) : values.every(isBlank);
}
__name(isEmpty, "isEmpty");
// src/results.ts
/**
 * Result parser for regular ("organic") search hits.
 *
 * Every general result block is inspected for a link, a description and a title.
 * A block is dropped when its link cannot be resolved to a string by
 * extractUrlFromGoogleLink, or when isEmpty reports the description/title as empty
 * for the given strictness.
 *
 * @param {Function} $ - Cheerio instance loaded with the results page.
 * @param {boolean} strictSelector - Forwarded to isEmpty.
 * @returns {Array<{type: string, link: string, description: string, title: string}>}
 * @throws {TypeError} When `$` is missing.
 */
var OrganicResult = /* @__PURE__ */ __name(($, strictSelector) => {
  if (!$) throwNoCheerioError("OrganicResult");

  const collected = [];
  const blockElements = $(import_google_sr_selectors.GeneralSelector.block).toArray();

  for (const blockElement of blockElements) {
    const rawHref = $(blockElement).find(import_google_sr_selectors.OrganicSearchSelector.link).attr("href") ?? null;
    const description = $(blockElement).find(import_google_sr_selectors.OrganicSearchSelector.description).text();
    const title = $(blockElement).find(import_google_sr_selectors.OrganicSearchSelector.title).text();

    const link = extractUrlFromGoogleLink(rawHref);
    const hasLink = typeof link === "string";

    if (hasLink && !isEmpty(strictSelector, description, title)) {
      collected.push({
        type: ResultTypes.OrganicResult,
        link,
        description,
        title
      });
    }
  }

  return collected;
}, "OrganicResult");
/**
 * Result parser for the translation card.
 *
 * Reads the first general result block and extracts the "<source> to <target>"
 * language header, the quoted source text and the translated text.
 *
 * @param {Function} $ - Cheerio instance loaded with the results page.
 * @param {boolean} strictSelector - Forwarded to isEmpty: when truthy any blank field
 *   rejects the result, otherwise only an all-blank result is rejected.
 * @returns {{type: string, sourceLanguage: string, translationLanguage: string,
 *   sourceText: string, translatedText: string} | null} `null` when no translation
 *   card could be parsed.
 * @throws {TypeError} When `$` is missing.
 */
var TranslateResult = /* @__PURE__ */ __name(($, strictSelector) => {
  if (!$) throwNoCheerioError("TranslateResult");

  const translateBlock = $(import_google_sr_selectors.GeneralSelector.block).first();
  // A Cheerio selection is always a truthy object, so "nothing matched" has to be
  // detected through its length rather than with a `!selection` test.
  if (translateBlock.length === 0) return null;

  // The header reads "<source language> to <target language>".
  const translatedFromTo = translateBlock.find(import_google_sr_selectors.TranslateSearchSelector.translateFromTo).text();
  const fromTo = translatedFromTo.split(" to ");
  if (fromTo.length !== 2) return null;
  const sourceLanguage = fromTo[0].trim();
  const translationLanguage = fromTo[1].trim();

  // The source text is shown wrapped in double quotes; keep only the quoted part.
  const sourceTextBlock = translateBlock.find(import_google_sr_selectors.TranslateSearchSelector.sourceText).text().trim();
  const sourceText = sourceTextBlock.match(TranslateSourceTextRegex)?.[1] ?? "";
  const translatedText = translateBlock.find(import_google_sr_selectors.TranslateSearchSelector.translatedText).text().trim();

  if (isEmpty(
    strictSelector,
    sourceLanguage,
    translationLanguage,
    sourceText,
    translatedText
  ))
    return null;

  return {
    type: ResultTypes.TranslateResult,
    sourceLanguage,
    translationLanguage,
    sourceText,
    translatedText
  };
}, "TranslateResult");
/**
 * Turns one dictionary definition block into a definition record.
 *
 * The matched text elements are read positionally: index 0 is the definition,
 * index 1 the example and index 2 the "synonyms: a, b, c" line.
 *
 * @param {object} definitionBlock - Cheerio selection wrapping the definition block.
 * @returns {{definition: string, example?: string, synonyms?: string[]} | null}
 *   `null` when the block has no definition text; `example` and `synonyms` are only
 *   present when non-empty.
 */
var parseDefinitionBlock = /* @__PURE__ */ __name((definitionBlock) => {
  const textBlocks = definitionBlock.find(
    import_google_sr_selectors.DictionarySearchSelector.definitionTextBlock
  );
  const trimmedTextAt = (position) => textBlocks.eq(position).text().trim();

  const definitionText = trimmedTextAt(0);
  const example = trimmedTextAt(1);
  const synonyms = trimmedTextAt(2)
    .replace("synonyms: ", "")
    .split(", ")
    .filter((entry) => entry !== "");

  if (!definitionText) return null;

  const parsed = { definition: definitionText };
  if (example) parsed.example = example;
  if (synonyms.length > 0) parsed.synonyms = synonyms;
  return parsed;
}, "parseDefinitionBlock");
// Result parser for the dictionary card.
// Reads the first general result block, extracts the word and its phonetic spelling,
// then walks the definition blocks to build a list of { partOfSpeech, definitions }.
// Returns null when isEmpty rejects the phonetic/word pair for the given strictness;
// throws a TypeError (via throwNoCheerioError) when `$` is missing.
var DictionaryResult = /* @__PURE__ */ __name(($, strictSelector) => {
if (!$) throwNoCheerioError("DictionaryResult");
const dictionaryBlock = $(import_google_sr_selectors.GeneralSelector.block).first();
// NOTE(review): if `.first()` returns a Cheerio selection object (as in standard cheerio),
// it is always truthy and this guard never fires — an empty match would need `.length === 0`. Confirm.
if (!dictionaryBlock) return null;
const phonetic = dictionaryBlock.find(import_google_sr_selectors.DictionarySearchSelector.phonetic).first().text().trim();
const word = dictionaryBlock.find(import_google_sr_selectors.DictionarySearchSelector.word).text().trim();
const meanings = [];
const definitionContainer = dictionaryBlock.find(import_google_sr_selectors.DictionarySearchSelector.definitionsContainer).first();
// NOTE(review): same always-truthy concern as above. Making this guard effective would change
// the output for pages without a definitions container (currently a result with empty `meanings`).
if (!definitionContainer) return null;
const definitionBlocks = definitionContainer.find(import_google_sr_selectors.DictionarySearchSelector.definitionsBlock).toArray();
// The blocks are consumed in alternation: while no part of speech is pending, the current
// block is read as a part-of-speech label; otherwise it is read as that label's definitions.
let partOfSpeech = null;
for (const definitionBlock of definitionBlocks) {
if (!partOfSpeech) {
// An empty label is falsy, so the next block is again treated as a label.
partOfSpeech = $(definitionBlock).find(import_google_sr_selectors.DictionarySearchSelector.definitionPartOfSpeech).first().text().trim();
} else {
const definitionLists = $(definitionBlock).find(import_google_sr_selectors.DictionarySearchSelector.definitionList).toArray();
let definitions;
if (definitionLists.length > 0) {
// Several definitions: parse each list item and drop the ones without definition text.
definitions = definitionLists.map((item) => parseDefinitionBlock($(item))).filter((d) => d !== null);
} else {
// No list items: the block itself is parsed as a single definition.
const definition = parseDefinitionBlock($(definitionBlock));
if (definition) definitions = [definition];
else definitions = [];
}
if (definitions.length > 0) {
meanings.push({
partOfSpeech,
definitions
});
}
// Reset so the following block is interpreted as the next part-of-speech label.
partOfSpeech = null;
}
}
if (isEmpty(strictSelector, phonetic, word)) return null;
return {
type: ResultTypes.DictionaryResult,
phonetic,
word,
meanings
};
}, "DictionaryResult");
/**
 * Result parser for the "current time in <place>" card.
 *
 * @param {Function} $ - Cheerio instance loaded with the results page.
 * @param {boolean} strictSelector - Forwarded to isEmpty: when truthy a blank `time`
 *   or `timeInWords` rejects the result, otherwise only both being blank does.
 * @returns {{type: string, location: string, time: string, timeInWords: string} | null}
 *   `null` when the page contains no parsable time card.
 * @throws {TypeError} When `$` is missing.
 */
var TimeResult = /* @__PURE__ */ __name(($, strictSelector) => {
  if (!$) throwNoCheerioError("TimeResult");

  const block = $(import_google_sr_selectors.TimeSearchSelector.block).first();
  // An empty location (including the case where no block matched) means no time card.
  const location = block.find(import_google_sr_selectors.TimeSearchSelector.location).text();
  if (location === "") return null;

  const layoutTable = block.find(import_google_sr_selectors.TimeSearchSelector.timeLayoutTable).first();
  // A Cheerio selection is always a truthy object, so a missing table has to be
  // detected through its length rather than with a `!selection` test.
  if (layoutTable.length === 0) return null;

  const time = layoutTable.find(import_google_sr_selectors.TimeSearchSelector.time).text();
  const timeInWords = layoutTable.find(import_google_sr_selectors.TimeSearchSelector.timeInWords).text();
  if (isEmpty(strictSelector, time, timeInWords)) return null;

  return {
    type: ResultTypes.TimeResult,
    location,
    time,
    timeInWords
  };
}, "TimeResult");
/**
 * Result parser for the currency-conversion card.
 *
 * Reads the "from" and "to" texts of the first general result block; the first "="
 * is removed from the "from" text before trimming.
 *
 * @param {Function} $ - Cheerio instance loaded with the results page.
 * @param {boolean} strictSelector - Forwarded to isEmpty.
 * @returns {{type: string, from: string, to: string} | null} `null` when isEmpty
 *   rejects the extracted pair.
 * @throws {TypeError} When `$` is missing.
 */
var CurrencyResult = /* @__PURE__ */ __name(($, strictSelector) => {
  if (!$) throwNoCheerioError("CurrencyResult");

  const conversionBlock = $(import_google_sr_selectors.GeneralSelector.block).first();

  const fromText = conversionBlock.find(import_google_sr_selectors.CurrencyConvertSelector.from).text();
  const from = fromText.replace("=", "").trim();

  const toText = conversionBlock.find(import_google_sr_selectors.CurrencyConvertSelector.to).text();
  const to = toText.trim();

  return isEmpty(strictSelector, from, to)
    ? null
    : { type: ResultTypes.CurrencyResult, from, to };
}, "CurrencyResult");
/**
 * Result parser for the knowledge panel.
 *
 * Scans the first six general result blocks (indexes 0-5) for one whose header has
 * both a title and a label. The first such block ends the scan: it is returned as the
 * panel if isEmpty accepts its title/description/label, otherwise the result is `null`.
 *
 * @param {Function} $ - Cheerio instance loaded with the results page.
 * @param {boolean} strictSelector - Forwarded to isEmpty.
 * @returns {{type: string, title: string, label: string, description: string,
 *   sourceLink: string | null, imageLink: string | null,
 *   metadata: Array<{label: string, value: string}>} | null}
 * @throws {TypeError} When `$` is missing.
 */
var KnowledgePanelResult = /* @__PURE__ */ __name(($, strictSelector) => {
  if (!$) throwNoCheerioError("KnowledgePanelResult");

  const blocks = $(import_google_sr_selectors.GeneralSelector.block);
  let knowledgePanel = null;

  blocks.each((index, element) => {
    // Only the leading blocks are inspected; returning false stops the iteration.
    if (index > 5) return false;

    const block = $(element);
    const headerBlock = block.find(import_google_sr_selectors.KnowledgePanelSelector.headerBlock).first();
    // A Cheerio selection is always a truthy object, so a missing header has to be
    // detected through its length. Checked before the header's sibling is looked up.
    if (headerBlock.length === 0) return;

    // The image container is the element that follows the header.
    const imageContainer = headerBlock.next();
    const title = headerBlock.find(import_google_sr_selectors.KnowledgePanelSelector.title).text().trim();
    const label = headerBlock.find(import_google_sr_selectors.KnowledgePanelSelector.label).text().trim();
    const imageLink = imageContainer.find(import_google_sr_selectors.KnowledgePanelSelector.imageUrl).attr("src");
    // Not a panel header: move on to the next block.
    if (title === "" || label === "") return;

    const descriptionBlock = block.find(
      import_google_sr_selectors.KnowledgePanelSelector.descriptionBlock
    );
    const description = descriptionBlock.find("span").first().text().trim();
    const sourceLink = descriptionBlock.find("a").attr("href");
    const cleanSourceLink = extractUrlFromGoogleLink(sourceLink ?? null);

    // Collect label/value pairs, skipping entries where either side is blank.
    const metadataBlocks = block.find(import_google_sr_selectors.KnowledgePanelSelector.metadataBlock).toArray();
    const metadata = [];
    for (const metadataContainerElement of metadataBlocks) {
      const metadataContainer = $(metadataContainerElement);
      const metadataLabel = metadataContainer.find(import_google_sr_selectors.KnowledgePanelSelector.metadataLabel).first().text().trim();
      if (metadataLabel === "") continue;
      const value = metadataContainer.find(import_google_sr_selectors.KnowledgePanelSelector.metadataValue).text().trim();
      if (value === "") continue;
      metadata.push({
        label: metadataLabel,
        value
      });
    }

    if (!isEmpty(strictSelector, title, description, label))
      knowledgePanel = {
        type: ResultTypes.KnowledgePanelResult,
        title,
        label,
        description,
        sourceLink: cleanSourceLink,
        imageLink: imageLink ?? null,
        metadata
      };

    // The first block with a title and label ends the scan, accepted or not.
    return false;
  });

  return knowledgePanel;
}, "KnowledgePanelResult");
// src/search.ts
/**
 * Runs a single search request and parses the response with the requested parsers.
 *
 * @param {object} options - Search options; must include a string `query`.
 *   `resultTypes` lists the parsers to run (defaults to `[OrganicResult]`),
 *   `strictSelector` is coerced to a boolean and passed to every parser, and
 *   `requestConfig` is handled by prepareRequestConfig.
 * @returns {Promise<Array>} Flat list of results: array results from a parser are
 *   appended element by element, single results as-is, falsy results are skipped.
 * @throws {TypeError} When `options` is missing (or on invalid options, via
 *   prepareRequestConfig).
 */
async function search(options) {
  if (!options) {
    throw new TypeError(
      `Search options must be provided. Received ${typeof options}`
    );
  }

  const requestConfig = prepareRequestConfig(options);
  const response = await (0, import_axios.default)(requestConfig);
  const cheerioApi = (0, import_cheerio.load)(response.data);

  const parsers = options.resultTypes || [OrganicResult];
  const strict = Boolean(options.strictSelector);

  const searchResults = [];
  for (const parse of parsers) {
    const parsed = parse(cheerioApi, strict);
    if (!parsed) continue;
    // [].concat flattens an array result one level and wraps a single result.
    searchResults.push(...[].concat(parsed));
  }
  return searchResults;
}
__name(search, "search");
/**
 * Runs the same search over several result pages, one request after another.
 *
 * @param {object} options - Search options as accepted by `search`, plus `pages`:
 *   either a number N (expanded to the start offsets 0, 10, …, (N-1)*10) or an array
 *   of start offsets used as given. Each offset is sent as the `start` parameter.
 * @returns {Promise<Array<Array>>} One entry per requested page, each holding that
 *   page's flat list of parsed results.
 * @throws {TypeError} When `options` is missing or `pages` is neither a number nor
 *   an array (or on invalid options, via prepareRequestConfig).
 */
async function searchWithPages(options) {
  if (!options) {
    throw new TypeError(
      `Search options must be provided. Received ${typeof options}`
    );
  }

  const pagesIsArray = Array.isArray(options.pages);
  if (typeof options.pages !== "number" && !pagesIsArray) {
    throw new TypeError(
      `Page must be a number or an array of numbers. Received ${typeof options.pages}`
    );
  }

  const allPages = [];
  const startOffsets = pagesIsArray
    ? options.pages
    : Array.from({ length: options.pages }, (_, pageIndex) => pageIndex * 10);

  const baseRequestConfig = prepareRequestConfig(options);
  const parsers = options.resultTypes || [OrganicResult];

  for (const startOffset of startOffsets) {
    // The same config object is reused; only the start offset changes per page.
    baseRequestConfig.params.set("start", String(startOffset));
    const response = await (0, import_axios.default)(baseRequestConfig);
    const cheerioApi = (0, import_cheerio.load)(response.data);

    const pageResults = [];
    for (const parse of parsers) {
      const parsed = parse(cheerioApi, Boolean(options.strictSelector));
      if (!parsed) continue;
      // [].concat flattens an array result one level and wraps a single result.
      pageResults.push(...[].concat(parsed));
    }
    allPages.push(pageResults);
  }

  return allPages;
}
__name(searchWithPages, "searchWithPages");
// Annotate the CommonJS export names for ESM import in node:
// The `0 &&` short-circuits, so this assignment never executes at runtime; the statement
// exists only as a static listing of the export names. The real exports are installed
// earlier in the file through `module.exports = __toCommonJS(src_exports)`.
0 && (module.exports = {
CurrencyResult,
DictionaryResult,
KnowledgePanelResult,
OrganicResult,
ResultTypes,
TimeResult,
TranslateResult,
TranslateSourceTextRegex,
search,
searchWithPages
});