google-search-ts
Version:
A TypeScript library for performing Google searches, with support for proxies, pagination, and customization.
130 lines • 6.52 kB
JavaScript
;
/**
 * Compiler-emitted helper: re-exposes property `k` of module object `m` on
 * object `o` under the name `k2` (defaults to `k`).
 *
 * An implementation already present on `this` is reused. Otherwise, when
 * `Object.create` exists, the property is defined through a descriptor (a
 * getter that reads `m[k]` on every access, unless the source descriptor can
 * be reused verbatim); without `Object.create` the value is simply copied.
 */
var __createBinding = (this && this.__createBinding) || (function () {
    function defineBinding(target, source, key, alias) {
        if (alias === undefined) {
            alias = key;
        }
        var descriptor = Object.getOwnPropertyDescriptor(source, key);
        var needsGetter;
        if (!descriptor) {
            // Property is missing on the source: fall back to a forwarding getter.
            needsGetter = true;
        }
        else if ("get" in descriptor) {
            // Accessor property: reuse it only when the source is an ES module.
            needsGetter = !source.__esModule;
        }
        else {
            // Data property: reuse it only when it can never change.
            needsGetter = descriptor.writable || descriptor.configurable;
        }
        if (needsGetter) {
            descriptor = {
                enumerable: true,
                get: function () {
                    return source[key];
                }
            };
        }
        Object.defineProperty(target, alias, descriptor);
    }
    function copyBinding(target, source, key, alias) {
        if (alias === undefined) {
            alias = key;
        }
        target[alias] = source[key];
    }
    return Object.create ? defineBinding : copyBinding;
})();
/**
 * Compiler-emitted helper: stores `v` as the `default` member of namespace
 * object `o`.
 *
 * An implementation already present on `this` is reused. Otherwise, when
 * `Object.create` exists, `default` is defined as an enumerable data property
 * through `Object.defineProperty`; without it a plain assignment is used.
 */
var __setModuleDefault = (this && this.__setModuleDefault) || (function () {
    function defineDefault(target, value) {
        var descriptor = { enumerable: true, value: value };
        Object.defineProperty(target, "default", descriptor);
    }
    function assignDefault(target, value) {
        target["default"] = value;
    }
    return Object.create ? defineDefault : assignDefault;
})();
/**
 * Compiler-emitted helper backing `import * as ns from "..."`.
 *
 * A module flagged with `__esModule` is returned untouched. Any other value is
 * wrapped in a fresh object: every own key except "default" is bound onto it
 * with `__createBinding`, and the original value becomes its `default` via
 * `__setModuleDefault`. An implementation already present on `this` is reused.
 */
var __importStar = (this && this.__importStar) || (function () {
    // Key-listing strategy, resolved on first use and then kept.
    var listOwnKeys = null;
    // Fallback for engines without Object.getOwnPropertyNames.
    function collectOwnKeys(source) {
        var names = [];
        for (var key in source) {
            if (Object.prototype.hasOwnProperty.call(source, key)) {
                names[names.length] = key;
            }
        }
        return names;
    }
    function ownKeysOf(source) {
        if (listOwnKeys === null) {
            listOwnKeys = Object.getOwnPropertyNames || collectOwnKeys;
        }
        return listOwnKeys(source);
    }
    return function (mod) {
        if (mod && mod.__esModule) {
            return mod;
        }
        var namespace = {};
        if (mod != null) {
            var names = ownKeysOf(mod);
            for (var idx = 0; idx < names.length; idx++) {
                if (names[idx] === "default") {
                    continue;
                }
                __createBinding(namespace, mod, names[idx]);
            }
        }
        __setModuleDefault(namespace, mod);
        return namespace;
    };
})();
/**
 * Compiler-emitted helper that runs a generator function as an asynchronous
 * routine and returns a promise for its final result.
 *
 * An implementation already present on `this` is reused when available.
 *
 * @param thisArg     `this` value the generator function is invoked with.
 * @param _arguments  Array-like of arguments for the generator function; an
 *                    empty list is used when this is falsy.
 * @param P           Promise constructor to use; falls back to the global
 *                    `Promise` when falsy.
 * @param generator   Generator function; each yielded value is awaited and the
 *                    settled result is fed back into the generator.
 * @returns A `P` instance that resolves with the generator's return value and
 *          rejects if the generator throws.
 */
var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
// Wrap a yielded value in a P promise unless it already is an instance of P.
function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
// `P || (P = Promise)` also assigns the fallback, so `adopt` sees the same constructor.
return new (P || (P = Promise))(function (resolve, reject) {
// Resume the generator with the fulfilled value; a synchronous throw rejects the outer promise.
function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
// Re-raise a rejection inside the generator so its own try/catch can handle it.
function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
// Finish when the generator is done; otherwise wait on the yielded value and continue.
function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }
// `generator` is rebound from the generator function to the iterator it produces, then started.
step((generator = generator.apply(thisArg, _arguments || [])).next());
});
};
/**
 * Compiler-emitted helper backing `import x from "..."`.
 *
 * A module flagged with `__esModule` is returned as-is; any other value is
 * wrapped as `{ default: value }`. An implementation already present on `this`
 * is reused.
 */
var __importDefault = (this && this.__importDefault) || function (mod) {
    if (mod && mod.__esModule) {
        return mod;
    }
    var wrapper = { "default": mod };
    return wrapper;
};
Object.defineProperty(exports, "__esModule", { value: true });
exports.GoogleSearch = void 0;
const axios_1 = __importDefault(require("axios"));
const cheerio = __importStar(require("cheerio"));
/**
 * Percent-decodes `value`, returning it unchanged when it contains a malformed
 * escape sequence (where `decodeURIComponent` would throw `URIError`).
 */
function safeDecodeURIComponent(value) {
    try {
        return decodeURIComponent(value);
    }
    catch (error) {
        if (error instanceof URIError) {
            return value;
        }
        throw error;
    }
}
/**
 * Converts a proxy URL string into the object form used for the axios `proxy`
 * option. Credentials embedded in the URL (`user:pass@host`) are forwarded as
 * `auth`; the port falls back to 443 for `https` proxies and 80 otherwise.
 */
function buildProxyConfig(proxy) {
    const parsed = new URL(proxy);
    const isHttps = proxy.startsWith('https');
    const config = {
        protocol: isHttps ? 'https' : 'http',
        host: parsed.hostname,
        port: Number.parseInt(parsed.port, 10) || (isHttps ? 443 : 80),
    };
    if (parsed.username) {
        // URL keeps credentials percent-encoded; axios expects the raw values.
        config.auth = {
            username: safeDecodeURIComponent(parsed.username),
            password: safeDecodeURIComponent(parsed.password),
        };
    }
    return config;
}
/**
 * Static helpers for fetching a Google results page and extracting
 * `{ url, title, description }` entries from its HTML.
 */
class GoogleSearch {
    /**
     * Builds a randomized Lynx-style User-Agent string of the form
     * `Lynx/x.y.z libwww-FM/x.y SSL-MM/x.y OpenSSL/x.y.z`.
     *
     * @returns {string} The generated User-Agent value.
     */
    static getRandomUserAgent() {
        // Uniform integer in the inclusive range [min, max].
        const randomInt = (min, max) => min + Math.floor(Math.random() * (max - min + 1));
        const lynxVersion = `Lynx/${randomInt(2, 3)}.${randomInt(8, 9)}.${randomInt(0, 2)}`;
        const libwwwVersion = `libwww-FM/${randomInt(2, 3)}.${randomInt(13, 15)}`;
        // The major part used `Math.floor(Math.random())`, which is always 0,
        // so it never varied; it now ranges over 1-2 like the other components.
        const sslMmVersion = `SSL-MM/${randomInt(1, 2)}.${randomInt(3, 5)}`;
        const opensslVersion = `OpenSSL/${randomInt(1, 3)}.${randomInt(0, 4)}.${randomInt(0, 9)}`;
        return `${lynxVersion} ${libwwwVersion} ${sslMmVersion} ${opensslVersion}`;
    }
    /**
     * Fetches the raw HTML of a Google results page.
     *
     * @param {string} term - Search query.
     * @param {object} [options]
     * @param {number} [options.numResults=10] - Requested result count.
     * @param {string} [options.lang='en'] - Sent as the `hl` parameter.
     * @param {string} [options.proxy] - Proxy URL; may embed `user:pass@` credentials.
     * @param {number} [options.timeout=5000] - Passed to axios as `timeout`.
     * @param {string} [options.safe='active'] - Sent as the `safe` parameter.
     * @param {string} [options.region] - Sent as the `gl` parameter when set.
     * @param {number} [options.start=0] - Sent as the `start` parameter.
     * @returns {Promise<*>} The response body (`response.data`).
     * @throws {Error} "Google search request failed: ..." for axios errors (the
     *   original error is attached as `cause`); any other error is rethrown as-is.
     */
    static makeRequest(term_1) {
        return __awaiter(this, arguments, void 0, function* (term, options = {}) {
            const { numResults = 10, lang = 'en', proxy, timeout = 5000, safe = 'active', region, start = 0, } = options;
            const url = 'https://www.google.com/search';
            // NOTE(review): two more results than requested are asked for,
            // presumably to offset blocks dropped during parsing - confirm.
            const query = {
                q: term,
                num: (numResults + 2).toString(),
                hl: lang,
                start: start.toString(),
                safe: safe,
            };
            if (region) {
                query.gl = region;
            }
            const params = new URLSearchParams(query);
            const headers = {
                'User-Agent': GoogleSearch.getRandomUserAgent(),
                'Accept': '*/*',
                // NOTE(review): presumably pre-answers Google's consent prompt - confirm.
                'Cookie': 'CONSENT=PENDING+987; SOCS=CAESHAgBEhIaAB'
            };
            const axiosConfig = {
                headers,
                timeout,
                // Anything other than a plain 200 is treated as a failure.
                validateStatus: (status) => status === 200,
                maxRedirects: 5,
                decompress: true,
            };
            if (proxy) {
                axiosConfig.proxy = buildProxyConfig(proxy);
            }
            try {
                const response = yield axios_1.default.get(`${url}?${params.toString()}`, axiosConfig);
                return response.data;
            }
            catch (error) {
                if (axios_1.default.isAxiosError(error)) {
                    const wrapped = new Error(`Google search request failed: ${error.message}`);
                    // Keep the axios error (status, response, config) reachable for callers.
                    wrapped.cause = error;
                    throw wrapped;
                }
                throw error;
            }
        });
    }
    /**
     * Extracts search results from a results-page HTML document.
     *
     * A block contributes a result only when it has both a link and a title
     * element and the resolved URL starts with "http". Hrefs of the form
     * `/url?q=<target>&...` are unwrapped to `<target>`.
     *
     * @param {string} html - Page markup to parse.
     * @param {boolean} [unique=false] - Skip URLs that were already seen.
     * @returns {Array<{url: string, title: string, description: string}>}
     */
    static parseResults(html, unique = false) {
        const $ = cheerio.load(html);
        const results = [];
        const seenUrls = new Set();
        $('div.g, div.ezO2md, div.MjjYud').each((_, element) => {
            const block = $(element);
            const linkElement = block.find('a[href]').first();
            const titleElement = block.find('h3, span.CVA68e').first();
            if (!linkElement.length || !titleElement.length) {
                return;
            }
            const rawUrl = linkElement.attr('href');
            if (!rawUrl) {
                return;
            }
            const target = rawUrl.startsWith('/url?q=')
                ? rawUrl.split('&')[0].replace('/url?q=', '')
                : rawUrl;
            // A malformed escape in one href must not abort the whole parse.
            const url = safeDecodeURIComponent(target);
            if (unique && seenUrls.has(url)) {
                return;
            }
            seenUrls.add(url);
            if (!url.startsWith('http')) {
                return;
            }
            const descriptionElement = block.find('div.VwiC3b, span.FrIlee, div.s').first();
            results.push({
                url,
                title: titleElement.text().trim(),
                description: descriptionElement.text().trim(),
            });
        });
        return results;
    }
    /**
     * Runs a search: fetches the results page with `makeRequest` and parses it
     * with `parseResults`.
     *
     * @param {string} term - Search query.
     * @param {object} [options] - Same options as `makeRequest`, plus `unique`
     *   (forwarded to `parseResults`).
     * @returns {Promise<Array<{url: string, title: string, description: string}>>}
     */
    static search(term_1) {
        return __awaiter(this, arguments, void 0, function* (term, options = {}) {
            const html = yield GoogleSearch.makeRequest(term, options);
            return GoogleSearch.parseResults(html, options.unique);
        });
    }
}
exports.GoogleSearch = GoogleSearch;
//# sourceMappingURL=googleSearch.js.map