duckduckgo-search
Version:
Search for words, documents, images, videos, news, maps and text translation using the DuckDuckGo.com search engine.
288 lines (254 loc) • 6.57 kB
JavaScript
const axios = require("axios");
/**
 * Pause asynchronously, standing in for a blocking sleep() call.
 *
 * @param {number} ms - Delay handed to setTimeout, in milliseconds.
 * @returns {Promise<void>} Promise that resolves (with no value) when the timer fires.
 */
function sleep(ms) {
  return new Promise(function schedule(wake) {
    setTimeout(wake, ms);
  });
}
/**
 * Error type used to signal a failed HTTP exchange; stands in for
 * httpx._exceptions.HTTPError.
 */
class HTTPError extends Error {
  /**
   * @param {string} message - Human-readable description of the failure.
   */
  constructor(message) {
    super(message);
    // Tag the instance so logs can distinguish it from a plain Error.
    Object.assign(this, { name: "HTTPError" });
  }
}
/**
 * Decode HTML character references in a string.
 *
 * Handles the common named entities (&quot; &amp; &lt; &gt; &apos; &nbsp;)
 * as well as decimal (&#39;) and hexadecimal (&#x27;) numeric references.
 * Unknown or out-of-range references are left untouched.
 *
 * Note: this intentionally shadows the legacy global `unescape`.
 *
 * @param {string} text - Text that may contain HTML entities.
 * @returns {string} The text with recognised entities replaced by their characters.
 */
function unescape(text) {
  const namedEntities = {
    quot: '"',
    amp: "&",
    lt: "<",
    gt: ">",
    apos: "'",
    nbsp: "\u00a0",
  };
  // A single pass guarantees that "&amp;lt;" becomes "&lt;" and is not decoded twice.
  return text.replace(
    /&(#[xX][0-9a-fA-F]+|#[0-9]+|[a-zA-Z][a-zA-Z0-9]*);/g,
    (entity, body) => {
      if (body[0] !== "#") {
        return Object.prototype.hasOwnProperty.call(namedEntities, body)
          ? namedEntities[body]
          : entity;
      }
      const isHex = body[1] === "x" || body[1] === "X";
      const codePoint = Number.parseInt(body.slice(isHex ? 2 : 1), isHex ? 16 : 10);
      if (codePoint > 0x10ffff) {
        // String.fromCodePoint would throw a RangeError; keep the raw text instead.
        return entity;
      }
      // NUL is not a valid character reference target; use the replacement character.
      return codePoint === 0 ? "\ufffd" : String.fromCodePoint(codePoint);
    }
  );
}
/**
 * Thin counterpart of Python's re.sub with the same argument order.
 *
 * @param {RegExp|string} pattern - What to look for in the text.
 * @param {string|Function} replacement - What each match is replaced with.
 * @param {string} text - The string to operate on.
 * @returns {string} Result of text.replace(pattern, replacement).
 */
function sub(pattern, replacement, text) {
  const substituted = text.replace(pattern, replacement);
  return substituted;
}
/**
 * Decode percent-escapes in a URL, in the spirit of Python's urllib.parse.unquote.
 *
 * Each run of consecutive %XX escapes is decoded as UTF-8. A run that is not
 * valid UTF-8 is left exactly as it was, so malformed input never throws.
 * Non-string input is returned unchanged.
 *
 * @param {string} url - URL (or URL fragment) that may contain %XX escapes.
 * @returns {string} The URL with decodable escapes replaced by their characters.
 */
function unquote(url) {
  if (typeof url !== "string") {
    return url;
  }
  return url.replace(/(?:%[0-9A-Fa-f]{2})+/g, (run) => {
    try {
      return decodeURIComponent(run);
    } catch (error) {
      // Invalid UTF-8 byte sequence: keep the original escapes verbatim.
      return run;
    }
  });
}
// Matches one markup tag: "<", any run of characters other than ">", then ">".
// The global flag makes a replace() call remove every tag, not just the first.
const REGEX_STRIP_TAGS = /<[^>]*>/g;
/**
 * Client for DuckDuckGo's JSON search endpoints (image search and text search).
 *
 * Both search methods are async generators: results are fetched page by page
 * and yielded one at a time, with duplicates across pages filtered out.
 */
class SearchApi {
  constructor() {
    // Diagnostics sink. Any object exposing warn() (or warning()) can be assigned.
    this.logger = console;
  }

  /**
   * Search images on duckduckgo.com.
   *
   * @param {string} keywords - Search query (mandatory).
   * @param {string} [region="wt-wt"] - Sent as the `l` request parameter.
   * @param {string} [safesearch="moderate"] - "on", "moderate" or "off" (case-insensitive).
   * @param {?string} [timelimit=null] - Sent as a `time:` filter when set.
   * @param {?string} [size=null] - Sent as a `size:` filter when set.
   * @param {?string} [color=null] - Sent as a `color:` filter when set.
   * @param {?string} [type_image=null] - Sent as a `type:` filter when set.
   * @param {?string} [layout=null] - Sent as a `layout:` filter when set.
   * @param {?string} [license_image=null] - Sent as a `license:` filter when set.
   * @yields {{title, image, thumbnail, url, height, width, source}} One object per unique image.
   * @throws {Error} When keywords are missing or the vqd token cannot be obtained.
   */
  async *images(
    keywords,
    region = "wt-wt",
    safesearch = "moderate",
    timelimit = null,
    size = null,
    color = null,
    type_image = null,
    layout = null,
    license_image = null
  ) {
    if (!keywords) {
      throw new Error("Keywords are mandatory");
    }
    const vqd = await this._getVqd(keywords);
    if (!vqd) {
      throw new Error("Error in getting vqd");
    }

    const safesearchBase = { on: 1, moderate: 1, off: -1 };
    // Unset filters still occupy their comma-separated slot as an empty string.
    const filters = [
      timelimit ? `time:${timelimit}` : "",
      size ? `size:${size}` : "",
      color ? `color:${color}` : "",
      type_image ? `type:${type_image}` : "",
      layout ? `layout:${layout}` : "",
      license_image ? `license:${license_image}` : "",
    ];
    const payload = {
      l: region,
      o: "json",
      s: 0,
      q: keywords,
      vqd: vqd,
      f: filters.join(","),
      p: safesearchBase[safesearch.toLowerCase()],
    };

    const seen = new Set();
    // Hard cap of 10 pages per search.
    for (let page = 0; page < 10; page++) {
      const resp = await this._getUrl(
        "GET",
        "https://duckduckgo.com/i.js",
        payload
      );
      if (!resp) {
        break;
      }
      try {
        const respJson = resp.data;
        const pageData = respJson.results;
        if (!pageData) {
          break;
        }
        let resultExists = false;
        for (const row of pageData) {
          const imageUrl = row.image;
          if (imageUrl && !seen.has(imageUrl)) {
            seen.add(imageUrl);
            resultExists = true;
            yield {
              title: row.title,
              image: this._normalizeUrl(imageUrl),
              thumbnail: this._normalizeUrl(row.thumbnail),
              url: this._normalizeUrl(row.url),
              height: row.height,
              width: row.width,
              source: row.source,
            };
          }
        }
        const next = respJson.next;
        if (!next || !resultExists) {
          break;
        }
        // The next-page link carries the offset for the following request as "s=<n>".
        const afterOffsetKey = String(next).split("s=")[1];
        if (afterOffsetKey === undefined) {
          this._warn(`images() keywords=${keywords} no offset in next-page link`);
          break;
        }
        payload.s = afterOffsetKey.split("&")[0];
      } catch (error) {
        // Best effort: a malformed page ends the search, but leave a trace of why.
        this._warn(`images() keywords=${keywords} ${this._describe(error)}`);
        break;
      }
    }
  }

  /**
   * Search text results on links.duckduckgo.com.
   *
   * @param {string} keywords - Search query (mandatory).
   * @param {string} [region="wt-wt"] - Sent as the `kl` and `l` request parameters.
   * @param {string} [safesearch="moderate"] - "on", "moderate" or "off" (case-insensitive).
   * @param {?string} [timelimit=null] - Sent as the `df` request parameter.
   * @yields {{title: string, href: string, body: string}} One object per unique result with a non-empty body.
   * @throws {Error} When keywords are missing or the vqd token cannot be obtained.
   */
  async *text(
    keywords,
    region = "wt-wt",
    safesearch = "moderate",
    timelimit = null
  ) {
    if (!keywords) {
      throw new Error("Keywords are mandatory");
    }
    const vqd = await this._getVqd(keywords);
    if (!vqd) {
      throw new Error("Error in getting vqd");
    }
    const payload = {
      q: keywords,
      kl: region,
      l: region,
      s: 0,
      df: timelimit,
      vqd: vqd,
      o: "json",
      sp: "0",
    };
    const level = safesearch.toLowerCase();
    if (level === "moderate") {
      payload.ex = "-1";
    } else if (level === "off") {
      payload.ex = "-2";
    } else if (level === "on") {
      payload.p = "1";
    }

    const seen = new Set();
    // Fixed result offsets requested in order; the search stops at the first empty page.
    const searchPositions = ["0", "20", "70", "120"];
    for (const position of searchPositions) {
      payload.s = position;
      const resp = await this._getUrl(
        "GET",
        "https://links.duckduckgo.com/d.js",
        payload
      );
      if (!resp) {
        break;
      }
      try {
        const pageData = resp.data.results;
        if (!pageData) {
          break;
        }
        let resultExists = false;
        for (const row of pageData) {
          const href = row.u;
          if (
            href &&
            !seen.has(href) &&
            href !== `http://www.google.com/search?q=${keywords}`
          ) {
            seen.add(href);
            const body = this._normalize(row.a);
            if (body) {
              resultExists = true;
              yield {
                title: this._normalize(row.t),
                href: this._normalizeUrl(href),
                body: body,
              };
            }
          }
        }
        if (!resultExists) {
          break;
        }
      } catch (error) {
        // Best effort: a malformed page ends the search, but leave a trace of why.
        this._warn(`text() keywords=${keywords} ${this._describe(error)}`);
        break;
      }
    }
  }

  /**
   * Perform an HTTP request through axios, retrying on failure.
   *
   * Up to three attempts are made, three seconds apart. A 202 response, or a
   * final request URL containing "500", is treated as a failure and retried.
   *
   * @param {string} method - HTTP method; "GET" sends `params` as the query string, anything else as the body.
   * @param {string} url - Target URL.
   * @param {Object} params - Query parameters or request body.
   * @returns {Promise<?Object>} The axios response on status 200, or null when no attempt produced one.
   * @throws The last error when the final attempt fails, or immediately on a 418 error.
   */
  async _getUrl(method, url, params) {
    const maxAttempts = 3;
    for (let attempt = 0; attempt < maxAttempts; attempt++) {
      const isLastAttempt = attempt === maxAttempts - 1;
      try {
        const resp = await axios.request({
          method,
          url,
          [method === "GET" ? "params" : "data"]: params,
        });
        if (this._is500InUrl(resp.config.url) || resp.status === 202) {
          throw new HTTPError("");
        }
        if (resp.status === 200) {
          return resp;
        }
      } catch (ex) {
        this._warn(`_getUrl() ${url} ${this._describe(ex)}`);
        if (isLastAttempt || this._is418(ex)) {
          throw ex;
        }
      }
      // No point in waiting once there is no further attempt to make.
      if (!isLastAttempt) {
        await sleep(3000);
      }
    }
    return null;
  }

  /**
   * Fetch the `vqd` token that the search endpoints require for a query.
   *
   * The token is extracted from the duckduckgo.com response body by looking
   * for one of several `vqd=` spellings and the delimiter that closes it.
   *
   * @param {string} keywords - Search query the token is requested for.
   * @returns {Promise<?string>} The token, or null when the request failed or no token was found.
   */
  async _getVqd(keywords) {
    let resp;
    try {
      resp = await this._getUrl("GET", "https://duckduckgo.com", {
        q: keywords,
      });
    } catch (error) {
      this._warn(`_getVqd() keywords=${keywords} ${this._describe(error)}`);
      return null;
    }

    const html = resp && typeof resp.data === "string" ? resp.data : "";
    const delimiters = [
      ['vqd="', '"'],
      ["vqd=", "&"],
      ["vqd='", "'"],
    ];
    for (const [opening, closing] of delimiters) {
      // indexOf reports a miss with -1 (it never throws), so check explicitly.
      const openingAt = html.indexOf(opening);
      if (openingAt === -1) {
        continue;
      }
      const start = openingAt + opening.length;
      const end = html.indexOf(closing, start);
      if (end > start) {
        return html.substring(start, end);
      }
    }
    this._warn(`_getVqd() keywords=${keywords} vqd not found`);
    return null;
  }

  /**
   * Emit a warning through the configured logger.
   * Uses warn() when available (as on console) and falls back to warning().
   *
   * @param {string} message - Text to log.
   */
  _warn(message) {
    const logger = this.logger;
    if (logger && typeof logger.warn === "function") {
      logger.warn(message);
    } else if (logger && typeof logger.warning === "function") {
      logger.warning(message);
    }
  }

  /**
   * Render a caught value as "<name> <message>" for log lines.
   *
   * @param {*} error - Whatever was thrown.
   * @returns {string} Short description; non-object values are stringified.
   */
  _describe(error) {
    if (error && typeof error === "object") {
      return `${error.name} ${error.message}`;
    }
    return String(error);
  }

  /**
   * Tell whether a request error reports HTTP status 418, which is not retried.
   *
   * @param {*} error - Value caught from the request attempt.
   * @returns {boolean} True when the response status or the message mentions 418.
   */
  _is418(error) {
    if (!error) {
      return false;
    }
    const status = error.response ? error.response.status : undefined;
    const message = typeof error.message === "string" ? error.message : "";
    return status === 418 || message.includes("418");
  }

  /**
   * @param {string} url - URL to inspect.
   * @returns {boolean} True when the URL is a string containing "500".
   */
  _is500InUrl(url) {
    return typeof url === "string" && url.includes("500");
  }

  /**
   * Strip markup tags from an HTML fragment and decode its entities.
   *
   * @param {?string} rawHtml - HTML fragment.
   * @returns {string} Plain text, or "" for empty input.
   */
  _normalize(rawHtml) {
    if (rawHtml) {
      return unescape(sub(REGEX_STRIP_TAGS, "", rawHtml));
    }
    return "";
  }

  /**
   * Unquote a URL and replace every space with "+".
   *
   * @param {?string} url - URL to normalise.
   * @returns {string} Normalised URL, or "" for empty input.
   */
  _normalizeUrl(url) {
    if (url) {
      // Global regex: a plain-string pattern would only replace the first space.
      return unquote(url).replace(/ /g, "+");
    }
    return "";
  }
}
// Export a single SearchApi instance (not the class), so every require() of
// this module receives the same object.
module.exports = new SearchApi();