duck-duck-scrape
Version:
Search from DuckDuckGo and use it's spice APIs.
221 lines (220 loc) • 9.14 kB
JavaScript
;
var __importDefault = (this && this.__importDefault) || function (mod) {
return (mod && mod.__esModule) ? mod : { "default": mod };
};
Object.defineProperty(exports, "__esModule", { value: true });
exports.autocomplete = exports.search = void 0;
const html_entities_1 = require("html-entities");
const needle_1 = __importDefault(require("needle"));
const util_1 = require("../util");
const defaultOptions = {
safeSearch: util_1.SafeSearchType.OFF,
time: util_1.SearchTimeType.ALL,
locale: 'en-us',
region: 'wt-wt',
offset: 0,
marketRegion: 'en-US'
};
const SEARCH_REGEX = /DDG\.pageLayout\.load\('d',(\[.+\])\);DDG\.duckbar\.load(?:Module)?\('/;
const IMAGES_REGEX = /;DDG\.duckbar\.load\('images', ({"ads":.+"vqd":{".+":"\d-\d+-\d+"}})\);DDG\.duckbar\.load\('news/;
const NEWS_REGEX = /;DDG\.duckbar\.load\('news', ({"ads":.+"vqd":{".+":"\d-\d+-\d+"}})\);DDG\.duckbar\.load\('videos/;
const VIDEOS_REGEX = /;DDG\.duckbar\.load\('videos', ({"ads":.+"vqd":{".+":"\d-\d+-\d+"}})\);DDG\.duckbar\.loadModule\('related_searches/;
const RELATED_SEARCHES_REGEX = /DDG\.duckbar\.loadModule\('related_searches', ({"ads":.+"vqd":{".+":"\d-\d+-\d+"}})\);DDG\.duckbar\.load\('products/;
/**
* Search something.
* @category Search
* @param query The query to search with
* @param options The options of the search
* @param needleOptions The options of the HTTP request
* @returns Search results
*/
async function search(query, options, needleOptions) {
if (!query)
throw new Error('Query cannot be empty!');
if (!options)
options = defaultOptions;
else
options = sanityCheck(options);
let vqd = options.vqd;
if (!vqd)
vqd = await (0, util_1.getVQD)(query, 'web', needleOptions || { headers: util_1.COMMON_HEADERS });
/* istanbul ignore next */
const queryObject = {
q: query,
...(options.safeSearch !== util_1.SafeSearchType.STRICT ? { t: 'D' } : {}),
l: options.locale,
...(options.safeSearch === util_1.SafeSearchType.STRICT ? { p: '1' } : {}),
kl: options.region || 'wt-wt',
s: String(options.offset),
dl: 'en',
ct: 'US',
bing_market: options.marketRegion,
df: options.time,
vqd,
...(options.safeSearch !== util_1.SafeSearchType.STRICT ? { ex: String(options.safeSearch) } : {}),
sp: '1',
bpa: '1',
biaexp: 'b',
msvrtexp: 'b',
...(options.safeSearch === util_1.SafeSearchType.STRICT
? {
videxp: 'a',
nadse: 'b',
eclsexp: 'a',
stiaexp: 'a',
tjsexp: 'b',
related: 'b',
msnexp: 'a'
}
: {
nadse: 'b',
eclsexp: 'b',
tjsexp: 'b'
// cdrexp: 'b'
})
};
const response = await (0, needle_1.default)('get', `https://links.duckduckgo.com/d.js?${(0, util_1.queryString)(queryObject)}`, needleOptions);
if (response.body.includes('DDG.deep.is506'))
throw new Error('A server error occurred!');
if (response.body.toString().includes('DDG.deep.anomalyDetectionBlock'))
throw new Error('DDG detected an anomaly in the request, you are likely making requests too quickly.');
const searchResults = JSON.parse(SEARCH_REGEX.exec(response.body)[1].replace(/\t/g, ' '));
// check for no results
if (searchResults.length === 1 && !('n' in searchResults[0])) {
const onlyResult = searchResults[0];
/* istanbul ignore next */
if ((!onlyResult.da && onlyResult.t === 'EOF') || !onlyResult.a || onlyResult.d === 'google.com search')
return {
noResults: true,
vqd,
results: []
};
}
const results = {
noResults: false,
vqd,
results: []
};
// Populate search results
for (const search of searchResults) {
if ('n' in search)
continue;
let bang;
if (search.b) {
const [prefix, title, domain] = search.b.split('\t');
bang = { prefix, title, domain };
}
results.results.push({
title: search.t,
description: (0, html_entities_1.decode)(search.a),
rawDescription: search.a,
hostname: search.i,
icon: `https://external-content.duckduckgo.com/ip3/${search.i}.ico`,
url: search.u,
bang
});
}
// Images
const imagesMatch = IMAGES_REGEX.exec(response.body);
if (imagesMatch) {
const imagesResult = JSON.parse(imagesMatch[1].replace(/\t/g, ' '));
results.images = imagesResult.results.map((i) => {
i.title = (0, html_entities_1.decode)(i.title);
return i;
});
}
// News
const newsMatch = NEWS_REGEX.exec(response.body);
if (newsMatch) {
const newsResult = JSON.parse(newsMatch[1].replace(/\t/g, ' '));
results.news = newsResult.results.map((article) => ({
date: article.date,
excerpt: (0, html_entities_1.decode)(article.excerpt),
image: article.image,
relativeTime: article.relative_time,
syndicate: article.syndicate,
title: (0, html_entities_1.decode)(article.title),
url: article.url,
isOld: !!article.is_old
}));
}
// Videos
const videosMatch = VIDEOS_REGEX.exec(response.body);
if (videosMatch) {
const videoResult = JSON.parse(videosMatch[1].replace(/\t/g, ' '));
results.videos = [];
/* istanbul ignore next */
for (const video of videoResult.results) {
results.videos.push({
url: video.content,
title: (0, html_entities_1.decode)(video.title),
description: (0, html_entities_1.decode)(video.description),
image: video.images.large || video.images.medium || video.images.small || video.images.motion,
duration: video.duration,
publishedOn: video.publisher,
published: video.published,
publisher: video.uploader,
viewCount: video.statistics.viewCount || undefined
});
}
}
// Related Searches
const relatedMatch = RELATED_SEARCHES_REGEX.exec(response.body);
if (relatedMatch) {
const relatedResult = JSON.parse(relatedMatch[1].replace(/\t/g, ' '));
results.related = [];
for (const related of relatedResult.results) {
results.related.push({
text: related.text,
raw: related.display_text
});
}
}
// TODO: Products
return results;
}
exports.search = search;
function sanityCheck(options) {
options = Object.assign({}, defaultOptions, options);
if (!(options.safeSearch in util_1.SafeSearchType))
throw new TypeError(`${options.safeSearch} is an invalid safe search type!`);
/* istanbul ignore next */
if (typeof options.safeSearch === 'string')
options.safeSearch = util_1.SafeSearchType[options.safeSearch];
if (typeof options.offset !== 'number')
throw new TypeError(`Search offset is not a number!`);
if (options.offset < 0)
throw new RangeError('Search offset cannot be below zero!');
if (options.time &&
!Object.values(util_1.SearchTimeType).includes(options.time) &&
!/\d{4}-\d{2}-\d{2}..\d{4}-\d{2}-\d{2}/.test(options.time))
throw new TypeError(`${options.time} is an invalid search time!`);
if (!options.locale || typeof options.locale !== 'string')
throw new TypeError('Search locale must be a string!');
if (!options.region || typeof options.region !== 'string')
throw new TypeError('Search region must be a string!');
if (!options.marketRegion || typeof options.marketRegion !== 'string')
throw new TypeError('Search market region must be a string!');
if (options.vqd && !/\d-\d+-\d+/.test(options.vqd))
throw new Error(`${options.vqd} is an invalid VQD!`);
return options;
}
/**
* Get auto-complete terms from a query.
* @category Search
* @param query The query to search
* @param region The region to search as
* @param needleOptions The options of the HTTP request
* @returns Autocomplete terms
*/
async function autocomplete(query, region, needleOptions) {
if (!query)
throw new Error('Query cannot be empty!');
const queryObject = {
q: query,
kl: region || 'wt-wt'
};
const response = await (0, needle_1.default)('get', `https://duckduckgo.com/ac/?${(0, util_1.queryString)(queryObject)}`, needleOptions);
return (0, util_1.ensureJSON)(response.body);
}
exports.autocomplete = autocomplete;