UNPKG

twdl

Version:

Downloads image and video tweets from Twitter

194 lines 7.29 kB
import * as got from 'got';
import logSymbols from 'log-symbols';
import mergeOptions from 'merge-options';
import cheerio from 'cheerio';
import { normalizeUrl } from './util.js';
import * as cache from './cache.js';
import { getNitterOptions } from './scrapers/nitter.js';

/** Pool of browser User-Agent strings; getUserAgent() picks one at random. */
const userAgents = [
    'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.3',
    'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.3.1 Safari/605.1.1',
];

/**
 * Returns the User-Agent header value to use for a request.
 *
 * @param {string} [useCustom] - When a string is given it is returned as-is.
 * @returns {string} The custom value, or a random entry from `userAgents`.
 */
export function getUserAgent(useCustom) {
    if (typeof useCustom === 'string') {
        return useCustom;
    }
    return userAgents[Math.floor(Math.random() * userAgents.length)];
}

/**
 * Builds a request config: a default `User-Agent` header merged with `config`
 * (values in `config` win), plus a `Cookie` header when `options.cookie` is
 * a non-empty own property.
 *
 * @param {object} [config] - Request options merged over the default headers.
 * @param {object} [options] - May carry a `cookie` value to send.
 * @param {string} [userAgent] - Custom User-Agent; random one when omitted.
 * @returns {object} The merged config object.
 */
export function getRequestConfig(config, options, userAgent) {
    const newConfig = mergeOptions({ headers: { 'User-Agent': getUserAgent(userAgent) } }, config);
    // Guard against `options === null` and a nullish `cookie` value, both of
    // which previously raised a TypeError instead of simply sending no cookie.
    const hasCookieKey = options != null && Object.prototype.hasOwnProperty.call(options, 'cookie');
    const cookie = hasCookieKey ? options.cookie : undefined;
    if (cookie != null && cookie.length > 0) {
        newConfig.headers.Cookie = cookie;
    }
    return newConfig;
}

/**
 * Identifies which kind of request failed, for downloadError() messages.
 * Compiled TypeScript numeric enum: maps names to numbers and numbers back to names.
 */
export var RequestType;
(function (RequestType) {
    RequestType[RequestType["FinalUrl"] = 0] = "FinalUrl";
    RequestType[RequestType["GetId"] = 1] = "GetId";
    RequestType[RequestType["NitterMedia"] = 2] = "NitterMedia";
    RequestType[RequestType["NitterBio"] = 3] = "NitterBio";
    RequestType[RequestType["PuppeteerMedia"] = 4] = "PuppeteerMedia";
    RequestType[RequestType["VideoUrl"] = 5] = "VideoUrl";
})(RequestType || (RequestType = {}));

/**
 * Logs a human-readable message for a failed HTTP request.
 *
 * Errors whose `name` is not `'HTTPError'` are rethrown unchanged.
 *
 * @param {Error} err - The error raised by the request.
 * @param {number} requestType - A `RequestType` member describing the request.
 * @throws Rethrows `err` when it is not an HTTPError.
 */
export function downloadError(err, requestType) {
    function getRequestTypeText() {
        switch (requestType) {
            case RequestType.FinalUrl:
                return 'Request to get tweet URL';
            case RequestType.GetId:
                return 'Request to get user ID';
            case RequestType.NitterMedia:
                return 'Nitter media download';
            case RequestType.NitterBio:
                return 'Nitter bio download';
            case RequestType.PuppeteerMedia:
                return 'Puppeteer media download';
            case RequestType.VideoUrl:
                return 'Request to get video URL';
            default:
                return "Something else";
        }
    }

    const requestTypeText = getRequestTypeText();
    // got's HTTPError carries the status on `err.response`; reading only
    // `err.statusCode` always produced 0 and so every failure was reported as
    // "Unknown error". `err.statusCode` is kept as a fallback for other shapes.
    const statusCode = err?.response?.statusCode || err?.statusCode || 0;

    if (err.name !== 'HTTPError') {
        throw err;
    }

    if (statusCode >= 400 && statusCode < 500) {
        console.log(`${logSymbols.error} ${requestTypeText} has failed. Tweet is probably deleted.`, statusCode);
    }
    else if (statusCode >= 500) {
        console.log(`${logSymbols.error} ${requestTypeText} has failed. There is a technical issue.`, statusCode);
    }
    else {
        console.log(`${logSymbols.error} ${requestTypeText} has failed. Unknown error.`, statusCode, err.message);
    }
}

/**
 * Points a Nitter URL at the host taken from getNitterOptfurther().uri.
 *
 * @param {string|URL} url - URL to inspect. A URL object is modified in place.
 * @returns {string|URL} The original string when it cannot be parsed; otherwise
 *   a URL object, with its hostname replaced only if it matched /nitter/i.
 */
function replaceNitterWithNew(url) {
    let target = url;
    if (typeof target === 'string') {
        try {
            target = new URL(target);
        }
        catch (e) {
            // Not a parseable URL: hand the input back untouched.
            return url;
        }
    }
    if (/nitter/i.test(target.hostname) === false) {
        return target;
    }
    const nitterOptions = getNitterOptions();
    // "scheme://host/..." split on '/' puts the host part at index 2.
    target.hostname = nitterOptions.uri.split('/')[2];
    return target;
}

/**
 * Decides whether a response should be retried from the afterResponse hook.
 *
 * @param {object} response - Response with `statusCode`, `url` and `body`.
 * @returns {boolean} True for 4xx/5xx responses, except a Nitter response whose
 *   body contains 'Tweet not found'.
 */
function shouldRetry(response) {
    const isError = response.statusCode >= 400 && response.statusCode < 600;
    const isNitter = /nitter/i.test(response.url);
    const isNotFound = response.body.toString().includes('Tweet not found');
    return (isError && (isNitter && isNotFound) === false);
}

/**
 * Shared got instance: random User-Agent, cache read/write hooks, retry on
 * error responses (switching Nitter host via replaceNitterWithNew), and
 * special handling of 302 redirects for twitter.com URLs.
 */
export const gotInstance = got.got.extend({
    headers: {
        'User-Agent': getUserAgent(),
    },
    hooks: {
        beforeRequest: [
            (options) => cache.readCache(options),
        ],
        afterResponse: [
            (response, retryWithMergedOptions) => {
                if (shouldRetry(response)) {
                    // Retry with a freshly picked User-Agent and, for Nitter
                    // URLs, the host from the current Nitter options.
                    const newOptions = {
                        headers: {
                            'User-Agent': getUserAgent(),
                        },
                    };
                    const newUrl = replaceNitterWithNew(response.requestUrl);
                    return retryWithMergedOptions({
                        ...newOptions,
                        url: newUrl,
                    });
                }
                if (!response.request.options.resolveBodyOnly) {
                    response.finalUrl = response.url;
                }
                return response;
            },
            (response) => cache.writeCache(response),
        ],
        beforeRetry: [
            error => {
                if (error?.request?.options?.url) {
                    error.request.options.url = replaceNitterWithNew(error.request.options.url);
                    console.log(`${logSymbols.warning} Retrying to download again: '${error.request.options.url}'`);
                }
            }
        ],
        beforeRedirect: [
            async (options, response) => {
                if (response.statusCode === 302 && options.url.toString().includes('twitter.com')) {
                    // A relative Location keeps the current URL; an absolute one
                    // is used directly. Either way the URL is normalized and
                    // further redirects are disabled.
                    let finalRedirectUrl = response.headers.location.startsWith('/')
                        ? options.url.toString()
                        : response.headers.location;
                    finalRedirectUrl = await normalizeUrl(finalRedirectUrl);
                    options.url = finalRedirectUrl;
                    options.followRedirect = false;
                }
            }
        ],
    },
    retry: {
        methods: ['HEAD', 'GET', 'POST'],
        limit: 5,
        backoffLimit: 5000,
        statusCodes: [
            400,
            404,
            502,
            530,
        ],
        errorCodes: [
            'ETIMEDOUT',
            'ECONNRESET',
            'EADDRINUSE',
            'ECONNREFUSED',
            'EPIPE',
            'ENOTFOUND',
            'ENETUNREACH',
            'EAI_AGAIN',
            'ERR_GOT_REQUEST_ERROR',
        ],
    },
    timeout: {
        request: 5000,
    },
});

/**
 * Resolves a (possibly shortened) link to its final tweet URL.
 *
 * URLs that already contain 'twitter.com' are only normalized. Anything else
 * is requested once without following redirects, the redirect target is
 * extracted and normalized, and that target is requested again with redirects
 * enabled to obtain the final response URL.
 *
 * @param {string} url - The link to resolve.
 * @returns {string|Promise<string>} Whatever normalizeUrl() returns for
 *   twitter.com URLs; otherwise a promise for the final response URL.
 */
export function getFinalUrl(url) {
    // Finds the redirect target in order of preference: meta-refresh tag in
    // the body, a twitter.com Location header, then the response's own URL.
    function getLocation(response) {
        const body = response.body;
        if (body && body.length > 0) {
            const match = body.match(/content="[0-9];URL=([^"]+)"/i);
            if (match && match.length > 1) {
                return match[1];
            }
        }
        const headers = response.headers;
        if (headers && headers.location && headers.location.includes('twitter.com')) {
            return headers.location;
        }
        return response.url;
    }

    // Renamed from an inner `getFinalUrl` that shadowed the exported function.
    function getResponseUrl(response) {
        return response.url;
    }

    // Without `permalink` this requests `url` and does not follow redirects;
    // with it, requests `permalink` and follows them.
    function decodeTco(permalink) {
        return gotInstance.get(permalink ?? url, { followRedirect: !!permalink });
    }

    if (url.includes('twitter.com')) {
        return normalizeUrl(url);
    }
    return decodeTco()
        .then(getLocation)
        .then(normalizeUrl)
        .then(decodeTco)
        .then(getResponseUrl);
}

/**
 * Parses a response body with cheerio.
 *
 * @param {object} response - Response providing `body` and `url`.
 * @returns {object} The cheerio root, with `finalUrl` set to `response.url`.
 */
export function loadCheerio(response) {
    const jq = cheerio.load(response.body);
    jq.finalUrl = response.url;
    return jq;
}
//# sourceMappingURL=api.js.map