UNPKG

twdl

Version:

Downloads image and video tweets from Twitter

230 lines 10.1 kB
import logSymbols from "log-symbols";
import * as util from "../util.js";
import * as api from "../api.js";
import { parseTweetUrl } from "./twitterApi.js";
import bluebird from 'bluebird';
import _debug from 'debug';
import { CookieJar } from "tough-cookie";

const { join } = bluebird;
const debug = _debug('twdl:nitter');

/** Built-in Nitter instances used when no custom list is configured. */
const NitterInstances = [
    'https://nitter.poast.org',
    'https://nitter.privacydev.net',
];

/**
 * Parses the comma-separated TWDL_NITTER_INSTANCES value.
 *
 * @param {string | undefined} raw - Raw environment variable value.
 * @returns {string[] | undefined} The non-empty, trimmed entries, or
 *   `undefined` when the variable is unset, empty, or contains no entries, so
 *   that callers fall back to the built-in list instead of an empty base URL.
 */
function parseEnvInstances(raw) {
    if (!raw) {
        return undefined;
    }
    // Trim each entry individually so "a, b" does not yield " b".
    const instances = raw.split(',').map((entry) => entry.trim()).filter(Boolean);
    if (instances.length === 0) {
        return undefined;
    }
    // Keep the debug line short for very long lists.
    const instancesText = raw.length > 50 ? `${raw.slice(0, 50)}…` : raw;
    debug('Using custom Nitter instances: %s', instancesText);
    return instances;
}

/** Custom instances from the environment, or `undefined` when none are usable. */
const EnvInstances = parseEnvInstances(process.env.TWDL_NITTER_INSTANCES);

/**
 * Builds an options object from either an options object or a base-URI string.
 * Any other input type leaves `uri` as an empty string.
 */
function toNitterOptions(source) {
    const options = { uri: '' };
    if (typeof source === 'object') {
        Object.assign(options, source);
    } else if (typeof source === 'string') {
        options.uri = source;
    }
    return options;
}

/**
 * Returns the options describing which Nitter instance to talk to.
 *
 * @param {object | string} [getCustom] - Explicit options object or base URI.
 *   When truthy it is used as-is and no instance is picked.
 * @returns {{ uri: string }} Options with at least a `uri` property. Without
 *   `getCustom`, a random entry of the configured (or built-in) list is used.
 */
export function getNitterOptions(getCustom) {
    if (getCustom) {
        return toNitterOptions(getCustom);
    }
    const instances = EnvInstances ?? NitterInstances;
    const randomIndex = Math.floor(Math.random() * instances.length);
    return toNitterOptions(instances[randomIndex]);
}

// NOTE(review): `rejectUnauthorized: false` disables TLS certificate
// verification for every request made with these options. Left unchanged
// because callers may depend on it; confirm this is intentional.
const gotOptions = {
    https: { rejectUnauthorized: false },
};

/**
 * Fetches the profile page of the tweet's author and extracts the profile
 * card fields.
 *
 * @param {object} tweetData - Reads `username`, falling back to
 *   `util.getUsername(tweetData.finalUrl)` when it is null/undefined.
 * @param {object} options - Accepted for signature parity; not read here.
 * @returns {Promise<object>} Resolves to a media-data object with `bio`,
 *   `website`, `location` and `joined` set. On request failure it resolves to
 *   whatever `api.downloadError` returns.
 */
export function getProfileBio(tweetData, options) {
    const mediaData = util.newMediaData();
    const username = tweetData.username ?? util.getUsername(tweetData.finalUrl);
    const nitterOptions = getNitterOptions();
    const url = `${nitterOptions.uri}/${username}`;

    function getBioData(jq) {
        const profileCard = jq('.profile-card');
        const textOf = (selector) => profileCard.find(selector).text().trim();
        mediaData.bio = textOf('.profile-bio');
        mediaData.website = textOf('.profile-website');
        mediaData.location = textOf('.profile-location');
        mediaData.joined = textOf('.profile-joindate');
        return mediaData;
    }

    return api.gotInstance.get(url, gotOptions)
        .then(api.loadCheerio)
        .then(getBioData)
        .catch((e) => api.downloadError(e, api.RequestType.NitterBio));
}

/**
 * Converts a Nitter-proxied image path into a direct pbs.twimg.com URL.
 *
 * @param {string} imagePath - Path as found in an `src` attribute.
 * @returns {string} Absolute URL with Nitter's `/pic` prefix and the
 *   size/format suffixes removed.
 */
function fixImageUrl(imagePath) {
    // `/pic/enc/<base64>` paths carry the real path base64-encoded.
    function decodeBase64(match, group1) {
        const decoded = Buffer.from(group1, 'base64').toString('ascii');
        return `/${decoded}`;
    }
    let uri = decodeURIComponent(imagePath);
    uri = uri.replace(/^\/pic\/enc\/([A-Za-z0-9/=]+)/, decodeBase64);
    uri = uri.replace(/^\/pic/, '');
    uri = uri.replace('_bigger', '');
    uri = uri.replace('?name=small', '');
    uri = uri.replace(/&format=[a-z]+/i, '');
    return `https://pbs.twimg.com${uri}`;
}

/**
 * Scrapes a tweet page on a Nitter instance and collects its metadata and
 * media URLs, merged with the author's profile information.
 *
 * @param {object} tweetData - Passed to `parseTweetUrl`; its `username` is
 *   overwritten with the username found on the page.
 * @param {object} options - Passed through to `parseTweetUrl`, `getProfileBio`
 *   and `getVideo`.
 * @returns {Promise<object>} Media data. When the page shows an error panel or
 *   no tweet is found, `error` is set and `media` is an empty array.
 */
export function getMedia(tweetData, options) {
    const parsedTweetUrl = parseTweetUrl(tweetData, options);

    function combineMediaData(mediaData, bioData) {
        if (mediaData && bioData) {
            // Only overwrite with non-empty profile fields.
            if (bioData.bio) mediaData.bio = bioData.bio;
            if (bioData.website) mediaData.website = bioData.website;
            if (bioData.location) mediaData.location = bioData.location;
            if (bioData.joined) mediaData.joined = bioData.joined;
        }
        return mediaData;
    }

    function getMediaData(jq) {
        const tweetContainer = jq('.main-tweet').first();
        const tweet = tweetContainer.find('.timeline-item').first();
        const mediaContainer = tweet.find('.attachments').first();
        const mediaData = util.newMediaData();
        const errorPanel = jq('.error-panel');

        if (errorPanel.length > 0) {
            mediaData.error = new Error(errorPanel.text().trim());
            mediaData.media = [];
            return mediaData;
        }
        if (tweet.length === 0) {
            mediaData.error = new Error('Tweet not found');
            mediaData.media = [];
            return mediaData;
        }

        mediaData.name = tweet.find('.fullname').first().text().trim();
        mediaData.username = tweet.find('.username').first().text().trim().replace('@', '');

        // A missing `src` would otherwise become "https://pbs.twimg.comundefined".
        const avatarSrc = tweet.find('.avatar').first().attr('src');
        if (avatarSrc) {
            mediaData.avatar = fixImageUrl(avatarSrc);
        }

        // getProfileBio reads tweetData.username, so publish it before the call below.
        tweetData.username = mediaData.username;
        mediaData.finalUrl = jq.finalUrl;
        mediaData.isVideo = tweet.find('.attachment.video-container, .attachments.media-gif').length > 0;
        mediaData.text = tweet.find('.tweet-content').text().trim();

        const dateText = tweet.find('.tweet-date a').first().attr('title')?.replace(' · ', ' ');
        if (dateText) {
            const date = new Date(dateText);
            // toISOString() throws a RangeError on an invalid date; skip the
            // date fields rather than failing the whole scrape.
            if (!Number.isNaN(date.getTime())) {
                mediaData.date = date;
                mediaData.dateFormat = date.toISOString();
            }
        }

        mediaData.media = [];
        if (!mediaData.isVideo) {
            const images = mediaContainer.find('.attachment.image img')
                .map((i, el) => jq(el).attr('src'))
                .get()
                .map(fixImageUrl);
            mediaData.media.push(...images);
        }

        return join(mediaData, getProfileBio(tweetData, options), combineMediaData);
    }

    async function getVideoData(mediaData) {
        if (mediaData.isVideo) {
            let videoUrl = '';
            try {
                videoUrl = await getVideo(tweetData, options);
            } catch (e) {
                // Best effort: a failed video lookup still returns the rest of the data.
                api.downloadError(e, api.RequestType.VideoUrl);
            }
            if (videoUrl) {
                mediaData.media.push(videoUrl);
            }
        }
        return mediaData;
    }

    const nitterOptions = getNitterOptions();
    const url = `${nitterOptions.uri}/${parsedTweetUrl.username}/status/${parsedTweetUrl.statusId}`;
    console.log(`${logSymbols.info} Nitter URL: ${url}`);

    return api.gotInstance.get(url, gotOptions)
        .then(api.loadCheerio)
        .then(getMediaData)
        .then(getVideoData);
}

/**
 * Resolves the video URL of a tweet by posting to the instance's `/enablehls`
 * endpoint with the `hlsPlayback=on` cookie and reading the returned page.
 *
 * @param {object} tweetData - Passed to `parseTweetUrl`.
 * @param {object} options - Passed to `parseTweetUrl`.
 * @returns {Promise<string | undefined>} The `<source>` URL when present,
 *   otherwise the highest-resolution variant playlist URL, or `undefined`
 *   when neither can be determined.
 */
async function getVideo(tweetData, options) {
    const parsedTweetUrl = parseTweetUrl(tweetData, options);
    const nitterOptions = getNitterOptions();
    const path = `/${parsedTweetUrl.username}/status/${parsedTweetUrl.statusId}`;
    const url = `${nitterOptions.uri}/enablehls`;

    function getPlaylistPlaylistUrl(jq) {
        const video = jq('.main-tweet .attachments .attachment.video-container video').first();
        const videoDataUrl = video.attr('data-url');
        const videoSource = video.find('source').first();

        if (videoSource.length > 0) {
            const videoUrl = videoSource.attr('src');
            debug('Got video source URL: %s', videoUrl);
            return videoUrl;
        }
        if (videoDataUrl) {
            const playlistUrl = `${nitterOptions.uri}${videoDataUrl}`;
            debug('Got playlist playlist URL: %s', playlistUrl);
            return api.gotInstance.get(playlistUrl, gotOptions)
                .then(parsePlaylistPlaylist);
        }
        return undefined;
    }

    // Picks the variant with the most pixels from a master playlist; the URI
    // of a variant is read from the line following its RESOLUTION line.
    function parsePlaylistPlaylist(response) {
        const { body: data } = response;
        const lines = data.split('\n');
        let topPixels = 0;
        let videoPlaylistUrl = '';

        for (let i = 0; i < lines.length; i++) {
            // Malformed RESOLUTION attributes are skipped instead of throwing.
            const resolution = lines[i].match(/RESOLUTION=(\d+)x(\d+)/);
            if (!resolution) {
                continue;
            }
            const pixels = Number(resolution[1]) * Number(resolution[2]);
            if (pixels <= topPixels) {
                continue;
            }
            // A RESOLUTION line at the end of the file has no URI line after it.
            const pathOnly = lines[i + 1]?.trim();
            if (!pathOnly) {
                continue;
            }
            topPixels = pixels;
            videoPlaylistUrl = pathOnly;
        }

        if (!videoPlaylistUrl) {
            return undefined;
        }

        if (videoPlaylistUrl.includes('/enc/')) {
            const urlMatch = videoPlaylistUrl.match(/\/enc\/([A-Za-z0-9]+)\/([^/]+)/);
            if (urlMatch) {
                // The last path segment is '_'-separated base64 parts; the
                // decoded parts are joined with '?'.
                videoPlaylistUrl = decodeURIComponent(urlMatch[2])
                    .split('_')
                    .map((part) => Buffer.from(part, 'base64').toString('ascii'))
                    .join('?');
            }
        }
        if (!videoPlaylistUrl.startsWith('https:')) {
            videoPlaylistUrl = `${nitterOptions.uri}${videoPlaylistUrl}`;
        }
        if (videoPlaylistUrl.includes('https%3A')) {
            // The real URL is percent-encoded inside the path.
            const parsedUrl = new URL(videoPlaylistUrl);
            const urlMatch = parsedUrl.pathname.match(/https%3A[^ ]+/);
            if (urlMatch) {
                videoPlaylistUrl = decodeURIComponent(urlMatch[0]);
            }
        }
        debug('Got highest resolution playlist URL: %s', videoPlaylistUrl);
        return videoPlaylistUrl;
    }

    // NOTE(review): this helper is not referenced anywhere in this function;
    // kept in case it is meant to be wired into the chain — confirm or remove.
    function parsePlaylist(response) {
        const { body: data } = response;
        const regex = /#EXT-X-MAP:URI="(.+)"/;
        const match = data.match(regex);
        if (match) {
            const host = response.requestUrl.hostname.includes('video.twimg.com')
                ? `${response.requestUrl.protocol}//${response.requestUrl.hostname}`
                : nitterOptions.uri;
            const videoUrl = `${host}${match[1]}`;
            debug('Got video URL from playlist: %s', videoUrl);
            return videoUrl;
        }
        return undefined;
    }

    const cookieJar = new CookieJar();
    cookieJar.setCookieSync('hlsPlayback=on', url);

    return api.gotInstance.post(url, {
        ...gotOptions,
        form: { referer: path },
        cookieJar,
    })
        .then(api.loadCheerio)
        .then(getPlaylistPlaylistUrl);
}
//# sourceMappingURL=nitter.js.map