twdl
Version:
Downloads image and video tweets from Twitter
230 lines • 10.1 kB
JavaScript
import logSymbols from "log-symbols";
import * as util from "../util.js";
import * as api from "../api.js";
import { parseTweetUrl } from "./twitterApi.js";
import bluebird from 'bluebird';
import _debug from 'debug';
import { CookieJar } from "tough-cookie";
const { join } = bluebird;
const debug = _debug('twdl:nitter');
/** Default public Nitter instances, used when no custom list is configured. */
const NitterInstances = [
    'https://nitter.poast.org',
    'https://nitter.privacydev.net',
];

/**
 * Splits a comma-separated instance list into clean entries.
 *
 * Each entry is trimmed individually and empty entries are dropped, so
 * "a, b,," yields ['a', 'b'].
 *
 * @param {string|undefined|null} rawList - Raw comma-separated list.
 * @returns {string[]} The usable entries, possibly empty.
 */
function parseInstanceList(rawList) {
    return String(rawList ?? '')
        .split(',')
        .map((entry) => entry.trim())
        .filter(Boolean);
}

// Custom instances from the TWDL_NITTER_INSTANCES environment variable.
// Stays undefined unless at least one usable entry was supplied, so that
// `EnvInstances ?? NitterInstances` reliably falls back to the defaults
// (an empty string or empty array is not nullish and would defeat `??`).
let EnvInstances;
{
    const rawEnvInstances = process.env.TWDL_NITTER_INSTANCES;
    if (rawEnvInstances) {
        const instances = parseInstanceList(rawEnvInstances);
        // Keep the debug line short even for very long lists.
        const instancesText = rawEnvInstances.length > 50
            ? rawEnvInstances.slice(0, 50) + '…'
            : rawEnvInstances;
        if (instances.length > 0) {
            debug('Using custom Nitter instances: %s', instancesText);
            EnvInstances = instances;
        }
        else {
            debug('Ignoring TWDL_NITTER_INSTANCES (no usable entries): %s', instancesText);
        }
    }
}
/**
 * Builds the options object describing which Nitter instance to talk to.
 *
 * @param {object|string} [getCustom] - When truthy, it is used instead of a
 *   random instance: an object is merged into the result, a string becomes
 *   its `uri`, and any other truthy value leaves the defaults untouched.
 * @returns {{uri: string}} A fresh options object; `uri` is '' when nothing
 *   supplied one.
 */
export function getNitterOptions(getCustom) {
    // Copies a source (options object or bare URI string) onto the result.
    const applySource = (target, source) => {
        switch (typeof source) {
            case 'object':
                Object.assign(target, source);
                break;
            case 'string':
                target.uri = source;
                break;
            default:
                break;
        }
        return target;
    };

    const options = { uri: '' };
    if (getCustom) {
        return applySource(options, getCustom);
    }

    // No custom value: pick one instance at random, preferring the
    // environment-provided list over the built-in defaults.
    const pool = EnvInstances ?? NitterInstances;
    const picked = pool[Math.floor(Math.random() * pool.length)];
    return applySource(options, picked);
}
// Request options shared by every Nitter request in this module.
// NOTE(review): `rejectUnauthorized: false` presumably turns off TLS
// certificate verification in the HTTP client — confirm this is intentional.
const gotOptions = { https: { rejectUnauthorized: false } };
/**
 * Loads a user's profile page from a randomly chosen Nitter instance and
 * reads the bio, website, location and join date from its profile card.
 *
 * @param {object} tweetData - Supplies `username`; when that is nullish the
 *   name is derived from `tweetData.finalUrl` via util.getUsername().
 * @param {object} options - Accepted for signature parity; not read here.
 * @returns {Promise<object>} Resolves with a new media data object carrying
 *   the profile fields, or with whatever api.downloadError() returns when
 *   the request or parsing fails.
 */
export function getProfileBio(tweetData, options) {
    const mediaData = util.newMediaData();
    const username = tweetData.username ?? util.getUsername(tweetData.finalUrl);
    const { uri: instanceUri } = getNitterOptions();
    const url = `${instanceUri}/${username}`;

    // Media data field -> selector inside the profile card.
    const profileFields = [
        ['bio', '.profile-bio'],
        ['website', '.profile-website'],
        ['location', '.profile-location'],
        ['joined', '.profile-joindate'],
    ];

    const getBioData = (jq) => {
        const profileCard = jq('.profile-card');
        for (const [field, selector] of profileFields) {
            mediaData[field] = profileCard.find(selector).text().trim();
        }
        return mediaData;
    };

    const reportFailure = (e) => api.downloadError(e, api.RequestType.NitterBio);

    return api.gotInstance.get(url, gotOptions)
        .then(api.loadCheerio)
        .then(getBioData)
        .catch(reportFailure);
}
/**
 * Converts a Nitter image path into a direct https://pbs.twimg.com URL.
 *
 * Handles both the plain form (`/pic/<percent-encoded path>`) and the
 * base64 form (`/pic/enc/<base64 path>`), then strips the `_bigger` avatar
 * suffix, the `?name=small` query and any `&format=…` parameter.
 *
 * @param {string} imagePath - Value of an image `src` attribute.
 * @returns {string} The rewritten URL, or '' when no path was supplied.
 */
function fixImageUrl(imagePath) {
    // A missing src attribute would otherwise be stringified into the
    // result ("https://pbs.twimg.comundefined").
    if (typeof imagePath !== 'string' || imagePath === '') {
        return '';
    }

    const decodeBase64 = (match, encoded) => {
        const decoded = Buffer.from(encoded, 'base64').toString('ascii');
        return `/${decoded}`;
    };

    let uri = decodeURIComponent(imagePath);
    // Accept the URL-safe base64 alphabet ('-' and '_') as well as the
    // standard one ('+' and '/'); Buffer decodes both. Without this the
    // match stops at the first '-' or '_' and the payload is truncated.
    uri = uri.replace(/^\/pic\/enc\/([A-Za-z0-9+/=_-]+)/, decodeBase64);
    uri = uri.replace(/^\/pic/, '');
    uri = uri.replace('_bigger', '');
    uri = uri.replace('?name=small', '');
    uri = uri.replace(/&format=[a-z]+/i, '');
    return `https://pbs.twimg.com${uri}`;
}
/**
 * Loads a tweet page from a randomly chosen Nitter instance and extracts
 * its author, text, date and media URLs.
 *
 * Image URLs are read from the page itself. When the tweet is detected as a
 * video or GIF, the video URL is resolved separately through getVideo().
 * Profile fields from getProfileBio() are merged in when available.
 *
 * @param {object} tweetData - Tweet descriptor handed to parseTweetUrl();
 *   its `username` is overwritten with the name scraped from the page.
 * @param {object} options - Passed through to parseTweetUrl(),
 *   getProfileBio() and getVideo().
 * @returns {Promise<object>} Resolves with the media data object. When the
 *   page shows an error panel or contains no tweet, the object carries an
 *   `error` and an empty `media` array instead.
 */
export function getMedia(tweetData, options) {
    const parsedTweetUrl = parseTweetUrl(tweetData, options);

    // Extracts everything that can be read directly from the tweet page.
    function getMediaData(jq) {
        const tweetContainer = jq('.main-tweet').first();
        const tweet = tweetContainer.find('.timeline-item').first();
        const mediaContainer = tweet.find('.attachments').first();
        const mediaData = util.newMediaData();
        const errorPanel = jq('.error-panel');

        if (errorPanel.length > 0) {
            const errorText = errorPanel.text().trim();
            mediaData.error = new Error(errorText);
            mediaData.media = [];
            return mediaData;
        }
        if (tweet.length === 0) {
            mediaData.error = new Error('Tweet not found');
            mediaData.media = [];
            return mediaData;
        }

        mediaData.name = tweet.find('.fullname').first().text().trim();
        mediaData.username = tweet.find('.username').first().text().trim().replace('@', '');
        mediaData.avatar = fixImageUrl(tweet.find('.avatar').first().attr('src'));
        // Later lookups (profile bio) use the name as shown on the page.
        tweetData.username = mediaData.username;
        mediaData.finalUrl = jq.finalUrl;
        mediaData.isVideo = tweet.find('.attachment.video-container, .attachments.media-gif').length > 0;
        mediaData.text = tweet.find('.tweet-content').text().trim();

        const dateText = tweet.find('.tweet-date a').first().attr('title')?.replace(' · ', ' ');
        if (dateText) {
            const parsedDate = new Date(dateText);
            // toISOString() throws a RangeError on an invalid date, which
            // would reject the whole scrape; leave the date unset instead.
            if (Number.isNaN(parsedDate.getTime())) {
                debug('Could not parse tweet date: %s', dateText);
            }
            else {
                mediaData.date = parsedDate;
                mediaData.dateFormat = parsedDate.toISOString();
            }
        }

        const getImages = () => mediaContainer.find('.attachment.image img')
            .map((i, el) => jq(el).attr('src'))
            .get()
            .map(fixImageUrl);

        mediaData.media = [];
        if (!mediaData.isVideo) {
            mediaData.media.push(...getImages());
        }
        return join(mediaData, getProfileBio(tweetData, options), combineMediaData);
    }

    // Copies the non-empty profile fields from bioData onto mediaData.
    function combineMediaData(mediaData, bioData) {
        if (mediaData && bioData) {
            if (bioData.bio) {
                mediaData.bio = bioData.bio;
            }
            if (bioData.website) {
                mediaData.website = bioData.website;
            }
            if (bioData.location) {
                mediaData.location = bioData.location;
            }
            if (bioData.joined) {
                mediaData.joined = bioData.joined;
            }
        }
        return mediaData;
    }

    // For video tweets, resolves the video URL and appends it to the media
    // list. Failures are reported through api.downloadError() and the tweet
    // data is still returned.
    async function getVideoData(mediaData) {
        if (mediaData.isVideo) {
            let videoUrl = '';
            try {
                videoUrl = await getVideo(tweetData, options);
            }
            catch (e) {
                api.downloadError(e, api.RequestType.VideoUrl);
            }
            if (videoUrl) {
                mediaData.media.push(videoUrl);
            }
        }
        return mediaData;
    }

    const nitterOptions = getNitterOptions();
    const url = `${nitterOptions.uri}/${parsedTweetUrl.username}/status/${parsedTweetUrl.statusId}`;
    console.log(`${logSymbols.info} Nitter URL: ${url}`);
    return api.gotInstance.get(url, gotOptions)
        .then(api.loadCheerio)
        .then(getMediaData)
        .then(getVideoData);
}
/**
 * Resolves the video URL of a tweet through a randomly chosen Nitter
 * instance.
 *
 * Posts to the instance's `/enablehls` endpoint with the tweet path as the
 * `referer` form field and an `hlsPlayback=on` cookie, then reads the video
 * element of the resulting page. A `<source>` element's `src` is returned
 * directly; otherwise the element's `data-url` is fetched as a playlist and
 * the highest-resolution variant is returned.
 *
 * @param {object} tweetData - Tweet descriptor handed to parseTweetUrl().
 * @param {object} options - Passed through to parseTweetUrl().
 * @returns {Promise<string|undefined>} The video or playlist URL, or
 *   undefined when the page exposes neither a source nor a data URL, or the
 *   playlist lists no usable variant.
 */
async function getVideo(tweetData, options) {
    const parsedTweetUrl = parseTweetUrl(tweetData, options);
    const nitterOptions = getNitterOptions();
    const path = `/${parsedTweetUrl.username}/status/${parsedTweetUrl.statusId}`;
    const url = `${nitterOptions.uri}/enablehls`;

    // Finds the video URL on the tweet page, following the playlist when
    // the page only exposes a data URL.
    function getPlaylistPlaylistUrl(jq) {
        const video = jq('.main-tweet .attachments .attachment.video-container video').first();
        const videoDataUrl = video.attr('data-url');
        const videoSource = video.find('source').first();
        if (videoSource.length > 0) {
            const videoUrl = videoSource.attr('src');
            debug('Got video source URL: %s', videoUrl);
            return videoUrl;
        }
        if (videoDataUrl) {
            const playlistUrl = `${nitterOptions.uri}${videoDataUrl}`;
            debug('Got playlist playlist URL: %s', playlistUrl);
            return api.gotInstance.get(playlistUrl, gotOptions)
                .then(parsePlaylistPlaylist);
        }
    }

    // Picks the variant with the most pixels from a playlist whose
    // RESOLUTION lines are each followed by the variant's URI line.
    function parsePlaylistPlaylist(response) {
        const { body: data } = response;
        const lines = data.split('\n');
        let topPixels = 0;
        let videoPlaylistUrl = '';
        for (let i = 0; i < lines.length; i++) {
            if (!lines[i].includes('RESOLUTION=')) {
                continue;
            }
            const resolutionMatch = lines[i].trim().match(/RESOLUTION=(\d+)x(\d+)/);
            const uriLine = lines[i + 1];
            // Skip malformed RESOLUTION attributes and a trailing
            // stream-info line with no URI line after it, rather than
            // throwing on a null match / undefined line.
            if (!resolutionMatch || uriLine === undefined) {
                continue;
            }
            const pixels = Number(resolutionMatch[1]) * Number(resolutionMatch[2]);
            if (pixels > topPixels) {
                topPixels = pixels;
                videoPlaylistUrl = uriLine.trim();
            }
        }
        if (videoPlaylistUrl) {
            if (videoPlaylistUrl.includes('/enc/')) {
                // NOTE(review): appears to undo an encoded proxy path — each
                // '_'-separated chunk is base64-decoded and the chunks are
                // re-joined with '?'. Confirm against the instance's format.
                const urlMatch = videoPlaylistUrl.match(/\/enc\/([A-Za-z0-9]+)\/([^/]+)/);
                if (urlMatch) {
                    videoPlaylistUrl = decodeURIComponent(urlMatch[2]);
                    const split = videoPlaylistUrl.split('_');
                    for (let i = 0; i < split.length; i++) {
                        split[i] = Buffer.from(split[i], 'base64').toString('ascii');
                    }
                    videoPlaylistUrl = split.join('?');
                }
            }
            // Anything that is not already an absolute https URL is treated
            // as a path on the Nitter instance.
            if (!videoPlaylistUrl.startsWith('https:')) {
                videoPlaylistUrl = `${nitterOptions.uri}${videoPlaylistUrl}`;
            }
            // A percent-encoded absolute URL embedded in the path wins over
            // the proxied form.
            if (videoPlaylistUrl.includes('https%3A')) {
                const parsedUrl = new URL(videoPlaylistUrl);
                const urlMatch = parsedUrl.pathname.match(/https%3A[^ ]+/);
                if (urlMatch) {
                    videoPlaylistUrl = decodeURIComponent(urlMatch[0]);
                }
            }
            debug('Got highest resolution playlist URL: %s', videoPlaylistUrl);
            return videoPlaylistUrl;
        }
    }

    const cookieJar = new CookieJar();
    cookieJar.setCookieSync('hlsPlayback=on', url);
    return api.gotInstance.post(url, {
        ...gotOptions,
        form: { referer: path },
        cookieJar: cookieJar,
    })
        .then(api.loadCheerio)
        .then(getPlaylistPlaylistUrl);
}
//# sourceMappingURL=nitter.js.map