twdl
Version:
Downloads image and video tweets from Twitter
178 lines • 7.55 kB
JavaScript
import puppeteer from 'puppeteer';
import logSymbols from 'log-symbols';
import { newMediaData, noOp } from '../util.js';
import { setTimeout } from "node:timers/promises";
import _debug from 'debug';
const debug = _debug('twdl:puppeteer');
let _browser = null;
export function cleanBrowser() {
return getBrowser().then((browser) => {
browser.close();
return _browser = null;
});
}
const envOptions = process.env.TWDL_PUPPETEER_OPTS;
export async function getBrowser(launchOptions) {
launchOptions = launchOptions || {};
if (envOptions) {
const parsed = JSON.parse(envOptions);
Object.assign(launchOptions, parsed);
debug('Launch options for Puppeteer: %j', launchOptions);
}
if (_browser === null) {
_browser = await puppeteer.launch(launchOptions);
}
return _browser;
}
function getEnglishUrl(tweetUrl) {
return tweetUrl.split('?')[0] + '?lang=en';
}
function getVideoUrls(jsonResponses = []) {
const videoUrls = [];
jsonResponses.forEach((json) => {
json.data?.threaded_conversation_with_injections_v2?.instructions?.forEach((instruction) => {
instruction.entries?.forEach(searchEntry);
});
if (json.data?.tweetResult) {
searchEntry(json.data);
}
});
return videoUrls;
function searchEntry(entry) {
const results = entry.content?.itemContent?.tweet_results || entry.tweetResult;
results?.result?.legacy?.entities?.media?.forEach((media) => {
const variants = media.video_info?.variants?.filter((i) => i.bitrate) || [];
variants.sort((a, b) => b.bitrate - a.bitrate);
const videoUrl = variants?.[0]?.url;
if (videoUrl) {
debug('Found video URL: %s', videoUrl);
videoUrls.push(videoUrl);
return;
}
const m3u8Url = media.video_info?.variants?.find((i) => i.url.includes(".m3u8"))?.url;
if (m3u8Url) {
debug('Found m3u8 URL: %s', m3u8Url);
videoUrls.push(m3u8Url);
}
});
}
}
export async function getMedia(tweetData, options) {
const browser = await getBrowser();
const page = await browser.newPage();
if (options.cookie) {
let cookie = [];
try {
cookie = JSON.parse(options.cookie);
debug('Loading cookies: %j', cookie);
}
catch (e) {
debug('Failed to parse cookie: %s', e);
}
await page.setCookie(...cookie);
}
const mediaData = newMediaData();
mediaData.media = [];
page.on('response', watchForPlaylistUrl);
const jsonResponses = [];
async function watchForPlaylistUrl(response) {
if (response.request().method() === 'OPTIONS')
return;
const url = response.url();
if (/(TweetResultByRestId|TweetDetail)/.test(url)) {
debug('Found JSON URL: %s…', url.substr(0, 100));
const json = await response.json();
jsonResponses.push(json);
}
}
await page.goto(getEnglishUrl(tweetData.finalUrl));
page.waitForSelector('div[data-testid="primaryColumn"] > div > div > div > div > div + div[role="button"]')
.then((refreshElement) => refreshElement.click(), noOp);
let article = null;
try {
article = await page.waitForSelector('div[aria-label="Timeline: Conversation"] > div > div:first-child > div > div > article');
}
catch (err) {
if (err.name === 'TimeoutError') {
if (!options.cookie)
console.log(`${logSymbols.warning} You can use cookies to bypass restricted content, see Wiki for more info.`);
throw new Error('Selector error, tweet is not found. Check twdl for updates.');
}
}
article.$('div[data-testid="tweet"]:not(.r-d0pm55) > div > div > div > div + div > div[role="button"]')
.then((viewButton) => viewButton && viewButton.click(), noOp);
const source = await article.$('div[dir] > a[href*="#source-labels"]'), dateHandle = await page.waitForSelector('a[aria-label*=" · "]:has(> time[datetime])'), dateText = (await page.evaluate((e) => e.innerText, dateHandle)).replace(' · ', ' ');
mediaData.timestamp = Date.parse(dateText);
mediaData.date = new Date(mediaData.timestamp);
mediaData.dateFormat = mediaData.date.toISOString();
const nameParts = await article.$$('div[data-testid="User-Name"] > div'), nameElement = nameParts[0], usernameElement = nameParts[nameParts.length - 1];
mediaData.name = await page.evaluate((e) => e.innerText, nameElement);
mediaData.username = await page.evaluate((e) => e.innerText.replace('@', ''), usernameElement);
mediaData.userId = undefined;
const textElement = await article.$('div[lang][dir]');
if (textElement != null) {
mediaData.text = await page.evaluate((e) => e.innerText, textElement);
}
const quoteMedia = [], images = await article.$$('img[draggable="true"]'), quoteImages = await article.$$('div[role="blockquote"] img[draggable="true"]');
mediaData.isVideo = await article.$$eval('div[data-testid="videoComponent"]', (els) => els.length > 0);
mediaData.avatar = await page.evaluate((e) => e.src.replace(/_(bigger|normal)/, ''), images[0]);
await pushImages(images);
await pushImages(quoteImages);
async function pushImages(arr) {
for (const img of arr) {
const src = await page.evaluate((e) => e.src, img);
if (!src.includes('profile_images'))
mediaData.media.push(src);
}
}
mediaData.media = mediaData.media.filter(function (val) {
return quoteMedia.indexOf(val) < 0;
});
if (jsonResponses.length > 0) {
debug('Trying to get video data');
try {
mediaData.media.push(...getVideoUrls(jsonResponses));
}
catch (err) {
}
}
const profile = await nameElement.$('a[role="link"]');
await profile.click();
try {
const button = await page.waitForSelector("xpath//div[@role='button' and contains(string(), 'Yes, view profile')]", { timeout: 5e3 });
if (button) {
await page.evaluate((el) => el.click(), button[0]);
}
}
catch (err) {
}
await page.waitForSelector('nav[aria-label="Profile timelines"]');
try {
const bioElement = await page.$('div[data-testid="UserDescription"]');
mediaData.bio = await page.evaluate((e) => e.innerText, bioElement);
}
catch (err) {
console.error(`${logSymbols.warning} No bio description detected`);
}
await setTimeout(500);
const headerItems = await page.$$eval('div[data-testid="UserProfileHeader_Items"] > *', (els) => {
return els.map((e) => e.tagName === 'A' ? `${e.href} (${e.innerText})` : e.innerText);
});
headerItems.forEach((item) => {
if (item.startsWith('https:')) {
mediaData.website = item;
}
else if (item.startsWith('Born')) {
mediaData.birthday = item;
}
else if (item.startsWith('Joined')) {
mediaData.joined = item;
}
else {
mediaData.location = item;
}
});
await page.close();
return mediaData;
}
//# sourceMappingURL=puppeteer.js.map