UNPKG

media-scraper

Version:

TypeScript-first multi-platform social media scraper without API keys

89 lines (88 loc) 3.93 kB
import { $fetch } from "ofetch";
import { load } from "cheerio";
import { tiktokRegex } from "../utils/regex.mjs";
import { userAgent } from "../utils/helpers.mjs";

// Mobile feed endpoint queried first; tikwm.com is the fallback source.
const FEED_ENDPOINT = "https://api22-normal-c-alisg.tiktokv.com/aweme/v1/feed/";
const TIKWM_ENDPOINT = "https://www.tikwm.com/api/";

// Pool of `iid` values sent to the feed endpoint; one is picked at random per call.
const KNOWN_IIDS = ["7318518857994389254"];

/**
 * Returns the first URL in `urls` that contains ".jpeg".
 *
 * @param {unknown} urls - Candidate URL list; anything that is not an array yields `undefined`.
 * @returns {string | undefined} The first matching URL, or `undefined` when there is none.
 */
const firstJpeg = (urls) =>
  Array.isArray(urls)
    ? urls.find((candidate) => typeof candidate === "string" && candidate.includes(".jpeg"))
    : undefined;

/**
 * Reads the video struct and the `wid` value out of the page's
 * `__UNIVERSAL_DATA_FOR_REHYDRATION__` script tag(s).
 *
 * @param {string} html - Raw HTML of the TikTok page.
 * @returns {{ data: any, device_id: any }} Whatever could be extracted; either field may be `undefined`.
 */
const readPageState = (html) => {
  const $ = load(html);
  const scripts = $("script[id='__UNIVERSAL_DATA_FOR_REHYDRATION__']");

  let data;
  let device_id;

  for (const script of scripts) {
    const content = $(script).html();
    if (!content?.includes("__DEFAULT_SCOPE__")) continue;

    let json;
    try {
      json = JSON.parse(content);
    } catch {
      // A malformed payload is not fatal: the caller can still fall back to
      // tikwm.com, so skip this script instead of aborting the whole request.
      continue;
    }

    const scope = json?.__DEFAULT_SCOPE__;
    // Keep values found in an earlier script if this one lacks them.
    data = scope?.["webapp.video-detail"]?.itemInfo?.itemStruct ?? data;
    device_id = scope?.["webapp.app-context"]?.wid ?? device_id;

    if (data && device_id) break;
  }

  return { data, device_id };
};

/**
 * Queries the feed endpoint and returns the entry whose `aweme_id` equals `tt_id`.
 *
 * @param {string} tt_id - Video id taken from the page data.
 * @param {string | undefined} device_id - `wid` value taken from the page data.
 * @returns {Promise<any>} The matching feed item, or `undefined` when the request
 *   fails or the response does not contain the video.
 */
const fetchFeedItem = async (tt_id, device_id) => {
  // Best effort: a failed request resolves to null and the caller falls back to tikwm.com.
  const post = await $fetch(FEED_ENDPOINT, {
    query: {
      region: "US",
      carrier_region: "US",
      aweme_id: tt_id,
      iid: KNOWN_IIDS[Math.floor(Math.random() * KNOWN_IIDS.length)],
      device_id,
      channel: "googleplay",
      app_name: "musical_ly",
      version_code: 350103,
      device_platform: "android",
      device_type: "ASUS_Z01QD",
      os_version: 14
    },
    headers: {
      "Content-Type": "application/x-www-form-urlencoded",
      "User-Agent": userAgent
    }
  }).catch(() => null);

  return post?.aweme_list?.find((entry) => entry?.aweme_id === tt_id);
};

/**
 * Scrapes metadata for a single TikTok video.
 *
 * Steps: validate the URL against `tiktokRegex`, download the page, read the
 * video id from the embedded rehydration JSON, look the video up through the
 * feed endpoint and, if that yields nothing, fall back to tikwm.com.
 *
 * @param {string} url - TikTok video URL.
 * @returns {Promise<object>} Object with `id`, `caption`, `permalink_url`,
 *   `thumbnail_url`, `author`, the engagement counters, `created_at`, `video`
 *   and `music`. Fields the chosen source does not provide are `undefined`.
 * @throws {Error} "Invalid TikTok URL" when `url` is not a string or does not match `tiktokRegex`.
 * @throws {Error} "Failed to fetch the TikTok URL" when the page cannot be downloaded.
 * @throws {Error} "Failed to fetch TikTok data" when neither source returns the video.
 */
export default async (url) => {
  if (typeof url !== "string" || !url.match(tiktokRegex)) {
    throw new Error("Invalid TikTok URL");
  }

  const html = await $fetch(url, {
    headers: { "User-Agent": userAgent }
  }).catch(() => null);
  if (!html) throw new Error("Failed to fetch the TikTok URL");

  const { data, device_id } = readPageState(html);
  const tt_id = data?.id;

  // Without a video id the feed lookup cannot match anything, so skip the request.
  const item = tt_id ? await fetchFeedItem(tt_id, device_id) : undefined;

  let tikwm;
  if (!item) {
    const response = await $fetch(TIKWM_ENDPOINT, { query: { url } }).catch(() => null);
    tikwm = response?.data;
    if (!tikwm) throw new Error("Failed to fetch TikTok data");
  }

  const id = item?.aweme_id || tikwm?.id;
  const username = item?.author?.unique_id || tikwm?.author?.unique_id;

  return {
    id,
    caption: item?.desc?.trim() || tikwm?.title?.trim(),
    permalink_url: `https://www.tiktok.com/@${username}/video/${id}`,
    thumbnail_url:
      firstJpeg(item?.video?.cover?.url_list) ||
      item?.video?.origin_cover?.url_list?.[0] ||
      tikwm?.origin_cover,
    author: {
      id: item?.author?.uid || tikwm?.author?.id,
      name: item?.author?.nickname || tikwm?.author?.nickname,
      username,
      avatar_url:
        firstJpeg(item?.author?.avatar_medium?.url_list) ||
        item?.author?.avatar_thumb?.url_list?.[0] ||
        tikwm?.author?.avatar
    },
    // `??` rather than `||`: a genuine count of 0 must stay 0 instead of
    // collapsing to undefined.
    like_count: item?.statistics?.digg_count ?? tikwm?.digg_count,
    download_count: item?.statistics?.download_count ?? tikwm?.download_count,
    play_count: item?.statistics?.play_count ?? tikwm?.play_count,
    share_count: item?.statistics?.share_count ?? tikwm?.share_count,
    created_at: item?.create_time || tikwm?.create_time,
    video: {
      width: item?.video?.play_addr?.width,
      height: item?.video?.play_addr?.height,
      // The tikwm duration is multiplied by 1000 to line up with the feed value
      // (presumably seconds -> milliseconds; confirm against both APIs).
      duration: item?.video?.duration || (tikwm?.duration ? tikwm.duration * 1e3 : void 0),
      url: item?.video?.play_addr?.url_list?.[0] || tikwm?.play,
      watermark_url: item?.video?.download_addr?.url_list?.[0] || tikwm?.wmplay
    },
    music: {
      id: item?.music?.id || tikwm?.music_info?.id,
      title: item?.music?.title || tikwm?.music_info?.title,
      author: item?.music?.author || tikwm?.music_info?.author,
      album: item?.music?.album || tikwm?.music_info?.album,
      cover_url:
        firstJpeg(item?.music?.cover_large?.url_list) ||
        item?.music?.cover_thumb?.url_list?.[0] ||
        tikwm?.music_info?.cover,
      play_url: item?.music?.play_url?.url_list?.[0] || tikwm?.music,
      duration: item?.music?.duration || tikwm?.music_info?.duration
    }
  };
};