UNPKG

media-scraper

Version:

TypeScript-first multi-platform social media scraper without API keys

46 lines (45 loc) 2.09 kB
import { $fetch } from "ofetch";
import { load } from "cheerio";
import { facebookHeaders } from "../utils/helpers.mjs";
import { facebookRegex } from "../utils/regex.mjs";

// Substrings a JSON <script> payload must all contain to be treated as the video payload.
const REQUIRED_TERMS = ["RelayPrefetchedStreamCache", "videoDeliveryLegacyFields"];
// Substrings that disqualify a payload even when every required term is present.
const EXCLUDED_TERMS = ["CometUFI"];

/** Returns true when the raw script text contains every required term and no excluded one. */
const isVideoPayload = (content) =>
  Boolean(content) &&
  REQUIRED_TERMS.every((term) => content.includes(term)) &&
  !EXCLUDED_TERMS.some((term) => content.includes(term));

/** Walks a parsed script payload down to the prefetched stream-cache `data` object, if present. */
const extractVideoData = (json) => {
  const entries = json?.require?.[0]?.[3]?.[0]?.__bbox?.require;
  if (!Array.isArray(entries)) return undefined;
  const entry = entries.find(
    (item) => Array.isArray(item) && item.includes("RelayPrefetchedStreamCache"),
  );
  return entry?.[3]?.[1]?.__bbox?.result?.data;
};

/**
 * Fetches a Facebook video page and extracts the video metadata embedded in its
 * `<script type="application/json">` payloads.
 *
 * @param {string} url - URL that must match `facebookRegex`.
 * @returns {Promise<object>} Object with `id`, `caption`, `permalink_url`,
 *   `thumbnail_url`, `width`, `height`, `created_at` and a `video` object holding
 *   `duration` (milliseconds, judging by the source field names), `sd_url` and `hd_url`.
 * @throws {Error} "Invalid Facebook URL" when `url` is not a matching string.
 * @throws {Error} "Failed to fetch the Facebook URL" when the request fails or
 *   returns an empty body (the underlying error, if any, is attached as `cause`).
 * @throws {Error} "Failed to extract video data from the Facebook page" when no
 *   usable video payload is found in the page.
 */
export default async (url) => {
  if (typeof url !== "string" || !url.match(facebookRegex)) {
    throw new Error("Invalid Facebook URL");
  }

  let post;
  try {
    post = await $fetch(url, { headers: facebookHeaders });
  } catch (cause) {
    // Same message as before, but keep the underlying failure for debugging.
    throw new Error("Failed to fetch the Facebook URL", { cause });
  }
  if (!post) throw new Error("Failed to fetch the Facebook URL");

  const $ = load(post);
  const metaDescription = $("meta[name='description']").attr("content");

  let data;
  let parseError;
  for (const script of $("script[type='application/json']")) {
    const content = $(script).html();
    if (!isVideoPayload(content)) continue;
    try {
      // A later match still wins (as before), but a match that lacks the
      // expected path no longer wipes out data found in an earlier script.
      data = extractVideoData(JSON.parse(content)) ?? data;
    } catch (error) {
      // One malformed payload should not abort the scan; remember it so it can
      // be reported as the cause if nothing usable turns up.
      parseError = error;
    }
  }

  const video = data?.video;
  const attachment = video?.story?.attachments?.find(
    (item) => item?.media?.id === video.id,
  );
  const media =
    attachment?.media ||
    video?.creation_story?.short_form_video_context?.playback_video;

  if (!video || !media) {
    // Previously this surfaced as an opaque TypeError on an undefined property.
    throw new Error(
      "Failed to extract video data from the Facebook page",
      parseError ? { cause: parseError } : undefined,
    );
  }

  const { width, height } = media;
  const duration =
    media.playable_duration_in_ms ||
    (media.length_in_second ? media.length_in_second * 1e3 : undefined);
  const thumbnail_url =
    media.thumbnailImage?.uri || media.preferred_thumbnail?.image?.uri;
  const playback_video = media.videoDeliveryLegacyFields;
  // Fall back to the page's meta description when the story carries no message text.
  const caption = video.creation_story?.message?.text || metaDescription;

  return {
    id: video.id,
    caption: caption?.trim(),
    permalink_url: media.permalink_url || media.url,
    thumbnail_url,
    width,
    height,
    created_at: media.publish_time,
    video: {
      duration,
      sd_url: playback_video?.browser_native_sd_url,
      hd_url: playback_video?.browser_native_hd_url,
    },
  };
};