media-scraper
Version:
TypeScript-first multi-platform social media scraper without API keys
46 lines (45 loc) • 2.09 kB
JavaScript
import { $fetch } from "ofetch";
import { load } from "cheerio";
import { facebookHeaders } from "../utils/helpers.mjs";
import { facebookRegex } from "../utils/regex.mjs";
/**
 * Fetch a Facebook video page and extract the video's metadata from the
 * JSON blobs embedded in its `<script type="application/json">` tags.
 *
 * @param {string} url - URL of the Facebook video; must match `facebookRegex`.
 * @returns {Promise<{
 *   id: string,
 *   caption: (string|undefined),
 *   permalink_url: (string|undefined),
 *   thumbnail_url: (string|undefined),
 *   width: (number|undefined),
 *   height: (number|undefined),
 *   created_at: *,
 *   video: { duration: (number|undefined), sd_url: (string|undefined), hd_url: (string|undefined) }
 * }>} Metadata read from the embedded payload. `video.duration` is taken from
 *   `playable_duration_in_ms`, or `length_in_second * 1000` as a fallback.
 *   NOTE(review): field types mirror what Facebook's payload appears to hold —
 *   confirm against a live response.
 * @throws {Error} If the URL does not match `facebookRegex`, the page cannot be
 *   fetched, or no usable video payload is found in the page.
 */
export default async (url) => {
  // Reject non-strings up front so callers get the documented error, not a TypeError.
  if (typeof url !== "string" || !url.match(facebookRegex)) {
    throw new Error("Invalid Facebook URL");
  }

  let post;
  try {
    post = await $fetch(url, { headers: facebookHeaders });
  } catch (err) {
    // Keep the underlying network/HTTP error available for debugging.
    throw new Error("Failed to fetch the Facebook URL", { cause: err });
  }
  if (!post) throw new Error("Failed to fetch the Facebook URL");

  const $ = load(post);
  const scripts = $("script[type='application/json']");
  const metaDescription = $("meta[name='description']")?.attr("content");

  // A candidate script must mention every term in `mustInclude` and none in `mustNotInclude`.
  const mustInclude = ["RelayPrefetchedStreamCache", "videoDeliveryLegacyFields"];
  const mustNotInclude = ["CometUFI"];

  let data;
  for (const script of scripts) {
    const content = $(script).html();
    if (!content) continue;
    if (!mustInclude.every((term) => content.includes(term))) continue;
    if (mustNotInclude.some((term) => content.includes(term))) continue;

    let json;
    try {
      json = JSON.parse(content);
    } catch {
      // A single malformed blob should not abort the scrape; try the next script.
      continue;
    }

    const candidate = json?.require?.[0]?.[3]?.[0]?.__bbox?.require
      ?.find?.((item) => item?.includes?.("RelayPrefetchedStreamCache"))
      ?.[3]?.[1]?.__bbox?.result?.data;

    // Only accept blobs that actually carry the video, so a later matching
    // script without the payload cannot wipe out data that was already found.
    if (candidate?.video) data = candidate;
  }

  const video = data?.video;
  if (!video) throw new Error("Failed to extract video data from the Facebook URL");

  const caption = video.creation_story?.message?.text || metaDescription;
  const attachment = video.story?.attachments?.find((item) => item?.media?.id === video.id);
  // Fall back to the short-form playback object when no attachment matches the video id.
  const media = attachment?.media || video.creation_story?.short_form_video_context?.playback_video;
  if (!media) throw new Error("Failed to extract video media from the Facebook URL");

  const { width, height } = media;
  const duration = media.playable_duration_in_ms || (media.length_in_second ? media.length_in_second * 1e3 : void 0);
  const thumbnail_url = media.thumbnailImage?.uri || media.preferred_thumbnail?.image?.uri;
  const playback_video = media.videoDeliveryLegacyFields;

  return {
    id: video.id,
    caption: caption?.trim(),
    permalink_url: media.permalink_url || media.url,
    thumbnail_url,
    width,
    height,
    created_at: media.publish_time,
    video: {
      duration,
      sd_url: playback_video?.browser_native_sd_url,
      hd_url: playback_video?.browser_native_hd_url
    }
  };
};