astro-loader-tweets
Version:
Astro loader for loading tweets by ID.
678 lines (673 loc) • 22 kB
JavaScript
import { TwitterApi, ApiResponseError } from 'twitter-api-v2';
import { z } from 'astro/zod';
import fs from 'node:fs/promises';
import path from 'node:path';
// src/index.ts
// package.json
// Package metadata used by the loader below: `name` becomes the loader's name and
// `homepage` is the base URL for the configuration docs link shown in error messages.
var package_default = {
name: "astro-loader-tweets",
homepage: "https://github.com/lin-stephanie/astro-loaders/blob/main/packages/astro-loader-tweets/"};
// Fallback values wired into `TweetsLoaderConfigSchema` through `.default(...)`.
// Note: `urlTextType` is the default for the schema option named `linkTextType`.
var defaultConfig = {
storage: "default",
storePath: "src/data/tweets.json",
removeTrailingUrls: true,
urlTextType: "post-text",
newlineHandling: "none"
};
// Zod schema validating the user-supplied loader configuration. Every option except
// `ids` and `authToken` has a default taken from `defaultConfig`; a cross-field check
// at the end enforces the `.json` extension on `storePath` for file-based storage.
var TweetsLoaderConfigSchema = z.object({
/**
* An array of Tweet IDs to fetch content for.
*/
ids: z.array(z.string()),
/**
* The method to store the loaded tweets:
* - `'default'`: Uses Astro's default KV store (`./store/data-store.json`).
* - `'custom'`: Use a custom JSON file path.
* - `'both'`: Both default and custom path.
*
* @default 'default'
*/
storage: z.enum(["default", "custom", "both"]).default(defaultConfig.storage),
/**
* The custom output path for storing tweets, either absolute or
* relative to the Astro project root. Must end with `.json`.
* Required if `storage` is `'custom'` or `'both'`.
*
* @example
* 'src/data/tweets.json'
* './src/content/tweets.json'
* '/absolute/path/to/tweets.json'
*/
storePath: z.string().default(defaultConfig.storePath),
/**
* Whether to remove trailing URLs from the tweet text in the generated `text_html` and `text_markdown`,
* typically used for views or referenced tweets.
*
* @default true
*/
removeTrailingUrls: z.boolean().default(defaultConfig.removeTrailingUrls),
/**
* The type of text to display for links when generating `text_html` and `text_markdown`:
* - `'domain-path'`: Shows the link's domain and path.
* - `'post-text'`: Uses the link text as shown in the tweet.
*
* @default 'post-text'
*/
linkTextType: z.enum(["domain-path", "post-text"]).default(defaultConfig.urlTextType),
/**
* The way for processing `\n` in `text_html` generation:
* - `'none'`: Keep as is.
* - `'break'`: Replace consecutive `\n` with `<br>`.
* - `'paragraph'`: Wrap paragraphs with `<p>` while removing standalone `\n`.
*
* @default 'none'
*/
newlineHandling: z.enum(["none", "break", "paragraph"]).default(defaultConfig.newlineHandling),
/**
* The X app-only Bearer Token for authentication.
*
* This is optional; by default, it reads from the `X_TOKEN` environment variable.
* You may also configure it directly here (not recommended; if you do, ensure it is not exposed
* in public code repositories).
*
* @see
* - {@link https://developer.x.com/en/docs/authentication/oauth-2-0/bearer-tokens How to create an X app-only Bearer Token}
* - {@link https://docs.astro.build/en/guides/environment-variables/#setting-environment-variables How to store token in Astro project environment variables}
*/
authToken: z.string().optional()
}).superRefine((config, ctx) => {
// Cross-field rule: the extension is only checked when tweets are written to a
// custom file (`storage` is "custom" or "both"); for "default" storage the
// `storePath` value is not validated.
if ((config.storage === "custom" || config.storage === "both") && config.storePath && !config.storePath.endsWith(".json")) {
ctx.addIssue({
code: z.ZodIssueCode.custom,
message: "`storePath` must end with `.json`"
});
}
});
// Request options passed as the second argument to `client.v2.tweets(...)` in the
// loader: which related objects to expand and which fields to request for each
// object type (tweet, user, place, media, poll).
var getTweetsApiOptions = {
// Related objects to expand alongside each tweet.
expansions: [
"author_id",
"geo.place_id",
"attachments.media_keys",
"attachments.poll_ids"
],
"tweet.fields": [
"id",
"text",
"edit_history_tweet_ids",
"attachments",
"author_id",
"conversation_id",
"created_at",
"entities",
"in_reply_to_user_id",
"lang",
"public_metrics",
"referenced_tweets"
],
"user.fields": [
"id",
"name",
"username",
"connection_status",
"created_at",
"description",
"entities",
"profile_image_url",
"public_metrics",
"url"
],
"place.fields": [
"id",
"full_name",
"contained_within",
"country",
"country_code",
"geo",
"name",
"place_type"
],
"media.fields": [
"media_key",
"type",
"url",
"preview_image_url",
"height",
"width",
"alt_text",
"duration_ms",
"public_metrics",
"variants"
],
"poll.fields": [
"id",
"options",
"duration_minutes",
"end_datetime",
"voting_status"
]
};
// Zod schema for a single tweet object. Only `id`, `text` and
// `edit_history_tweet_ids` are required; every other field is optional.
var TweetV2Schema = z.object({
id: z.string(),
text: z.string(),
edit_history_tweet_ids: z.array(z.string()),
// IDs linking this tweet to media and poll objects.
attachments: z.object({
media_keys: z.array(z.string()).optional(),
poll_ids: z.array(z.string()).optional()
}).optional(),
author_id: z.string().optional(),
conversation_id: z.string().optional(),
created_at: z.string().optional(),
// Entities found in the tweet text; each carries `start`/`end` numeric offsets.
entities: z.object({
annotations: z.array(
z.object({
start: z.number(),
end: z.number(),
probability: z.number(),
type: z.string(),
normalized_text: z.string()
})
).optional(),
urls: z.array(
z.object({
start: z.number(),
end: z.number(),
url: z.string(),
expanded_url: z.string(),
display_url: z.string(),
unwound_url: z.string().optional(),
title: z.string().optional(),
description: z.string().optional(),
// Accepted as either a string or a number.
status: z.union([z.string(), z.number()]).optional(),
media_key: z.string().optional(),
images: z.array(
z.object({
url: z.string(),
width: z.number(),
height: z.number()
})
).optional()
})
).optional(),
hashtags: z.array(
z.object({
start: z.number(),
end: z.number(),
tag: z.string()
})
).optional(),
cashtags: z.array(
z.object({
start: z.number(),
end: z.number(),
tag: z.string()
})
).optional(),
mentions: z.array(
z.object({
start: z.number(),
end: z.number(),
username: z.string(),
id: z.string()
})
).optional()
}).optional(),
geo: z.object({
coordinates: z.object({
type: z.string(),
// A two-number tuple, or null.
coordinates: z.tuple([z.number(), z.number()]).nullable()
}),
place_id: z.string()
}).optional(),
in_reply_to_user_id: z.string().optional(),
lang: z.string().optional(),
public_metrics: z.object({
retweet_count: z.number(),
reply_count: z.number(),
like_count: z.number(),
quote_count: z.number(),
impression_count: z.number(),
bookmark_count: z.number().optional()
}).optional(),
// NOTE(review): the array *element* is itself `.optional()`, so `undefined`
// entries pass validation — confirm whether that is intended.
referenced_tweets: z.array(
z.object({
type: z.union([
z.literal("retweeted"),
z.literal("quoted"),
z.literal("replied_to")
]),
id: z.string()
}).optional()
).optional(),
source: z.string().optional()
});
// `TweetV2Schema` plus the fields that `processTweetText` adds to each tweet:
// rendered HTML/Markdown text, the detected view type, and (optionally) the URL
// backing a "link" view.
var TweetV2ExtendedSchema = TweetV2Schema.extend({
text_html: z.string(),
text_markdown: z.string(),
view_type: z.enum(["none", "media", "link"]),
url_for_link_view: z.string().optional()
});
// Base shape shared by the entity schemas below: numeric `start`/`end` offsets.
var EntitySchema = z.object({
start: z.number(),
end: z.number()
});
// URL entity: the base offsets plus the short, expanded and display forms of the link.
var UrlEntitySchema = EntitySchema.extend({
url: z.string(),
expanded_url: z.string(),
display_url: z.string()
});
// Hashtag entity; the value may arrive under either `tag` or `hashtag` (both optional).
var HashtagEntitySchema = EntitySchema.extend({
tag: z.string().optional(),
hashtag: z.string().optional()
});
// Cashtag entity; the value may arrive under either `tag` or `cashtag` (both optional).
var CashtagEntitySchema = EntitySchema.extend({
tag: z.string().optional(),
cashtag: z.string().optional()
});
// Mention entity with an optional `username`.
var MentionEntitySchema = EntitySchema.extend({
username: z.string().optional()
});
// Zod schema for a user object. `id`, `name` and `username` are required; the
// remaining profile fields are optional.
var UserV2Schema = z.object({
id: z.string(),
name: z.string(),
username: z.string(),
connection_status: z.array(z.string()).optional(),
created_at: z.string().optional(),
description: z.string().optional(),
// Entities found in the profile URL and in the profile description.
entities: z.object({
url: z.object({
urls: z.array(UrlEntitySchema)
}).optional(),
description: z.object({
urls: z.array(UrlEntitySchema).optional(),
hashtags: z.array(HashtagEntitySchema).optional(),
cashtags: z.array(CashtagEntitySchema).optional(),
mentions: z.array(MentionEntitySchema).optional()
}).optional()
}).optional(),
profile_image_url: z.string().optional(),
// Every individual counter is optional.
public_metrics: z.object({
followers_count: z.number().optional(),
following_count: z.number().optional(),
tweet_count: z.number().optional(),
listed_count: z.number().optional(),
like_count: z.number().optional(),
media_count: z.number().optional()
}).optional(),
url: z.string().optional()
});
// Zod schema for a place object. `id` and `full_name` are required.
var PlaceV2Schema = z.object({
id: z.string(),
full_name: z.string(),
contained_within: z.array(z.string()).optional(),
country: z.string().optional(),
country_code: z.string().optional(),
// `properties` is left unvalidated (`z.any()`).
geo: z.object({
type: z.string(),
bbox: z.array(z.number()),
properties: z.any()
}).optional(),
name: z.string().optional(),
place_type: z.string().optional()
});
// Zod schema for one media variant (an alternative encoding of a media item).
var MediaVariantsV2Schema = z.object({
bit_rate: z.number().optional(),
// The trailing `z.string()` makes the union accept any string; the literals
// only name the known content types.
content_type: z.union([
z.literal("video/mp4"),
z.literal("application/x-mpegURL"),
z.string()
]),
url: z.string()
});
// Zod schema for a media object. `media_key` and `type` are required.
var MediaObjectV2Schema = z.object({
media_key: z.string(),
// The trailing `z.string()` makes the union accept any string; the literals
// only name the known media types.
type: z.union([
z.literal("video"),
z.literal("animated_gif"),
z.literal("photo"),
z.string()
]),
url: z.string().optional(),
preview_image_url: z.string().optional(),
width: z.number().optional(),
height: z.number().optional(),
alt_text: z.string().optional(),
duration_ms: z.number().optional(),
public_metrics: z.object({
view_count: z.number()
}).optional(),
variants: z.array(MediaVariantsV2Schema).optional()
});
// Zod schema for a poll object. `id` and `options` are required.
var PollV2Schema = z.object({
id: z.string(),
// Each option carries its position, label and vote count.
options: z.array(
z.object({
position: z.number(),
label: z.string(),
votes: z.number()
})
),
duration_minutes: z.number().optional(),
end_datetime: z.string().optional(),
voting_status: z.string().optional()
});
// Schema describing an `includes`-shaped object (optional users, places, media, polls).
// NOTE(review): the result of this expression is not assigned to any name here, so
// nothing in this file can reference it — presumably a leftover of a declaration
// that was stripped during bundling; confirm before removing.
z.object({
users: z.array(UserV2Schema).optional(),
places: z.array(PlaceV2Schema).optional(),
media: z.array(MediaObjectV2Schema).optional(),
polls: z.array(PollV2Schema).optional()
});
// Schema of one loaded entry: the processed tweet plus its resolved author, place,
// media and polls. Each related field is `null` when nothing was resolved for it.
// Used as the loader's `schema` and exported from this module.
var TweetSchema = z.object({
id: z.string(),
tweet: TweetV2ExtendedSchema,
user: z.union([UserV2Schema, z.null()]),
place: z.union([PlaceV2Schema, z.null()]),
media: z.union([z.array(MediaObjectV2Schema), z.null()]),
poll: z.union([z.array(PollV2Schema), z.null()])
});
/**
 * Collects the run of whitespace-separated `t.co` short links that ends the text.
 *
 * Each link must be preceded by whitespace, so a text consisting solely of a
 * link (with nothing before it) yields no result.
 *
 * @param {string} text - Raw tweet text.
 * @returns {string[]} The trailing short links in order of appearance, or `[]`.
 */
function getTrailingUrls(text) {
  const isBlank = text.trimEnd() === "";
  if (isBlank) return [];
  const trailingRun = /(\s+https?:\/\/t\.co\/[A-Za-z0-9]+)+$/.exec(text);
  return trailingRun === null ? [] : trailingRun[0].trim().split(/\s+/);
}
/**
 * Cuts the text at the last occurrence of `urlToRemove`, dropping that URL and
 * everything after it, then strips trailing whitespace from what remains.
 *
 * @param {string} text - Text to trim.
 * @param {string} urlToRemove - URL to search for (last occurrence wins).
 * @returns {string} The shortened text, or `text` unchanged when the URL is absent.
 */
function cleanupUrlFromText(text, urlToRemove) {
  const cutAt = text.lastIndexOf(urlToRemove);
  return cutAt < 0 ? text : text.substring(0, cutAt).trimEnd();
}
/**
 * Escapes the five HTML-significant characters (`&`, `<`, `>`, `"`, `'`) so the
 * string can be placed safely in element content or a quoted attribute value.
 *
 * @param {string | null | undefined} str - Text to escape.
 * @returns {string} The escaped text, or `""` when `str` is null/undefined.
 */
function escapeHTML(str) {
  // Each character maps to its entity; mapping a character to itself would
  // leave the output unescaped.
  const escapeMap = {
    "&": "&amp;",
    "<": "&lt;",
    ">": "&gt;",
    '"': "&quot;",
    "'": "&#39;"
  };
  return str?.replace(/[&<>"']/g, (match) => escapeMap[match] || match) ?? "";
}
/**
 * Reduces a URL to its host name followed by its path, dropping the scheme,
 * port, query string and fragment.
 *
 * @param {string} url - Absolute URL to shorten.
 * @returns {string} `hostname` immediately followed by `pathname`.
 * @throws {TypeError} When `url` cannot be parsed by the `URL` constructor.
 */
function getDomainAndPath(url) {
  const { hostname, pathname } = new URL(url);
  return hostname + pathname;
}
/**
 * Builds the HTML and Markdown renderings of a tweet's text and decides how the
 * tweet should be displayed.
 *
 * Steps, in order:
 * 1. Inspect the trailing `t.co` links to pick the view type ("media" or "link")
 *    and, when `removeTrailingUrls` is set, strip them from the text.
 * 2. Turn hashtags, cashtags, mentions and remaining `t.co` links into anchors
 *    (HTML) and inline links (Markdown), using the tweet's entity lists.
 * 3. If no view type was set in step 1, derive it from the last linked URL entity.
 * 4. Apply `newlineHandling` to the HTML rendering only.
 *
 * @param {object} tweet - Tweet object with `text` and optional `entities` / `attachments`.
 * @param {object} options
 * @param {boolean} options.removeTrailingUrls - Strip trailing short links from the text.
 * @param {"domain-path" | "post-text"} options.linkTextType - Label used for URL links.
 * @param {"none" | "break" | "paragraph"} options.newlineHandling - Newline treatment in `text_html`.
 * @returns {object} A copy of `tweet` extended with `text_html`, `text_markdown`,
 *   `view_type` and `url_for_link_view`.
 */
function processTweetText(tweet, options) {
  const { removeTrailingUrls, linkTextType, newlineHandling } = options;
  const entities = tweet.entities || {};
  const urls = entities.urls || [];
  const hashtags = entities.hashtags || [];
  const cashtags = entities.cashtags || [];
  const mentions = entities.mentions || [];

  let text = tweet.text;
  let viewType = "none";
  let urlForLinkView = void 0;

  const trailingUrls = getTrailingUrls(text);
  if (trailingUrls.length > 0) {
    const lastUrl = trailingUrls[trailingUrls.length - 1];
    const foundUrl = urls.find((u) => u.url === lastUrl);
    const urlHasMedia = foundUrl?.media_key && foundUrl.media_key.length > 0;
    const tweetHasMedia = tweet.attachments?.media_keys && tweet.attachments.media_keys.length > 0;
    viewType = urlHasMedia || tweetHasMedia ? "media" : "link";
    if (viewType === "link") urlForLinkView = foundUrl?.expanded_url;
    if (removeTrailingUrls) {
      text = cleanupUrlFromText(text, lastUrl);
      // When the last link points at a tweet, the links before it may be media
      // links, so strip those as well.
      if (foundUrl && /https:\/\/twitter\.com\/\w+\/status\/\d+/.test(foundUrl.expanded_url)) {
        for (let i = trailingUrls.length - 2; i >= 0; i--) {
          const u = trailingUrls[i];
          const entity = urls.find((e) => e.url === u);
          if (entity?.media_key) {
            text = cleanupUrlFromText(text, u);
          }
        }
      }
    }
  }

  let textHtml = text;
  let textMarkdown = text;

  // Shared anchor markup for every generated link.
  const anchor = (href, label, content) =>
    `<a href="${href}" target="_blank" rel="noopener noreferrer" aria-label="${label}">${content}</a>`;
  // Replace the first literal occurrence of `match` in the Markdown text. A
  // replacer function is used so `$&`, `` $` `` or `$'` inside a link are
  // inserted verbatim instead of being treated as replacement patterns.
  const linkInMarkdown = (match, mdLink) => {
    textMarkdown = textMarkdown.replace(match, () => mdLink);
  };

  if (hashtags.length > 0) {
    textHtml = textHtml.replace(
      /(^|\W)#([\p{L}\p{M}\w]+)/gu,
      (match, prefix, tag) => {
        if (!hashtags.some((h) => h.tag === tag)) return match;
        const escapedTag = escapeHTML(tag);
        const url = `https://x.com/hashtag/${encodeURIComponent(escapedTag)}`;
        linkInMarkdown(match, `${prefix}[#${escapedTag}](${url})`);
        return `${prefix}${anchor(url, `Hashtag #${escapedTag}`, `#${escapedTag}`)}`;
      }
    );
  }

  if (cashtags.length > 0) {
    textHtml = textHtml.replace(
      /(^|\W)\$([A-Za-z0-9]+)/g,
      (match, prefix, tag) => {
        if (!cashtags.some((c) => c.tag === tag)) return match;
        const escapedTag = escapeHTML(tag);
        const url = `https://x.com/search?q=%24${encodeURIComponent(escapedTag)}`;
        linkInMarkdown(match, `${prefix}[$${escapedTag}](${url})`);
        return `${prefix}${anchor(url, `Cashtag $${escapedTag}`, `$${escapedTag}`)}`;
      }
    );
  }

  if (mentions.length > 0) {
    textHtml = textHtml.replace(/(^|\W)@(\w+)/g, (match, prefix, username) => {
      const entity = mentions.find((m) => m.username === username);
      if (!entity) return match;
      const escapedUsername = escapeHTML(username);
      const url = `https://x.com/${encodeURIComponent(entity.username)}`;
      linkInMarkdown(match, `${prefix}[@${escapedUsername}](${url})`);
      return `${prefix}${anchor(url, `Mention @${escapedUsername}`, `@${escapedUsername}`)}`;
    });
  }

  let lastMatchedUrlEntity;
  if (urls.length > 0) {
    textHtml = textHtml.replace(/https?:\/\/t\.co\/[A-Za-z0-9]+/g, (match) => {
      const entity = urls.find((e) => e.url === match);
      if (!entity) return match;
      lastMatchedUrlEntity = entity;
      const escapedUrl = escapeHTML(entity.expanded_url);
      const escapedText = linkTextType === "domain-path"
        ? escapeHTML(getDomainAndPath(entity.expanded_url))
        : escapeHTML(entity.display_url);
      linkInMarkdown(match, `[${escapedText}](${escapedUrl})`);
      return anchor(escapedUrl, `Link to ${escapedText}`, escapedText);
    });
  }

  // No trailing link decided the view, so fall back to the last inline link.
  if (viewType === "none" && lastMatchedUrlEntity) {
    viewType = lastMatchedUrlEntity.media_key ? "media" : "link";
    if (viewType === "link") urlForLinkView = lastMatchedUrlEntity.expanded_url;
  }

  if (newlineHandling === "break") {
    // A run of consecutive newlines collapses into a single line break.
    textHtml = textHtml.replace(/\n+/g, "<br />\n");
  } else if (newlineHandling === "paragraph") {
    // Blank lines are dropped; every remaining line becomes its own paragraph.
    textHtml = textHtml
      .split("\n")
      .map((line) => line.trim())
      .filter((line) => line.length > 0)
      .map((line) => `<p>${line}</p>`)
      .join("");
  }

  return {
    ...tweet,
    text_html: textHtml,
    text_markdown: textMarkdown,
    view_type: viewType,
    url_for_link_view: urlForLinkView
  };
}
/**
 * Pairs every processed tweet with the related objects found in the response's
 * `includes` section.
 *
 * @param {object[]} processedTweets - Tweets already run through text processing.
 * @param {object} [includes] - Optional `{ users, places, media, polls }` lookup lists.
 * @returns {object[]} One `{ id, tweet, user, place, media, poll }` record per tweet;
 *   a related field is `null` when the tweet does not reference it, when the
 *   matching list is missing, or when no entry matches.
 */
function processTweets(processedTweets, includes) {
  // Keeps the pool entries whose key is listed in `wantedKeys`; null when there is nothing to keep.
  const pickListed = (pool, wantedKeys, keyOf) => {
    if (!wantedKeys || !pool) return null;
    const hits = pool.filter((entry) => wantedKeys.includes(keyOf(entry)));
    return hits.length > 0 ? hits : null;
  };
  // Finds the single pool entry with the given id; null when absent.
  const pickById = (pool, wantedId) => {
    if (!wantedId || !pool) return null;
    return pool.find((entry) => entry.id === wantedId) || null;
  };

  return processedTweets.map((tweet) => {
    const attachments = tweet.attachments;
    return {
      id: tweet.id,
      tweet,
      user: pickById(includes?.users, tweet.author_id),
      place: pickById(includes?.places, tweet.geo?.place_id),
      media: pickListed(includes?.media, attachments?.media_keys, (item) => item.media_key),
      poll: pickListed(includes?.polls, attachments?.poll_ids, (item) => item.id)
    };
  });
}
// Minimal shape required of the custom JSON store file: an array of objects, each
// with a string `id`. `.passthrough()` keeps all other keys on each object.
var SavedTweets = z.array(
z.object({
id: z.string()
}).passthrough()
);
/**
 * Writes tweets to a JSON file, merging them into whatever the file already holds.
 *
 * When the file exists, its entries are kept and each incoming tweet either
 * replaces the stored entry with the same `id` (keeping its position) or is
 * appended. When the file does not exist, it is created with `tweets` as content.
 * Missing parent directories are created.
 *
 * @param {Array<{ id: string }>} tweets - Newly loaded tweets.
 * @param {string} storePath - Target file, absolute or relative to `process.cwd()`.
 * @returns {Promise<void>}
 * @throws {Error} When the existing file is not an array of objects with string `id`s.
 * @throws {SyntaxError} When the existing file is not valid JSON.
 */
async function saveOrUpdateTweets(tweets, storePath) {
  const resolvedPath = path.isAbsolute(storePath) ? storePath : path.resolve(process.cwd(), storePath);
  let savedTweets = [];
  let fileExists = true;
  try {
    await fs.access(resolvedPath);
  } catch {
    fileExists = false;
  }
  if (fileExists) {
    const fileContent = await fs.readFile(resolvedPath, "utf-8");
    const parsedContent = JSON.parse(fileContent);
    const parsedData = SavedTweets.safeParse(parsedContent);
    if (!parsedData.success)
      throw new Error(
        "Invalid JSON format. Ensure the file contains an array of objects, each with a valid `id` field as a string."
      );
    // A Map keyed by id keeps the stored order while letting incoming tweets
    // overwrite their older versions.
    const savedTweetsMap = new Map(parsedData.data.map((t) => [t.id, t]));
    // Merge the incoming tweets; iterating the stored list here would make the
    // merge a no-op.
    for (const newTweet of tweets) {
      savedTweetsMap.set(newTweet.id, newTweet);
    }
    savedTweets = Array.from(savedTweetsMap.values());
  } else {
    savedTweets = tweets;
  }
  await fs.mkdir(path.dirname(resolvedPath), { recursive: true });
  await fs.writeFile(resolvedPath, JSON.stringify(savedTweets, null, 2), "utf8");
}
// src/index.ts
// Upper bound on the number of IDs sent in a single `client.v2.tweets` call.
const MAX_IDS_PER_REQUEST = 100;
/**
 * Creates an Astro content loader that fetches tweets by ID.
 *
 * The returned loader's `load` function:
 * 1. validates `userConfig` against `TweetsLoaderConfigSchema`;
 * 2. resolves the bearer token from `authToken` or the `X_TOKEN` environment variable;
 * 3. fetches the tweets in batches of `MAX_IDS_PER_REQUEST` IDs and processes their text;
 * 4. stores the results in the Astro store and/or a custom JSON file, per `storage`.
 *
 * Configuration, token and API problems are reported through `logger`; `load`
 * returns without throwing in those cases.
 *
 * @param {object} userConfig - Loader options (see `TweetsLoaderConfigSchema`).
 * @returns {{ name: string, schema: object, load: Function }} The loader object.
 */
function tweetsLoader(userConfig) {
  return {
    name: package_default.name,
    schema: TweetSchema,
    async load({ logger, store, parseData, generateDigest }) {
      const parsedConfig = TweetsLoaderConfigSchema.safeParse(userConfig);
      if (!parsedConfig.success) {
        logger.error(
          `The configuration provided is invalid. ${parsedConfig.error.issues.map((issue) => issue.message).join("\n")}.
Check out the configuration: ${package_default.homepage}README.md#configuration.`
        );
        return;
      }
      // Everything left after removing the loader-level options is the text-processing config.
      const { ids, storage, storePath, authToken, ...processTweetTextConfig } = parsedConfig.data;
      const token = authToken || import.meta.env.X_TOKEN;
      if (ids.length === 0) {
        logger.warn("No tweet IDs provided and no tweets will be loaded");
        return;
      }
      if (!token) {
        logger.error(
          "No X (Twitter) token provided. Please provide a `authToken` or set the `X_TOKEN` environment variable.\nHow to create an X app-only Bearer Token: https://developer.x.com/en/docs/authentication/oauth-2-0/bearer-tokens.\nHow to store token in Astro project environment variables: https://docs.astro.build/en/guides/environment-variables/#setting-environment-variables."
        );
        return;
      }
      logger.info(`Loading ${ids.length} tweets`);
      const tweets = [];
      const client = new TwitterApi(token);
      try {
        let index = 0;
        while (index < ids.length) {
          const batchIds = ids.slice(index, index + MAX_IDS_PER_REQUEST);
          const res = await client.v2.tweets(batchIds, getTweetsApiOptions);
          // `data` is absent when none of the IDs in the batch could be resolved
          // (e.g. deleted or protected tweets); treat that as an empty batch so
          // the remaining batches are still loaded.
          const fetchedTweets = res.data ?? [];
          const processedTweets = fetchedTweets.map(
            (tweet) => processTweetText(tweet, processTweetTextConfig)
          );
          tweets.push(...processTweets(processedTweets, res.includes));
          index += MAX_IDS_PER_REQUEST;
        }
        if (storage === "default" || storage === "both") {
          for (const item of tweets) {
            const parsedItem = await parseData({
              id: item.id,
              data: item
            });
            store.set({
              id: item.id,
              data: parsedItem,
              digest: generateDigest(parsedItem),
              rendered: { html: item.tweet.text_html }
            });
          }
          logger.info(
            `Successfully loaded ${tweets.length} tweets into the Astro store`
          );
        }
        if (storage === "custom" || storage === "both") {
          // A failed file write is logged separately and does not affect the
          // entries already placed in the Astro store.
          try {
            await saveOrUpdateTweets(tweets, storePath);
            logger.info(
              `Successfully loaded ${tweets.length} tweets into '${storePath}'`
            );
          } catch (error) {
            logger.error(
              `Failed to save tweets to '${storePath}'. ${error.message}`
            );
          }
        }
      } catch (error) {
        if (error instanceof ApiResponseError && error.rateLimitError && error.rateLimit) {
          logger.warn(
            `Please try again later as the rate limit of ${error.rateLimit.limit} per 15 minutes is exceeded with ${error.rateLimit.remaining} left`
          );
        } else {
          logger.error(`Failed to load tweets. ${error.message}`);
        }
      }
    }
  };
}
export { TweetSchema, tweetsLoader };