@microfox/cli
Universal CLI tool for creating modern TypeScript packages with npm availability checking
import { SQSClient, SendMessageCommand } from "@aws-sdk/client-sqs";
import {
subredditStore,
ragSubredditPaginator,
subredditListPaginator,
} from "../helpers/ragRedis.js";
import { handler as indexSubredditHandler } from "./sqs-index.js";
import { createMockSQSEvent, ProcessTask } from "@microfox/tool-core";
const sqsClient = new SQSClient({});
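// serverless-offline sets IS_OFFLINE="true"; in that mode the consumer is invoked directly instead of going through SQS.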
const isOffline = process.env.IS_OFFLINE === "true";
const STALE_AFTER_SECONDS = 60 * 60; // 1 hour
const ITEMS_PER_PAGE = 100;
/**
 * Example of a cron job that indexes subreddits progressively, one page
 * (ITEMS_PER_PAGE subreddits) per run, re-indexing any subreddit whose
 * data is older than STALE_AFTER_SECONDS.
 */
export const handler = async (): Promise<void> => {
console.log("handler: cron-index-subreddit");
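  // Task tracker backed by Upstash Redis; REST credentials come from the environment.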
const taskHandler = new ProcessTask({
url: process.env.TASK_UPSTASH_REDIS_REST_URL,
token: process.env.TASK_UPSTASH_REDIS_REST_TOKEN,
});
try {
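    // Resume pagination from the previous run, or start a fresh pass at page 1.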
let paginatorState = await subredditListPaginator.getCurrentStatus();
if (!paginatorState) {
paginatorState = await subredditListPaginator.startNewIndexing({
page: 1,
itemsPerPage: ITEMS_PER_PAGE,
totalCount: 0,
});
}
if (!paginatorState.progress) {
console.error("handler: paginator state is not valid", {
paginatorState,
});
return;
}
const page = paginatorState.progress.page;
console.log("handler: processing page", { page });
const allSubreddits = await subredditStore.list();
const totalCount = allSubreddits.length;
const totalPages = Math.ceil(totalCount / ITEMS_PER_PAGE);
console.log("handler: subreddits stats", { totalCount, totalPages });
if (totalCount === 0) {
console.log("handler: no subreddits to process");
return;
}
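    // Slice out the window of subreddits that belongs to the current page.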
const startIndex = (page - 1) * ITEMS_PER_PAGE;
    if (startIndex >= totalCount) {
console.log("handler: page out of bounds, resetting to page 1");
await subredditListPaginator.updateIndexingStatus({
page: 1,
totalCount,
});
// We could either process page 1 now or wait for the next cron run.
// Let's wait for the next run to keep logic simple.
return;
}
const subredditsToProcess = allSubreddits.slice(
startIndex,
startIndex + ITEMS_PER_PAGE
);
console.log("handler: processing subreddits", {
count: subredditsToProcess.length,
});
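    // Re-index only subreddits whose last indexing run is older than the staleness window.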
for (const subreddit of subredditsToProcess) {
const subredditName = subreddit.name;
console.log("handler: checking subreddit", { subredditName });
const isStale =
await ragSubredditPaginator(subredditName).isStale(STALE_AFTER_SECONDS);
if (isStale) {
console.log("handler: subreddit is stale, triggering indexing", {
subredditName,
});
const task = await taskHandler.createTask({
subreddit: subredditName,
});
const sqsMessageBody = {
subreddit: subredditName,
task_id: task.id,
};
if (isOffline) {
console.log(
"handler: running in offline mode, invoking handler directly"
);
const sqsEvent = createMockSQSEvent(sqsMessageBody);
await indexSubredditHandler(sqsEvent);
console.log("handler: invoked handler directly");
} else {
if (!process.env.INDEX_SUBREDDIT_QUEUE_URL) {
console.error("INDEX_SUBREDDIT_QUEUE_URL is not set");
continue; // Move to the next subreddit
}
console.log("handler: sending message to SQS", {
queueUrl: process.env.INDEX_SUBREDDIT_QUEUE_URL,
});
const command = new SendMessageCommand({
QueueUrl: process.env.INDEX_SUBREDDIT_QUEUE_URL,
MessageBody: JSON.stringify(sqsMessageBody),
});
await sqsClient.send(command);
console.log("handler: sent message to SQS");
}
} else {
console.log("handler: subreddit is not stale, skipping", {
subredditName,
});
}
}
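    // Wrap back to page 1 after the final page so the next run starts a new pass.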
    const nextPage = page >= totalPages ? 1 : page + 1;
console.log("handler: updating paginator for next run", { nextPage });
await subredditListPaginator.updateIndexingStatus({
page: nextPage,
totalCount,
});
} catch (error) {
console.error("handler: failed to process cron-index-subreddit", error);
}
};
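For reference, createMockSQSEvent presumably wraps the payload in the standard AWS Lambda SQS event shape, so sqs-index.js can read the same Records[].body structure in both offline and deployed modes. Below is an illustrative sketch of that shape; the field values and the assumption about what @microfox/tool-core produces are ours, not taken from the library:

// Illustrative sketch only: an SQS event in the standard AWS Lambda shape,
// assuming createMockSQSEvent yields something equivalent.
import type { SQSEvent } from "aws-lambda";

const exampleEvent: SQSEvent = {
  Records: [
    {
      messageId: "00000000-0000-0000-0000-000000000000",
      receiptHandle: "mock-receipt-handle",
      // Same JSON body this cron handler sends via SendMessageCommand.
      body: JSON.stringify({ subreddit: "typescript", task_id: "task-123" }),
      attributes: {
        ApproximateReceiveCount: "1",
        SentTimestamp: "0",
        SenderId: "mock",
        ApproximateFirstReceiveTimestamp: "0",
      },
      messageAttributes: {},
      md5OfBody: "",
      eventSource: "aws:sqs",
      eventSourceARN: "arn:aws:sqs:us-east-1:000000000000:mock-queue",
      awsRegion: "us-east-1",
    },
  ],
};

Because the offline branch feeds this same structure to indexSubredditHandler, the consumer only ever parses Records[].body and does not need to know whether the message arrived from a real queue.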