@microfox/cli

Universal CLI tool for creating modern TypeScript packages with npm availability checking

import { SQSClient, SendMessageCommand } from "@aws-sdk/client-sqs";
import {
  subredditStore,
  ragSubredditPaginator,
  subredditListPaginator,
} from "../helpers/ragRedis.js";
import { handler as indexSubredditHandler } from "./sqs-index.js";
import { createMockSQSEvent, ProcessTask } from "@microfox/tool-core";

const sqsClient = new SQSClient({});
const isOffline = process.env.IS_OFFLINE === "true";
const STALE_AFTER_SECONDS = 60 * 60; // 1 hour
const ITEMS_PER_PAGE = 100;

/**
 * An example of how a cron job indexes subreddits progressively (in batches).
 */
export const handler = async (): Promise<void> => {
  console.log("handler: cron-index-subreddit");
  const taskHandler = new ProcessTask({
    url: process.env.TASK_UPSTASH_REDIS_REST_URL,
    token: process.env.TASK_UPSTASH_REDIS_REST_TOKEN,
  });
  try {
    // Resume the existing indexing run, or seed a new one starting at page 1.
    let paginatorState = await subredditListPaginator.getCurrentStatus();
    if (!paginatorState) {
      paginatorState = await subredditListPaginator.startNewIndexing({
        page: 1,
        itemsPerPage: ITEMS_PER_PAGE,
        totalCount: 0,
      });
    }
    if (!paginatorState.progress) {
      console.error("handler: paginator state is not valid", {
        paginatorState,
      });
      return;
    }

    const page = paginatorState.progress.page;
    console.log("handler: processing page", { page });

    const allSubreddits = await subredditStore.list();
    const totalCount = allSubreddits.length;
    const totalPages = Math.ceil(totalCount / ITEMS_PER_PAGE);
    console.log("handler: subreddits stats", { totalCount, totalPages });

    if (totalCount === 0) {
      console.log("handler: no subreddits to process");
      return;
    }

    const startIndex = (page - 1) * ITEMS_PER_PAGE;
    if (startIndex >= totalCount && totalCount > 0) {
      console.log("handler: page out of bounds, resetting to page 1");
      await subredditListPaginator.updateIndexingStatus({
        page: 1,
        totalCount,
      });
      // We could either process page 1 now or wait for the next cron run.
      // Let's wait for the next run to keep the logic simple.
      return;
    }

    const subredditsToProcess = allSubreddits.slice(
      startIndex,
      startIndex + ITEMS_PER_PAGE
    );
    console.log("handler: processing subreddits", {
      count: subredditsToProcess.length,
    });

    for (const subreddit of subredditsToProcess) {
      const subredditName = subreddit.name;
      console.log("handler: checking subreddit", { subredditName });

      // Only re-index subreddits whose last run is older than the TTL.
      const isStale =
        await ragSubredditPaginator(subredditName).isStale(STALE_AFTER_SECONDS);
      if (isStale) {
        console.log("handler: subreddit is stale, triggering indexing", {
          subredditName,
        });
        const task = await taskHandler.createTask({
          subreddit: subredditName,
        });
        const sqsMessageBody = {
          subreddit: subredditName,
          task_id: task.id,
        };
        if (isOffline) {
          console.log(
            "handler: running in offline mode, invoking handler directly"
          );
          const sqsEvent = createMockSQSEvent(sqsMessageBody);
          await indexSubredditHandler(sqsEvent);
          console.log("handler: invoked handler directly");
        } else {
          if (!process.env.INDEX_SUBREDDIT_QUEUE_URL) {
            console.error("INDEX_SUBREDDIT_QUEUE_URL is not set");
            continue; // Move on to the next subreddit
          }
          console.log("handler: sending message to SQS", {
            queueUrl: process.env.INDEX_SUBREDDIT_QUEUE_URL,
          });
          const command = new SendMessageCommand({
            QueueUrl: process.env.INDEX_SUBREDDIT_QUEUE_URL,
            MessageBody: JSON.stringify(sqsMessageBody),
          });
          await sqsClient.send(command);
          console.log("handler: sent message to SQS");
        }
      } else {
        console.log("handler: subreddit is not stale, skipping", {
          subredditName,
        });
      }
    }

    // Advance to the next page, wrapping back to page 1 after the last page.
    const nextPage = page >= totalPages && totalPages > 0 ? 1 : page + 1;
    console.log("handler: updating paginator for next run", { nextPage });
    await subredditListPaginator.updateIndexingStatus({
      page: nextPage,
      totalCount,
    });
  } catch (error) {
    console.error("handler: failed to process cron-index-subreddit", error);
  }
};
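
For reference, here is a minimal sketch of the shapes this handler assumes for the ragRedis helpers. The names match the imports above, but every signature below is inferred from how this file uses them, not taken from the actual "../helpers/ragRedis.js" module.

// Inferred shapes only: the real ragRedis module may differ.
interface PaginatorProgress {
  page: number;
  itemsPerPage?: number;
  totalCount: number;
}

interface PaginatorState {
  progress?: PaginatorProgress;
}

// subredditListPaginator: tracks which page of the subreddit list the cron
// is on, so each run processes one batch and then advances.
interface SubredditListPaginator {
  getCurrentStatus(): Promise<PaginatorState | null>;
  startNewIndexing(progress: PaginatorProgress): Promise<PaginatorState>;
  updateIndexingStatus(progress: {
    page: number;
    totalCount: number;
  }): Promise<void>;
}

// ragSubredditPaginator(name): per-subreddit indexing state; isStale()
// reports whether the last index run is older than the given TTL in seconds.
interface SubredditIndexState {
  isStale(staleAfterSeconds: number): Promise<boolean>;
}

// subredditStore.list(): returns every tracked subreddit.
interface SubredditStore {
  list(): Promise<Array<{ name: string }>>;
}

Under these assumptions the wrap-around pagination works out as follows: with, say, 250 tracked subreddits and ITEMS_PER_PAGE = 100, totalPages is 3, so successive cron runs process pages 1, 2, and 3, then wrap back to page 1, re-checking the whole list for staleness roughly every three runs.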