UNPKG

auron

Version:

Interact with your ATProto labeler from your terminal

311 lines (275 loc) 9 kB
import EventEmitter from "events";
import { withLoader } from "../utils/loader";
import {
  getProfiles,
  getQueueItems,
  getRecords,
  getRepos,
} from "../api/atproto";
import { database } from "../services/db";
import { ToolsOzoneModerationDefs } from "@atproto/api";
import {
  transformRecordViewToRecord,
  transformRepoViewToRepo,
  transformProfileViewToProfile,
  transformStatusToSubject,
} from "../services/subject/transformers";
import { chunkArray } from "@atproto/common";
import { RecordRow } from "../schemas/record";
import { RepoRow } from "../schemas/repo";
import { SubjectRow } from "../schemas/subject";

/** Number of rows handed to the local database per write call. */
const DB_CHUNK_SIZE = 500;

/**
 * Fetches moderation queue items through `getQueueItems`, reporting progress
 * on the loader while pages arrive.
 *
 * @param options.count - Maximum number of items to fetch; a falsy value
 *   means no maximum is passed along.
 * @param options.cursor - Cursor to start from.
 * @param options.reviewState - Review state forwarded to `getQueueItems`.
 * @param options.takendown - Takedown flag forwarded to `getQueueItems`.
 * @returns The `subjectStatuses` returned by `getQueueItems`.
 */
export const fetchQueueItems = async (options: {
  count: number;
  cursor?: string;
  reviewState?: string;
  takendown?: boolean;
}) => {
  const emitter = new EventEmitter();
  const subjects = await withLoader(
    `Fetching queue items...`,
    async (updateMessage) => {
      // getQueueItems is handed the emitter; each "update" event refreshes the loader.
      emitter.on("update", ({ nextCursor, subjectCount, maxCount }) => {
        updateMessage(
          `Fetched page with cursor: ${nextCursor}, total subjects: ${subjectCount} out of ${maxCount}`
        );
      });
      const { cursor, subjectStatuses } = await getQueueItems(
        {
          // count may arrive as a string (e.g. from CLI parsing), hence Number().
          maxCount: options.count ? Number(options.count) : undefined,
          reviewState: options.reviewState,
          takendown: options.takendown,
          cursor: options.cursor,
        },
        emitter
      );
      updateMessage(
        `Fetched ${subjectStatuses.length} queue items. Cursor: ${cursor}`
      );
      return subjectStatuses;
    }
  );
  return subjects;
};

/**
 * Stores queue items in the local database in chunks of `DB_CHUNK_SIZE`.
 *
 * @param subjects - Subject status views to transform and insert.
 * @throws Error when `subjects` is empty.
 */
export const saveQueueItems = async (
  subjects: ToolsOzoneModerationDefs.SubjectStatusView[]
) => {
  await withLoader(
    `Storing ${subjects.length} queue items...`,
    async (updateMessage) => {
      if (subjects.length === 0) {
        updateMessage("No queue items to store");
        throw new Error("No queue items to store");
      }
      // Running total so the progress message accumulates across chunks.
      let stored = 0;
      for (const chunk of chunkArray(subjects, DB_CHUNK_SIZE)) {
        await database.insertSubjects(chunk.map(transformStatusToSubject));
        stored += chunk.length;
        updateMessage(
          `Stored ${stored} out of ${subjects.length} queue items in local database`
        );
      }
      updateMessage(`Stored ${subjects.length} queue items in local database`);
    }
  );
};

/**
 * Fetches repos for the DIDs reported by `database.getMissingRepoDids()` and
 * saves them in the local database in chunks.
 *
 * @returns Whatever `withLoader` resolves to; the callback itself returns nothing.
 */
export const fetchReposForSubjects = async () => {
  const emitter = new EventEmitter();
  const subjects = await withLoader(
    `Fetching repos for subjects...`,
    async (updateMessage) => {
      emitter.on("update", ({ total, repoCount }) => {
        updateMessage(`Fetched ${repoCount} repos out of ${total}`);
      });
      const dids = await database.getMissingRepoDids();
      updateMessage(`Found ${dids.length} missing repos to be fetched`);
      if (dids.length === 0) {
        return;
      }
      const repos = await getRepos({ dids }, emitter);
      updateMessage(
        `Fetched ${repos.length} repos. Saving in local database now`
      );
      // Running total so the progress message accumulates across chunks.
      let saved = 0;
      for (const chunk of chunkArray(repos, DB_CHUNK_SIZE)) {
        await database.saveRepos(chunk.map(transformRepoViewToRepo));
        saved += chunk.length;
        updateMessage(
          `Saved ${saved} out of ${repos.length} repos in local database now`
        );
      }
      updateMessage(`Saved ${repos.length} repos in local database`);
      return;
    }
  );
  return subjects;
};

/**
 * Fetches profiles for the DIDs reported by `database.getMissingProfileDids()`
 * and saves them in the local database in chunks.
 *
 * @returns Whatever `withLoader` resolves to; the callback itself returns nothing.
 */
export const fetchProfilesForSubjects = async () => {
  const emitter = new EventEmitter();
  const subjects = await withLoader(
    `Fetching profiles for subjects...`,
    async (updateMessage) => {
      emitter.on("update", ({ total, profileCount }) => {
        updateMessage(`Fetched ${profileCount} profiles out of ${total}`);
      });
      const dids = await database.getMissingProfileDids();
      updateMessage(`Found ${dids.length} profiles to be fetched`);
      if (dids.length === 0) {
        return;
      }
      const profiles = await getProfiles({ dids }, emitter);
      updateMessage(
        `Fetched ${profiles.length} profiles. Saving in local database now`
      );
      // Running total so the progress message accumulates across chunks.
      let saved = 0;
      for (const chunk of chunkArray(profiles, DB_CHUNK_SIZE)) {
        await database.saveProfiles(chunk.map(transformProfileViewToProfile));
        saved += chunk.length;
        updateMessage(
          `Saved ${saved} out of ${profiles.length} profiles in local database now`
        );
      }
      updateMessage(`Saved ${profiles.length} profiles in local database`);
      return;
    }
  );
  return subjects;
};

/**
 * Fetches records for the URIs reported by `database.getMissingRecordUris()`
 * and saves them in the local database in chunks.
 *
 * @returns Whatever `withLoader` resolves to; the callback itself returns nothing.
 */
export const fetchRecordsForSubjects = async () => {
  const emitter = new EventEmitter();
  const subjects = await withLoader(
    `Fetching records for subjects...`,
    async (updateMessage) => {
      emitter.on("update", ({ total, recordCount }) => {
        updateMessage(`Fetched ${recordCount} records out of ${total}`);
      });
      const uris = await database.getMissingRecordUris();
      updateMessage(`Found ${uris.length} missing records to be fetched`);
      if (uris.length === 0) {
        return;
      }
      const records = await getRecords({ uris }, emitter);
      updateMessage(
        `Fetched ${records.length} records. Saving in local database now`
      );
      // Running total so the progress message accumulates across chunks.
      let saved = 0;
      for (const chunk of chunkArray(records, DB_CHUNK_SIZE)) {
        await database.saveRecords(chunk.map(transformRecordViewToRecord));
        saved += chunk.length;
        updateMessage(
          `Saved ${saved} out of ${records.length} records in local database`
        );
      }
      updateMessage(`Saved ${records.length} records in local database`);
      return;
    }
  );
  return subjects;
};

/** Returns true when `str` contains at least one regex metacharacter. */
function looksLikeRegex(str: string) {
  // Check for common regex metacharacters
  const regexChars = /[.*+?^${}()|[\]\\]/;
  return regexChars.test(str);
}

/**
 * Builds a reusable matcher for `pattern`: a regex test when the pattern
 * contains regex metacharacters and compiles, a substring test otherwise.
 *
 * @param pattern - Regex source or literal text.
 * @param ignoreCase - When true, matching is case-insensitive. The pattern is
 *   never lower-cased, because that would turn `\D`, `\W`, `\S` into their opposites.
 */
function buildMatcher(
  pattern: string,
  ignoreCase = false
): (text: string) => boolean {
  const needle = ignoreCase ? pattern.toLowerCase() : pattern;
  const literalMatch = (text: string): boolean =>
    (ignoreCase ? text.toLowerCase() : text).includes(needle);

  if (!looksLikeRegex(pattern)) {
    // Treat as literal string
    return literalMatch;
  }
  try {
    // No "g" flag, so test() carries no state between calls.
    const regex = new RegExp(pattern, ignoreCase ? "i" : "");
    return (text) => regex.test(text);
  } catch {
    // If regex is invalid, fall back to literal match
    return literalMatch;
  }
}

/**
 * Tests `y` against `x`, treating `x` as a regex when it looks like one and
 * as a literal substring otherwise (or when it fails to compile).
 *
 * @param x - Pattern or literal text.
 * @param y - Text to test.
 * @param ignoreCase - Optional case-insensitive matching; defaults to false.
 */
function smartMatch(x: string, y: string, ignoreCase = false) {
  return buildMatcher(x, ignoreCase)(y);
}

/** Splits a `||`-separated query into trimmed, lower-cased, non-empty keywords. */
function parseKeywords(query: string): string[] {
  return query
    .toLowerCase()
    .split("||")
    .map((k) => k.trim())
    .filter(Boolean);
}

/**
 * Extracts the `text` field from a JSON-encoded record value.
 * Returns undefined when the value is empty, is not valid JSON, or has no
 * string `text`, so one malformed row cannot abort the whole listing.
 */
function extractRecordText(value: unknown): string | undefined {
  if (typeof value !== "string" || !value) return undefined;
  try {
    const parsed: unknown = JSON.parse(value);
    const text = (parsed as { text?: unknown } | null)?.text;
    return typeof text === "string" ? text : undefined;
  } catch {
    return undefined;
  }
}

/**
 * Looks up the `value` of the threat-signature entry whose `property` equals
 * the given name. Returns undefined when there is no such entry or when
 * `signatures` is not an array.
 */
function findThreatSignatureValue(
  signatures: unknown,
  property: string
): unknown {
  if (!Array.isArray(signatures)) return undefined;
  return signatures.find((s) => s?.property === property)?.value;
}

/**
 * Lists subjects from the local database and narrows them with the optional
 * in-memory filters below; every filter that is set must pass.
 *
 * @param options.type - Subject type forwarded to `database.listSubjects`.
 * @param options.cursor - Cursor forwarded to `database.listSubjects`.
 * @param options.count - Limit forwarded to `database.listSubjects`.
 * @param options.bio - `||`-separated keywords matched against the profile
 *   description (case-insensitive, any keyword matches).
 * @param options.keyword - `||`-separated keywords matched against the record
 *   text; also used for the profile description when `bio` is not given.
 * @param options.riskScore - Keeps subjects whose `riskScore` threat signature
 *   is strictly greater than this value. A falsy value disables the filter.
 * @param options.columns - Comma-separated column names to keep in the output.
 * @param options.takendown - Takedown flag forwarded to `database.listSubjects`.
 * @param options.lastCountry - Keeps subjects whose `lastSigninCountry` threat
 *   signature equals one of these values (case-insensitive).
 * @param options.email - Regex or literal matched against the subject email
 *   (case-insensitive).
 * @returns The matching subjects, reduced to `columns` when given.
 */
export const processSubjects = async (options: {
  type?: string;
  cursor?: string;
  count?: number;
  bio?: string;
  keyword?: string;
  riskScore?: number;
  columns?: string;
  takendown?: boolean;
  lastCountry?: string[];
  email?: string;
}) => {
  let subjects = await database.listSubjects({
    takendown: options.takendown,
    subjectType: options.type,
    cursor: options.cursor,
    limit: options.count,
  });

  const descriptionQuery = options.bio || options.keyword;
  if (descriptionQuery) {
    const keywords = parseKeywords(descriptionQuery);
    subjects = subjects.filter((subject) => {
      const { profile } = subject;
      if (!profile?.description) return false;
      const desc = profile.description.toLowerCase();
      return keywords.some((k) => desc.includes(k));
    });
  }

  if (options.email) {
    // Compile once instead of once per subject.
    const matchesEmail = buildMatcher(options.email, true);
    subjects = subjects.filter((subject) => {
      const { email } = subject;
      if (!email) return false;
      return matchesEmail(email);
    });
  }

  if (options.keyword) {
    const keywords = parseKeywords(options.keyword);
    subjects = subjects.filter((subject) => {
      const text = extractRecordText(subject.value)?.toLowerCase();
      if (!text) return false;
      return keywords.some((k) => text.includes(k));
    });
  }

  if (options.riskScore) {
    // Coerce both sides so string inputs are compared numerically.
    const threshold = Number(options.riskScore);
    subjects = subjects.filter((subject) => {
      const riskScore = findThreatSignatureValue(
        subject.threatSignatures,
        "riskScore"
      );
      if (!riskScore) return false;
      return Number(riskScore) > threshold;
    });
  }

  if (options.lastCountry?.length) {
    const countriesToMatch = options.lastCountry.map((c) => c.toLowerCase());
    subjects = subjects.filter((subject) => {
      const lastCountry = findThreatSignatureValue(
        subject.threatSignatures,
        "lastSigninCountry"
      );
      if (typeof lastCountry !== "string" || !lastCountry) return false;
      return countriesToMatch.includes(lastCountry.toLowerCase());
    });
  }

  // Split the column list once rather than once per subject.
  const columns = options.columns?.split(",");
  return subjects.map((s) => filterSubjectColumns(s, columns));
};

/**
 * Reduces a subject row to the requested columns.
 *
 * @param subject - Row to project.
 * @param columns - Column names to keep; surrounding whitespace is ignored and
 *   names not present on the row are skipped. When omitted, the row is
 *   returned unchanged.
 * @returns A new object with the selected columns, or `subject` itself when
 *   `columns` is undefined.
 */
export const filterSubjectColumns = (
  subject: SubjectRow & RepoRow & RecordRow,
  columns?: string[]
): Record<string, any> => {
  if (!columns) return subject;
  // Index-signature view of the row so dynamic column names need no @ts-ignore.
  const source: Record<string, any> = subject;
  const data: Record<string, any> = {};
  for (const column of columns) {
    const name = column.trim();
    if (name && name in source) {
      data[name] = source[name];
    }
  }
  return data;
};