UNPKG

auron

Version:

Interact with your ATProto labeler from your terminal

220 lines (219 loc) 9.56 kB
"use strict";
var __importDefault = (this && this.__importDefault) || function (mod) {
    return (mod && mod.__esModule) ? mod : { "default": mod };
};
Object.defineProperty(exports, "__esModule", { value: true });
exports.filterSubjectColumns = exports.processSubjects = exports.fetchRecordsForSubjects = exports.fetchProfilesForSubjects = exports.fetchReposForSubjects = exports.saveQueueItems = exports.fetchQueueItems = void 0;
const events_1 = __importDefault(require("events"));
const loader_1 = require("../utils/loader");
const atproto_1 = require("../api/atproto");
const db_1 = require("../services/db");
const transformers_1 = require("../services/subject/transformers");
const common_1 = require("@atproto/common");

/** Number of items written to the local database per insert/save call. */
const SAVE_CHUNK_SIZE = 500;

/**
 * Persist `items` in chunks of SAVE_CHUNK_SIZE, reporting cumulative progress.
 *
 * @param {Array} items - Items to persist.
 * @param {(chunk: Array) => Promise<unknown>} saveChunk - Persists one chunk.
 * @param {(savedSoFar: number) => void} onProgress - Called after each chunk
 *   with the running total of items saved so far (not the chunk size).
 * @returns {Promise<void>}
 */
async function saveInChunks(items, saveChunk, onProgress) {
    let savedSoFar = 0;
    for (const chunk of (0, common_1.chunkArray)(items, SAVE_CHUNK_SIZE)) {
        await saveChunk(chunk);
        savedSoFar += chunk.length;
        onProgress(savedSoFar);
    }
}

/**
 * Fetch moderation queue items, showing progress through the loader.
 *
 * Reads `count`, `reviewState`, `takendown` and `cursor` from `options` and
 * forwards them to `getQueueItems`; `count` is converted with `Number` when set.
 *
 * @param {object} options - Fetch options (see fields above).
 * @returns {Promise<*>} Whatever `withLoader` resolves with — presumably the
 *   `subjectStatuses` array returned by the callback; confirm against `withLoader`.
 */
const fetchQueueItems = async (options) => {
    const emitter = new events_1.default();
    const subjects = await (0, loader_1.withLoader)(`Fetching queue items...`, async (updateMessage) => {
        emitter.on("update", ({ nextCursor, subjectCount, maxCount }) => {
            updateMessage(`Fetched page with cursor: ${nextCursor}, total subjects: ${subjectCount} out of ${maxCount}`);
        });
        const { cursor, subjectStatuses } = await (0, atproto_1.getQueueItems)({
            maxCount: options.count ? Number(options.count) : undefined,
            reviewState: options.reviewState,
            takendown: options.takendown,
            cursor: options.cursor,
        }, emitter);
        updateMessage(`Fetched ${subjectStatuses.length} queue items. \nCursor: ${cursor}`);
        return subjectStatuses;
    });
    return subjects;
};
exports.fetchQueueItems = fetchQueueItems;

/**
 * Store queue items in the local database in chunks.
 *
 * @param {Array} subjects - Queue items; each is passed through
 *   `transformStatusToSubject` before insertion.
 * @returns {Promise<void>}
 * @throws {Error} "No queue items to store" when `subjects` is empty.
 */
const saveQueueItems = async (subjects) => {
    await (0, loader_1.withLoader)(`Storing ${subjects.length} queue items...`, async (updateMessage) => {
        if (subjects.length === 0) {
            updateMessage("No queue items to store");
            throw new Error("No queue items to store");
        }
        await saveInChunks(subjects, (chunk) => db_1.database.insertSubjects(chunk.map(transformers_1.transformStatusToSubject)), (storedCount) => {
            updateMessage(`Stored ${storedCount} out of ${subjects.length} queue items in local database`);
        });
        updateMessage(`Stored ${subjects.length} queue items in local database`);
    });
};
exports.saveQueueItems = saveQueueItems;

/**
 * Fetch repos for every DID returned by `database.getMissingRepoDids()` and
 * save them locally. Does nothing further when no DIDs are returned.
 *
 * @returns {Promise<*>} Whatever `withLoader` resolves with; the callback
 *   itself returns `undefined`.
 */
const fetchReposForSubjects = async () => {
    const emitter = new events_1.default();
    const subjects = await (0, loader_1.withLoader)(`Fetching repos for subjects...`, async (updateMessage) => {
        emitter.on("update", ({ total, repoCount }) => {
            updateMessage(`Fetched ${repoCount} repos out of ${total}`);
        });
        const dids = await db_1.database.getMissingRepoDids();
        updateMessage(`Found ${dids.length} missing repos to be fetched`);
        if (dids.length === 0) {
            return;
        }
        const repos = await (0, atproto_1.getRepos)({ dids }, emitter);
        updateMessage(`Fetched ${repos.length} repos. \nSaving in local database now`);
        await saveInChunks(repos, (chunk) => db_1.database.saveRepos(chunk.map(transformers_1.transformRepoViewToRepo)), (savedCount) => {
            updateMessage(`Saved ${savedCount} out of ${repos.length} repos in local database now`);
        });
        updateMessage(`Saved ${repos.length} repos in local database`);
        return;
    });
    return subjects;
};
exports.fetchReposForSubjects = fetchReposForSubjects;

/**
 * Fetch profiles for every DID returned by `database.getMissingProfileDids()`
 * and save them locally. Does nothing further when no DIDs are returned.
 *
 * @returns {Promise<*>} Whatever `withLoader` resolves with; the callback
 *   itself returns `undefined`.
 */
const fetchProfilesForSubjects = async () => {
    const emitter = new events_1.default();
    const subjects = await (0, loader_1.withLoader)(`Fetching profiles for subjects...`, async (updateMessage) => {
        emitter.on("update", ({ total, profileCount }) => {
            updateMessage(`Fetched ${profileCount} profiles out of ${total}`);
        });
        const dids = await db_1.database.getMissingProfileDids();
        updateMessage(`Found ${dids.length} profiles to be fetched`);
        if (dids.length === 0) {
            return;
        }
        const profiles = await (0, atproto_1.getProfiles)({ dids }, emitter);
        updateMessage(`Fetched ${profiles.length} profiles. \nSaving in local database now`);
        await saveInChunks(profiles, (chunk) => db_1.database.saveProfiles(chunk.map(transformers_1.transformProfileViewToProfile)), (savedCount) => {
            updateMessage(`Saved ${savedCount} out of ${profiles.length} profiles in local database now`);
        });
        updateMessage(`Saved ${profiles.length} profiles in local database`);
        return;
    });
    return subjects;
};
exports.fetchProfilesForSubjects = fetchProfilesForSubjects;

/**
 * Fetch records for every URI returned by `database.getMissingRecordUris()`
 * and save them locally. Does nothing further when no URIs are returned.
 *
 * @returns {Promise<*>} Whatever `withLoader` resolves with; the callback
 *   itself returns `undefined`.
 */
const fetchRecordsForSubjects = async () => {
    const emitter = new events_1.default();
    const subjects = await (0, loader_1.withLoader)(`Fetching records for subjects...`, async (updateMessage) => {
        emitter.on("update", ({ total, recordCount }) => {
            updateMessage(`Fetched ${recordCount} records out of ${total}`);
        });
        const uris = await db_1.database.getMissingRecordUris();
        updateMessage(`Found ${uris.length} missing records to be fetched`);
        if (uris.length === 0) {
            return;
        }
        const records = await (0, atproto_1.getRecords)({ uris }, emitter);
        updateMessage(`Fetched ${records.length} records. \nSaving in local database now`);
        await saveInChunks(records, (chunk) => db_1.database.saveRecords(chunk.map(transformers_1.transformRecordViewToRecord)), (savedCount) => {
            updateMessage(`Saved ${savedCount} out of ${records.length} records in local database`);
        });
        updateMessage(`Saved ${records.length} records in local database`);
        return;
    });
    return subjects;
};
exports.fetchRecordsForSubjects = fetchRecordsForSubjects;

/**
 * Report whether `str` contains any common regex metacharacter.
 *
 * @param {string} str - Candidate pattern.
 * @returns {boolean} True when at least one metacharacter is present.
 */
function looksLikeRegex(str) {
    // Check for common regex metacharacters
    const regexChars = /[.*+?^${}()|[\]\\]/;
    return regexChars.test(str);
}

/**
 * Match `y` against `x`, treating `x` as a regex when it looks like one and
 * as a literal substring otherwise (or when it fails to compile).
 *
 * @param {string} x - Pattern or literal text.
 * @param {string} y - Text to search.
 * @param {boolean} [ignoreCase=false] - Match case-insensitively. The regex
 *   path uses the `i` flag rather than lowercasing the pattern, because
 *   lowercasing would turn escapes such as `\D` or `\W` into `\d` / `\w`.
 * @returns {boolean} True when `y` matches.
 */
function smartMatch(x, y, ignoreCase = false) {
    const literalMatch = () => (ignoreCase
        ? y.toLowerCase().includes(x.toLowerCase())
        : y.includes(x));
    if (!looksLikeRegex(x)) {
        // Treat as literal string
        return literalMatch();
    }
    try {
        return new RegExp(x, ignoreCase ? "i" : "").test(y);
    }
    catch (error) {
        // If regex is invalid, fall back to literal match
        return literalMatch();
    }
}

/**
 * Split a `||`-separated keyword option into lowercased, trimmed, non-empty
 * keywords.
 *
 * @param {*} raw - Raw option value; coerced to a string.
 * @returns {string[]} Keywords to match (may be empty).
 */
function parseKeywords(raw) {
    return `${raw}`
        .toLowerCase()
        .split("||")
        .map((k) => k.trim())
        .filter(Boolean);
}

/**
 * Extract the lowercased `text` field from a subject's JSON-encoded `value`.
 *
 * @param {object} subject - Subject row with an optional `value` string.
 * @returns {string|undefined} Lowercased text, or `undefined` when the value
 *   is missing, is not valid JSON, or has no string `text` field.
 */
function getRecordText(subject) {
    let recordValue;
    try {
        recordValue = JSON.parse(subject.value || "{}");
    }
    catch (error) {
        // A single malformed stored value should not abort the whole listing;
        // treat it as a record without text so it simply does not match.
        return undefined;
    }
    const text = recordValue?.text;
    return typeof text === "string" ? text.toLowerCase() : undefined;
}

/**
 * List subjects from the local database and apply the in-memory filters
 * selected by `options`, then project each subject onto `options.columns`.
 *
 * Filters, applied in this order when the option is set:
 *  - `bio` / `keyword`: profile description contains any `||`-separated keyword
 *  - `email`: email matches the pattern (regex or literal, case-insensitive)
 *  - `keyword`: record `text` contains any `||`-separated keyword
 *  - `riskScore`: "riskScore" threat signature is numerically greater
 *  - `lastCountry`: "lastSigninCountry" threat signature is in the list
 *
 * @param {object} options - Listing and filter options (`takendown`, `type`,
 *   `cursor`, `count`, `bio`, `keyword`, `email`, `riskScore`, `lastCountry`,
 *   `columns`).
 * @returns {Promise<object[]>} Filtered subjects, column-projected.
 */
const processSubjects = async (options) => {
    let subjects = await db_1.database.listSubjects({
        takendown: options.takendown,
        subjectType: options.type,
        cursor: options.cursor,
        limit: options.count,
    });
    if (options.bio || options.keyword) {
        // Always match against the split/trimmed keywords so that inputs such as
        // "spam||" or " spam " behave the same as they do in the multi-keyword case.
        const keywords = parseKeywords(options.bio || options.keyword);
        subjects = subjects.filter((subject) => {
            const { profile } = subject;
            if (!profile?.description)
                return false;
            const desc = profile.description.toLowerCase();
            return keywords.some((k) => desc.includes(k));
        });
    }
    if (options.email) {
        subjects = subjects.filter((subject) => {
            const { email } = subject;
            if (!email)
                return false;
            return smartMatch(options.email, email, true);
        });
    }
    if (options.keyword) {
        const keywords = parseKeywords(options.keyword);
        subjects = subjects.filter((subject) => {
            const text = getRecordText(subject);
            if (!text)
                return false;
            return keywords.some((k) => text.includes(k));
        });
    }
    if (options.riskScore) {
        // Compare numerically: both the option and the stored signature value may
        // be strings, and string comparison would order "9" above "10".
        const threshold = Number(options.riskScore);
        subjects = subjects.filter((subject) => {
            const hcap = subject.threatSignatures || [];
            const rawScore = hcap.find((h) => h.property === "riskScore")?.value;
            if (rawScore === undefined || rawScore === null || rawScore === "")
                return false;
            const riskScore = Number(rawScore);
            return Number.isFinite(riskScore) && riskScore > threshold;
        });
    }
    if (options.lastCountry?.length) {
        const countriesToMatch = options.lastCountry.map((c) => c.toLowerCase());
        subjects = subjects.filter((subject) => {
            const hcap = subject.threatSignatures || [];
            const lastCountry = hcap.find((h) => h.property === "lastSigninCountry")?.value;
            if (!lastCountry)
                return false;
            return countriesToMatch.includes(lastCountry.toLowerCase());
        });
    }
    // Split the column list once instead of once per subject.
    const columns = options.columns?.split(",");
    return subjects.map((s) => (0, exports.filterSubjectColumns)(s, columns));
};
exports.processSubjects = processSubjects;

/**
 * Project a subject onto the given column names.
 *
 * @param {object} subject - Subject to project.
 * @param {string[]} [columns] - Property names to keep; names not present
 *   on `subject` (per the `in` operator) are skipped.
 * @returns {object} `subject` itself when `columns` is falsy, otherwise a new
 *   object holding only the requested properties.
 */
const filterSubjectColumns = (subject, columns) => {
    if (!columns)
        return subject;
    const data = {};
    columns.forEach((column) => {
        if (column in subject) {
            // @ts-ignore
            data[column] = subject[column];
        }
    });
    return data;
};
exports.filterSubjectColumns = filterSubjectColumns;