auron
Version:
Interact with your ATProto labeler from your terminal
220 lines (219 loc) • 9.56 kB
JavaScript
;
// Module-interop helper (looks like compiler-emitted boilerplate — confirm before editing by hand).
// Reuses an already-defined `this.__importDefault` when one exists; otherwise returns `mod`
// unchanged when it is flagged `__esModule`, and wraps anything else as `{ default: mod }`
// so callers can always read `.default`.
var __importDefault = (this && this.__importDefault) || function (mod) {
return (mod && mod.__esModule) ? mod : { "default": mod };
};
Object.defineProperty(exports, "__esModule", { value: true });
exports.filterSubjectColumns = exports.processSubjects = exports.fetchRecordsForSubjects = exports.fetchProfilesForSubjects = exports.fetchReposForSubjects = exports.saveQueueItems = exports.fetchQueueItems = void 0;
const events_1 = __importDefault(require("events"));
const loader_1 = require("../utils/loader");
const atproto_1 = require("../api/atproto");
const db_1 = require("../services/db");
const transformers_1 = require("../services/subject/transformers");
const common_1 = require("@atproto/common");
/**
 * Fetches moderation queue items via `getQueueItems`, showing progress
 * through the loader while pages arrive.
 *
 * @param {object} options - Reads `count` (converted with `Number` when truthy,
 *   otherwise sent as `undefined`), `reviewState`, `takendown` and `cursor`.
 * @returns {Promise<*>} Whatever `withLoader` resolves with; the loader callback
 *   returns the fetched `subjectStatuses`.
 */
const fetchQueueItems = async (options) => {
    const progress = new events_1.default();
    const loadQueue = async (updateMessage) => {
        // Subscribe before the request starts so every "update" event is reflected in the loader.
        progress.on("update", (page) => {
            const { nextCursor, subjectCount, maxCount } = page;
            updateMessage(`Fetched page with cursor: ${nextCursor}, total subjects: ${subjectCount} out of ${maxCount}`);
        });
        const query = {
            maxCount: options.count ? Number(options.count) : undefined,
            reviewState: options.reviewState,
            takendown: options.takendown,
            cursor: options.cursor,
        };
        const { cursor: lastCursor, subjectStatuses: statuses } = await (0, atproto_1.getQueueItems)(query, progress);
        updateMessage(`Fetched ${statuses.length} queue items. Cursor: ${lastCursor}`);
        return statuses;
    };
    const fetched = await (0, loader_1.withLoader)(`Fetching queue items...`, loadQueue);
    return fetched;
};
exports.fetchQueueItems = fetchQueueItems;
/**
 * Stores queue items in the local database in batches of 500, reporting
 * progress through the loader.
 *
 * @param {Array} subjects - Items to store; each one is mapped through
 *   `transformStatusToSubject` before being handed to `database.insertSubjects`.
 * @returns {Promise<void>}
 * @throws {Error} The loader callback throws "No queue items to store" when
 *   `subjects` is empty (whether it reaches the caller depends on `withLoader`).
 */
const saveQueueItems = async (subjects) => {
    await (0, loader_1.withLoader)(`Storing ${subjects.length} queue items...`, async (updateMessage) => {
        if (subjects.length === 0) {
            updateMessage("No queue items to store");
            throw new Error("No queue items to store");
        }
        // Running total: the progress line must show how many items are stored so far,
        // not just the size of the batch that was written last.
        let storedCount = 0;
        for (const chunk of (0, common_1.chunkArray)(subjects, 500)) {
            await db_1.database.insertSubjects(chunk.map(transformers_1.transformStatusToSubject));
            storedCount += chunk.length;
            updateMessage(`Stored ${storedCount} out of ${subjects.length} queue items in local database`);
        }
        updateMessage(`Stored ${subjects.length} queue items in local database`);
    });
};
exports.saveQueueItems = saveQueueItems;
/**
 * Fetches the repos reported as missing by `database.getMissingRepoDids()`
 * and saves them to the local database in batches of 500.
 *
 * Progress is reported through the loader: once per "update" event emitted
 * while `getRepos` runs, and once per saved batch.
 *
 * @returns {Promise<*>} Whatever `withLoader` resolves with; the loader
 *   callback itself returns nothing.
 */
const fetchReposForSubjects = async () => {
    const emitter = new events_1.default();
    const loaderResult = await (0, loader_1.withLoader)(`Fetching repos for subjects...`, async (updateMessage) => {
        emitter.on("update", ({ total, repoCount }) => {
            updateMessage(`Fetched ${repoCount} repos out of ${total}`);
        });
        const dids = await db_1.database.getMissingRepoDids();
        updateMessage(`Found ${dids.length} missing repos to be fetched`);
        if (dids.length === 0) {
            return;
        }
        const repos = await (0, atproto_1.getRepos)({ dids }, emitter);
        updateMessage(`Fetched ${repos.length} repos. Saving in local database now`);
        // Running total so the message reflects overall progress rather than the last batch size.
        let savedCount = 0;
        for (const chunk of (0, common_1.chunkArray)(repos, 500)) {
            await db_1.database.saveRepos(chunk.map(transformers_1.transformRepoViewToRepo));
            savedCount += chunk.length;
            updateMessage(`Saved ${savedCount} out of ${repos.length} repos in local database now`);
        }
        updateMessage(`Saved ${repos.length} repos in local database`);
    });
    return loaderResult;
};
exports.fetchReposForSubjects = fetchReposForSubjects;
/**
 * Fetches the profiles reported as missing by `database.getMissingProfileDids()`
 * and saves them to the local database in batches of 500.
 *
 * Progress is reported through the loader: once per "update" event emitted
 * while `getProfiles` runs, and once per saved batch.
 *
 * @returns {Promise<*>} Whatever `withLoader` resolves with; the loader
 *   callback itself returns nothing.
 */
const fetchProfilesForSubjects = async () => {
    const emitter = new events_1.default();
    const loaderResult = await (0, loader_1.withLoader)(`Fetching profiles for subjects...`, async (updateMessage) => {
        emitter.on("update", ({ total, profileCount }) => {
            updateMessage(`Fetched ${profileCount} profiles out of ${total}`);
        });
        const dids = await db_1.database.getMissingProfileDids();
        updateMessage(`Found ${dids.length} profiles to be fetched`);
        if (dids.length === 0) {
            return;
        }
        const profiles = await (0, atproto_1.getProfiles)({ dids }, emitter);
        updateMessage(`Fetched ${profiles.length} profiles. Saving in local database now`);
        // Running total so the message reflects overall progress rather than the last batch size.
        let savedCount = 0;
        for (const chunk of (0, common_1.chunkArray)(profiles, 500)) {
            await db_1.database.saveProfiles(chunk.map(transformers_1.transformProfileViewToProfile));
            savedCount += chunk.length;
            updateMessage(`Saved ${savedCount} out of ${profiles.length} profiles in local database now`);
        }
        updateMessage(`Saved ${profiles.length} profiles in local database`);
    });
    return loaderResult;
};
exports.fetchProfilesForSubjects = fetchProfilesForSubjects;
/**
 * Fetches the records reported as missing by `database.getMissingRecordUris()`
 * and saves them to the local database in batches of 500.
 *
 * Progress is reported through the loader: once per "update" event emitted
 * while `getRecords` runs, and once per saved batch.
 *
 * @returns {Promise<*>} Whatever `withLoader` resolves with; the loader
 *   callback itself returns nothing.
 */
const fetchRecordsForSubjects = async () => {
    const emitter = new events_1.default();
    const loaderResult = await (0, loader_1.withLoader)(`Fetching records for subjects...`, async (updateMessage) => {
        emitter.on("update", ({ total, recordCount }) => {
            updateMessage(`Fetched ${recordCount} records out of ${total}`);
        });
        const uris = await db_1.database.getMissingRecordUris();
        updateMessage(`Found ${uris.length} missing records to be fetched`);
        if (uris.length === 0) {
            return;
        }
        const records = await (0, atproto_1.getRecords)({ uris }, emitter);
        updateMessage(`Fetched ${records.length} records. Saving in local database now`);
        // Running total so the message reflects overall progress rather than the last batch size.
        let savedCount = 0;
        for (const chunk of (0, common_1.chunkArray)(records, 500)) {
            await db_1.database.saveRecords(chunk.map(transformers_1.transformRecordViewToRecord));
            savedCount += chunk.length;
            updateMessage(`Saved ${savedCount} out of ${records.length} records in local database`);
        }
        updateMessage(`Saved ${records.length} records in local database`);
    });
    return loaderResult;
};
exports.fetchRecordsForSubjects = fetchRecordsForSubjects;
/**
 * Reports whether `str` contains at least one regular-expression metacharacter.
 *
 * @param {string} str - Candidate pattern.
 * @returns {boolean} True when any of `. * + ? ^ $ { } ( ) | [ ] \` occurs in `str`.
 */
function looksLikeRegex(str) {
    // Check for common regex metacharacters
    const regexChars = /[.*+?^${}()|[\]\\]/;
    return regexChars.test(str);
}
/**
 * Matches `y` against `x`, treating `x` as a regular expression when it
 * contains metacharacters and as a plain substring otherwise.
 *
 * Literal containment always counts as a match, even when `x` was compiled as
 * a regex that did not match. Without that, a literal needle that happens to
 * contain metacharacters (for example the `+` in "user+tag@example.com")
 * would fail to match the very same text.
 *
 * @param {string} x - Pattern or literal needle.
 * @param {string} y - Text to search.
 * @returns {boolean} True when the regex matches or `y` contains `x` literally.
 */
function smartMatch(x, y) {
    if (looksLikeRegex(x)) {
        try {
            if (new RegExp(x).test(y)) {
                return true;
            }
        }
        catch (error) {
            // Invalid pattern: deliberately fall through to the literal check below.
        }
    }
    // Literal substring match: the only check for plain needles, and the
    // fallback for regex-looking needles that did not match or did not compile.
    return y.includes(x);
}
/**
 * Splits a "||"-separated keyword expression into lower-cased, trimmed,
 * non-empty alternatives.
 *
 * @param {*} expression - Keyword expression; coerced to a string.
 * @returns {string[]} The alternatives (possibly empty).
 */
function splitKeywordAlternatives(expression) {
    return `${expression}`
        .toLowerCase()
        .split("||")
        .map((part) => part.trim())
        .filter(Boolean);
}
/**
 * Extracts the lower-cased `text` field from a JSON-encoded record value.
 *
 * @param {string} rawValue - JSON string; a falsy value is treated as "{}".
 * @returns {string|undefined} The lower-cased text, or undefined when the JSON
 *   is malformed or carries no string `text` field.
 */
function readLowerCasedRecordText(rawValue) {
    let parsed;
    try {
        parsed = JSON.parse(rawValue || "{}");
    }
    catch (error) {
        if (!(error instanceof SyntaxError)) {
            throw error;
        }
        // One malformed stored value must not abort the whole listing;
        // treat it as a record without text, which never matches a keyword.
        return undefined;
    }
    const text = parsed?.text;
    return typeof text === "string" ? text.toLowerCase() : undefined;
}
/**
 * Lists subjects from the local database and narrows them with the optional
 * in-memory filters in `options`.
 *
 * Filters, applied in order (a subject must pass every active one):
 *  - `bio` (falling back to `keyword`): profile description contains any of
 *    the "||"-separated alternatives, case-insensitively.
 *  - `email`: matched against the subject's email via `smartMatch`.
 *  - `keyword`: the record's `text` contains any of the "||"-separated
 *    alternatives, case-insensitively.
 *  - `riskScore`: the "riskScore" threat signature is numerically greater
 *    than the given threshold.
 *  - `lastCountry`: the "lastSigninCountry" threat signature is one of the
 *    given countries, case-insensitively.
 *
 * NOTE(review): when only `keyword` is supplied it drives both the
 * profile-description filter and the record-text filter, so a subject must
 * satisfy both — confirm this is intended.
 *
 * @param {object} options - `takendown`, `type`, `cursor` and `count` are
 *   forwarded to `database.listSubjects`; `bio`, `keyword`, `email`,
 *   `riskScore` and `lastCountry` are the filters above; `columns` is a
 *   comma-separated list of properties to keep on each returned subject.
 * @returns {Promise<object[]>} The filtered subjects, projected onto `columns`
 *   when given.
 */
const processSubjects = async (options) => {
    let subjects = await db_1.database.listSubjects({
        takendown: options.takendown,
        subjectType: options.type,
        cursor: options.cursor,
        limit: options.count,
    });
    if (options.bio || options.keyword) {
        // Always match against the trimmed alternatives. Searching for the raw
        // expression broke inputs such as "foo ||" or " foo ".
        const alternatives = splitKeywordAlternatives(options.bio || options.keyword);
        subjects = subjects.filter((subject) => {
            const description = subject.profile?.description;
            if (!description) {
                return false;
            }
            const haystack = description.toLowerCase();
            return alternatives.some((k) => haystack.includes(k));
        });
    }
    if (options.email) {
        const keyword = options.email.toLowerCase();
        subjects = subjects.filter((subject) => {
            const { email } = subject;
            if (!email) {
                return false;
            }
            return smartMatch(keyword, email.toLowerCase());
        });
    }
    if (options.keyword) {
        const alternatives = splitKeywordAlternatives(options.keyword);
        subjects = subjects.filter((subject) => {
            const text = readLowerCasedRecordText(subject.value);
            if (!text) {
                return false;
            }
            return alternatives.some((k) => text.includes(k));
        });
    }
    if (options.riskScore) {
        // Coerce both sides to numbers: `>` compares two strings lexicographically
        // ("9" > "10"), which is wrong for scores.
        const threshold = Number(options.riskScore);
        subjects = subjects.filter((subject) => {
            const signatures = subject.threatSignatures || [];
            const rawScore = signatures.find((h) => h.property === "riskScore")?.value;
            if (rawScore === undefined || rawScore === null || rawScore === "") {
                return false;
            }
            return Number(rawScore) > threshold;
        });
    }
    if (options.lastCountry?.length) {
        const countriesToMatch = options.lastCountry.map((c) => c.toLowerCase());
        subjects = subjects.filter((subject) => {
            const signatures = subject.threatSignatures || [];
            const lastCountry = signatures.find((h) => h.property === "lastSigninCountry")?.value;
            if (!lastCountry) {
                return false;
            }
            return countriesToMatch.includes(lastCountry.toLowerCase());
        });
    }
    // Parse the column list once, not per subject, and tolerate "a, b" style spacing.
    const columns = options.columns
        ?.split(",")
        .map((column) => column.trim())
        .filter(Boolean);
    return subjects.map((s) => (0, exports.filterSubjectColumns)(s, columns));
};
exports.processSubjects = processSubjects;
/**
 * Projects `subject` onto the requested columns.
 *
 * @param {object} subject - Source object.
 * @param {string[]} [columns] - Property names to keep. When falsy, `subject`
 *   itself is returned untouched.
 * @returns {object} `subject` (no columns given) or a new object holding only
 *   the requested properties that exist on `subject` (checked with `in`).
 */
const filterSubjectColumns = (subject, columns) => {
    if (!columns) {
        return subject;
    }
    return columns.reduce((picked, name) => {
        if (name in subject) {
            picked[name] = subject[name];
        }
        return picked;
    }, {});
};
exports.filterSubjectColumns = filterSubjectColumns;