auron
Version:
Interact with your ATProto labeler from your terminal
311 lines (275 loc) • 9 kB
text/typescript
import EventEmitter from "events";
import { withLoader } from "../utils/loader";
import {
getProfiles,
getQueueItems,
getRecords,
getRepos,
} from "../api/atproto";
import { database } from "../services/db";
import { ToolsOzoneModerationDefs } from "@atproto/api";
import {
transformRecordViewToRecord,
transformRepoViewToRepo,
transformProfileViewToProfile,
transformStatusToSubject,
} from "../services/subject/transformers";
import { chunkArray } from "@atproto/common";
import { RecordRow } from "../schemas/record";
import { RepoRow } from "../schemas/repo";
import { SubjectRow } from "../schemas/subject";
/**
 * Retrieves moderation queue items via `getQueueItems`, relaying progress
 * through the `updateMessage` callback supplied by `withLoader`.
 *
 * @param options.count Upper bound on fetched items; a falsy value is
 *   forwarded as `undefined`, otherwise it is coerced with `Number`.
 * @param options.cursor Cursor forwarded unchanged to `getQueueItems`.
 * @param options.reviewState Review state forwarded unchanged.
 * @param options.takendown Takedown flag forwarded unchanged.
 * @returns The `subjectStatuses` array resolved by `getQueueItems`.
 */
export const fetchQueueItems = async (options: {
  count: number;
  cursor?: string;
  reviewState?: string;
  takendown?: boolean;
}) => {
  const progress = new EventEmitter();

  return withLoader(`Fetching queue items...`, async (updateMessage) => {
    // Each "update" event received on the emitter becomes a progress line.
    progress.on("update", (page) => {
      const { nextCursor, subjectCount, maxCount } = page;
      updateMessage(
        `Fetched page with cursor: ${nextCursor}, total subjects: ${subjectCount} out of ${maxCount}`
      );
    });

    const maxCount = options.count ? Number(options.count) : undefined;
    const result = await getQueueItems(
      {
        maxCount,
        reviewState: options.reviewState,
        takendown: options.takendown,
        cursor: options.cursor,
      },
      progress
    );

    updateMessage(
      `Fetched ${result.subjectStatuses.length} queue items. Cursor: ${result.cursor}`
    );
    return result.subjectStatuses;
  });
};
/**
 * Writes moderation queue items to the local database.
 *
 * Items are converted with `transformStatusToSubject` and inserted one chunk
 * at a time, using the chunks produced by `chunkArray(subjects, 500)`.
 * Progress is reported through the `updateMessage` callback supplied by
 * `withLoader`.
 *
 * @param subjects Subject status views to store.
 * @throws Error("No queue items to store") when `subjects` is empty.
 */
export const saveQueueItems = async (
  subjects: ToolsOzoneModerationDefs.SubjectStatusView[]
) => {
  await withLoader(
    `Storing ${subjects.length} queue items...`,
    async (updateMessage) => {
      if (subjects.length === 0) {
        updateMessage("No queue items to store");
        throw new Error("No queue items to store");
      }

      // Running total so each progress line shows how many items are stored
      // so far, not just the size of the chunk that was just written.
      let storedCount = 0;
      for (const chunk of chunkArray(subjects, 500)) {
        await database.insertSubjects(chunk.map(transformStatusToSubject));
        storedCount += chunk.length;
        updateMessage(
          `Stored ${storedCount} out of ${subjects.length} queue items in local database`
        );
      }

      updateMessage(`Stored ${subjects.length} queue items in local database`);
    }
  );
};
/**
 * Fetches the repos that the local database reports as missing and saves them.
 *
 * The DIDs come from `database.getMissingRepoDids()`; when that list is empty
 * nothing is fetched. Fetched repos are converted with
 * `transformRepoViewToRepo` and saved one chunk at a time, using the chunks
 * produced by `chunkArray(repos, 500)`. Progress is reported through the
 * `updateMessage` callback supplied by `withLoader`.
 *
 * @returns The value `withLoader` resolves to; the wrapped callback itself
 *   returns nothing.
 */
export const fetchReposForSubjects = async () => {
  const emitter = new EventEmitter();

  return withLoader(`Fetching repos for subjects...`, async (updateMessage) => {
    // "update" events received on the emitter passed to getRepos drive the
    // fetch progress line.
    emitter.on("update", ({ total, repoCount }) => {
      updateMessage(`Fetched ${repoCount} repos out of ${total}`);
    });

    const dids = await database.getMissingRepoDids();
    updateMessage(`Found ${dids.length} missing repos to be fetched`);
    if (dids.length === 0) {
      return;
    }

    const repos = await getRepos({ dids }, emitter);
    updateMessage(
      `Fetched ${repos.length} repos. Saving in local database now`
    );

    // Running total so the message reflects everything saved so far rather
    // than the size of the latest chunk.
    let savedCount = 0;
    for (const chunk of chunkArray(repos, 500)) {
      await database.saveRepos(chunk.map(transformRepoViewToRepo));
      savedCount += chunk.length;
      updateMessage(
        `Saved ${savedCount} out of ${repos.length} repos in local database now`
      );
    }

    updateMessage(`Saved ${repos.length} repos in local database`);
  });
};
/**
 * Fetches the profiles that the local database reports as missing and saves
 * them.
 *
 * The DIDs come from `database.getMissingProfileDids()`; when that list is
 * empty nothing is fetched. Fetched profiles are converted with
 * `transformProfileViewToProfile` and saved one chunk at a time, using the
 * chunks produced by `chunkArray(profiles, 500)`. Progress is reported
 * through the `updateMessage` callback supplied by `withLoader`.
 *
 * @returns The value `withLoader` resolves to; the wrapped callback itself
 *   returns nothing.
 */
export const fetchProfilesForSubjects = async () => {
  const emitter = new EventEmitter();

  return withLoader(
    `Fetching profiles for subjects...`,
    async (updateMessage) => {
      // "update" events received on the emitter passed to getProfiles drive
      // the fetch progress line.
      emitter.on("update", ({ total, profileCount }) => {
        updateMessage(`Fetched ${profileCount} profiles out of ${total}`);
      });

      const dids = await database.getMissingProfileDids();
      updateMessage(`Found ${dids.length} profiles to be fetched`);
      if (dids.length === 0) {
        return;
      }

      const profiles = await getProfiles({ dids }, emitter);
      updateMessage(
        `Fetched ${profiles.length} profiles. Saving in local database now`
      );

      // Running total so the message reflects everything saved so far rather
      // than the size of the latest chunk.
      let savedCount = 0;
      for (const chunk of chunkArray(profiles, 500)) {
        await database.saveProfiles(chunk.map(transformProfileViewToProfile));
        savedCount += chunk.length;
        updateMessage(
          `Saved ${savedCount} out of ${profiles.length} profiles in local database now`
        );
      }

      updateMessage(`Saved ${profiles.length} profiles in local database`);
    }
  );
};
/**
 * Fetches the records that the local database reports as missing and saves
 * them.
 *
 * The URIs come from `database.getMissingRecordUris()`; when that list is
 * empty nothing is fetched. Fetched records are converted with
 * `transformRecordViewToRecord` and saved one chunk at a time, using the
 * chunks produced by `chunkArray(records, 500)`. Progress is reported through
 * the `updateMessage` callback supplied by `withLoader`.
 *
 * @returns The value `withLoader` resolves to; the wrapped callback itself
 *   returns nothing.
 */
export const fetchRecordsForSubjects = async () => {
  const emitter = new EventEmitter();

  return withLoader(
    `Fetching records for subjects...`,
    async (updateMessage) => {
      // "update" events received on the emitter passed to getRecords drive
      // the fetch progress line.
      emitter.on("update", ({ total, recordCount }) => {
        updateMessage(`Fetched ${recordCount} records out of ${total}`);
      });

      const uris = await database.getMissingRecordUris();
      updateMessage(`Found ${uris.length} missing records to be fetched`);
      if (uris.length === 0) {
        return;
      }

      const records = await getRecords({ uris }, emitter);
      updateMessage(
        `Fetched ${records.length} records. Saving in local database now`
      );

      // Running total so the message reflects everything saved so far rather
      // than the size of the latest chunk.
      let savedCount = 0;
      for (const chunk of chunkArray(records, 500)) {
        await database.saveRecords(chunk.map(transformRecordViewToRecord));
        savedCount += chunk.length;
        updateMessage(
          `Saved ${savedCount} out of ${records.length} records in local database`
        );
      }

      updateMessage(`Saved ${records.length} records in local database`);
    }
  );
};
/**
 * Reports whether `str` contains at least one regular-expression
 * metacharacter (`. * + ? ^ $ { } ( ) | [ ] \`).
 */
function looksLikeRegex(str: string): boolean {
  return /[.*+?^${}()|[\]\\]/.test(str);
}

/**
 * Checks whether `y` matches the pattern `x`.
 *
 * A literal substring hit always counts as a match. This matters because
 * ordinary inputs such as `a+b@example.com` contain metacharacters: read
 * purely as a regex, that pattern does not match its own text. When the
 * literal check fails and `x` contains regex metacharacters, `x` is compiled
 * as a regular expression and tested against `y`.
 *
 * @param x Pattern: plain text or regular-expression source.
 * @param y Text to search.
 * @returns `true` when `y` contains `x` literally or matches it as a regex.
 *   An invalid regex never throws; it simply yields the literal result.
 */
function smartMatch(x: string, y: string): boolean {
  if (y.includes(x)) return true;
  if (!looksLikeRegex(x)) return false;

  try {
    return new RegExp(x).test(y);
  } catch {
    // Invalid regex source: only the literal comparison applies, and it has
    // already failed above.
    return false;
  }
}
/** Splits a `||`-separated expression into trimmed, lower-cased, non-empty terms. */
const splitSubjectKeywords = (expression: string): string[] =>
  expression
    .toLowerCase()
    .split("||")
    .map((term) => term.trim())
    .filter(Boolean);

/** Returns true when `text`, compared case-insensitively, contains any of the lower-cased `terms`. */
const containsAnySubjectKeyword = (text: string, terms: string[]): boolean => {
  const haystack = text.toLowerCase();
  return terms.some((term) => haystack.includes(term));
};

/** Reads the string `text` field from a JSON-encoded record value; `undefined` when missing, not a string, or unparseable. */
const readSubjectRecordText = (rawValue: unknown): string | undefined => {
  if (typeof rawValue !== "string" || rawValue === "") return undefined;
  try {
    const parsed: unknown = JSON.parse(rawValue);
    if (typeof parsed !== "object" || parsed === null) return undefined;
    const text = (parsed as { text?: unknown }).text;
    return typeof text === "string" ? text : undefined;
  } catch {
    // One malformed row should not abort filtering of the whole result set.
    return undefined;
  }
};

/** Returns the `value` of the first threat-signature entry whose `property` equals `property`, if any. */
const findThreatSignatureValue = (
  signatures: unknown,
  property: string
): unknown => {
  if (!Array.isArray(signatures)) return undefined;
  return signatures.find((entry) => entry?.property === property)?.value;
};

/**
 * Lists subjects from the local database and narrows them with in-memory
 * filters, then projects each row onto the requested columns.
 *
 * `takendown`, `type`, `cursor` and `count` are passed to
 * `database.listSubjects`; every other option is applied afterwards, and all
 * active filters must pass for a subject to be kept.
 *
 * @param options.bio `||`-separated terms; keeps subjects whose profile
 *   description contains any term (case-insensitive).
 * @param options.keyword `||`-separated terms; keeps subjects whose record
 *   `text` contains any term (case-insensitive). When `bio` is not set, the
 *   same terms are also required in the profile description.
 * @param options.email Literal or regex pattern matched with `smartMatch`
 *   against the lower-cased subject email.
 * @param options.riskScore Keeps subjects whose `riskScore` threat signature
 *   is strictly greater than this value; `0` or `undefined` disables the
 *   filter.
 * @param options.lastCountry Keeps subjects whose `lastSigninCountry` threat
 *   signature equals one of these values (case-insensitive).
 * @param options.columns Comma-separated column names handed to
 *   `filterSubjectColumns`; surrounding whitespace and empty names are
 *   dropped.
 * @returns The filtered rows after `filterSubjectColumns` has been applied.
 */
export const processSubjects = async (options: {
  type?: string;
  cursor?: string;
  count?: number;
  bio?: string;
  keyword?: string;
  riskScore?: number;
  columns?: string;
  takendown?: boolean;
  lastCountry?: string[];
  email?: string;
}) => {
  let subjects = await database.listSubjects({
    takendown: options.takendown,
    subjectType: options.type,
    cursor: options.cursor,
    limit: options.count,
  });

  // NOTE(review): with only `keyword` set, the same terms must appear in the
  // profile description here AND in the record text below — confirm that this
  // AND combination is intended.
  const bioExpression = options.bio || options.keyword;
  if (bioExpression) {
    const bioTerms = splitSubjectKeywords(bioExpression);
    subjects = subjects.filter((subject) => {
      const description = subject.profile?.description;
      return description
        ? containsAnySubjectKeyword(description, bioTerms)
        : false;
    });
  }

  if (options.email) {
    // NOTE(review): the pattern is lower-cased as well, so upper-case regex
    // escapes such as `\D` or `\S` turn into their lower-case counterparts.
    const emailPattern = options.email.toLowerCase();
    subjects = subjects.filter((subject) => {
      const { email } = subject;
      return email ? smartMatch(emailPattern, email.toLowerCase()) : false;
    });
  }

  if (options.keyword) {
    const textTerms = splitSubjectKeywords(options.keyword);
    subjects = subjects.filter((subject) => {
      const text = readSubjectRecordText(subject.value);
      return text ? containsAnySubjectKeyword(text, textTerms) : false;
    });
  }

  if (options.riskScore) {
    // Coerce both sides so the comparison is numeric even if a value arrives
    // as a string.
    const threshold = Number(options.riskScore);
    subjects = subjects.filter((subject) => {
      const score = findThreatSignatureValue(
        subject.threatSignatures,
        "riskScore"
      );
      return Boolean(score) && Number(score) > threshold;
    });
  }

  if (options.lastCountry?.length) {
    const wantedCountries = new Set(
      options.lastCountry.map((country) => country.toLowerCase())
    );
    subjects = subjects.filter((subject) => {
      const country = findThreatSignatureValue(
        subject.threatSignatures,
        "lastSigninCountry"
      );
      return (
        typeof country === "string" &&
        country !== "" &&
        wantedCountries.has(country.toLowerCase())
      );
    });
  }

  // Parse the column list once instead of once per row.
  const columns = options.columns
    ?.split(",")
    .map((column) => column.trim())
    .filter(Boolean);

  return subjects.map((s) => filterSubjectColumns(s, columns));
};
/**
 * Projects a subject row onto a chosen set of columns.
 *
 * @param subject Row to read from.
 * @param columns Column names to keep. When omitted, `subject` itself is
 *   returned unchanged (same reference).
 * @returns A new object holding only the requested columns that exist as own
 *   properties of `subject`; unknown names are skipped. Inherited members
 *   such as `toString` or `constructor` are never copied.
 */
export const filterSubjectColumns = (
  subject: SubjectRow & RepoRow & RecordRow,
  columns?: string[]
): Record<string, any> => {
  if (!columns) return subject;

  // Widened view of the row so it can be indexed by arbitrary column names.
  const row: Record<string, any> = subject;
  const selected: Record<string, any> = {};
  for (const column of columns) {
    // Own-property check: `in` would also accept keys from the prototype chain.
    if (Object.prototype.hasOwnProperty.call(row, column)) {
      selected[column] = row[column];
    }
  }
  return selected;
};