coldsky
Version:
Library and the app for BlueSky
425 lines (356 loc) • 14.3 kB
JavaScript
// @ts-check
import { isCompactPost } from '..';
import { streamEvery } from '../../../package/akpa';
import { ColdskyAgent } from '../../coldsky-agent';
import { isPromise } from '../../is-promise';
import { plcDirectoryHistoryRaw } from '../../plc-directory';
import { breakFeedURI, detectProfileURL, likelyDID, shortenDID, unwrapShortDID, unwrapShortHandle } from '../../shorten';
import { breakIntoWords } from '../capture-records/compact-post-words';
import { extractKnownArguments } from './cached-store';
import { getPostOnly } from './get-post-only';
import { syncRepo } from './sync-repo';
/**
* @typedef {{
* shortDID: string | null | undefined,
* searchQuery: string | null | undefined,
* likesAndReposts?: boolean | undefined,
* agent_getProfile_throttled: (did) => ReturnType<import('@atproto/api').BskyAgent['getProfile']>,
* agent_resolveHandle_throttled: (handle) => ReturnType<import('@atproto/api').BskyAgent['resolveHandle']>,
* agent_searchPosts_throttled: (q: string, limit: number | undefined, sort: string | undefined, cursor?: string) => ReturnType<import('@atproto/api').BskyAgent['app']['bsky']['feed']['searchPosts']>,
* agent_getRepoRecord_throttled: (repo, rkey, collection) => ReturnType<import('@atproto/api').BskyAgent['com']['atproto']['repo']['getRecord']>,
* dbStore: ReturnType<typeof import('../define-cache-indexedDB-store').defineCacheIndexedDBStore>
* }} Args
*/
/**
* @param {Args} args
* @returns {[] | AsyncGenerator<import('.').IncrementalMatchCompactPosts>}
*/
export function searchPostsIncrementally(args) {
const { shortDID, searchQuery } = args;
if (shortDID) {
return searchAccountHistoryPostsIncrementally(args);
} else if (!searchQuery) {
return [];
} else {
return searchAllPostsIncrementally(args);
}
}
/**
* Searches the post history of a single account incrementally.
*
* Yields, in order: the matches already available from the local store (flagged
* `cachedOnly`, and only when there are any); then refreshed matches, no more often
* than once per REPORT_UPDATES_FREQUENCY_MSEC, while the remote sources are being
* indexed; and finally one last refreshed result once every source has completed.
* @param {Args} args
*/
export async function* searchAccountHistoryPostsIncrementally(args) {
const { shortDID, searchQuery: searchQueryOriginal, likesAndReposts, dbStore, agent_getProfile_throttled, agent_searchPosts_throttled } = args;
// Recognised arguments are split off the raw query: `reduced` becomes the effective
// search text, and `to` is used below as a list of handles/DIDs to filter by.
const knownArgs = searchQueryOriginal ? extractKnownArguments(searchQueryOriginal) : undefined;
const searchQuery = knownArgs ? knownArgs.reduced : searchQueryOriginal;
// A search is "active" when there is either search text or at least one `to` entry.
const hasSearch = searchQuery || knownArgs?.to?.length;
// Minimum time between two consecutive yields from the main loop below.
let REPORT_UPDATES_FREQUENCY_MSEC = 700;
// Start the local-store lookup immediately; it is awaited further down.
const cachedMatchesPromise = dbStore.searchPosts(shortDID, searchQuery, likesAndReposts);
// Only allocated when likes/reposts are requested. It is handed to dbStore.searchPosts
// and read back by addMissingLikesAndRepostsToTheQueue -- presumably the store fills it
// with URIs of liked/reposted posts it does not hold yet (TODO confirm against dbStore).
/** @type {Set<string> | undefined} */
const missingLikesAndReposts = !likesAndReposts ? undefined : new Set();
// With an active search the whole cached history is looked up separately (no search text);
// without one, the lookup above already is the whole history, so the promise is reused.
const allCachedHistoryPromise = !hasSearch ? cachedMatchesPromise :
dbStore.searchPosts(shortDID, undefined, likesAndReposts, missingLikesAndReposts);
// Requested up front so it runs alongside the store lookups; awaited after the first yield.
const plcDirHistoryPromise = plcDirectoryHistoryRaw(/** @type {string} */(shortDID));
// Timestamp of the most recent yield, used for throttling updates.
let lastSearchReport = 0;
// Posts received from the remote sources since the last yield.
/** @type {import('..').CompactPost[] | undefined} */
let processedBatch;
// NOTE(review): anyUpdates is only ever assigned in this function, never read.
let anyUpdates = false;
// Resolves every `to` entry to a shortDID. Once resolution finishes, the variable itself
// is overwritten with the resulting Set (or undefined when nothing resolved), so later
// filterWithTo calls can take the synchronous path.
/** @type {Set<string> | undefined | Promise<Set<string> | undefined>} */
let toShortDIDsPromise = !knownArgs?.to?.length ? undefined : Promise.all(
knownArgs.to.map(handle => resolveUserLocalOrRemote(handle))).then(shortDIDs => {
const res = /** @type {Set<string>} */(new Set(shortDIDs.filter(Boolean)));
return (toShortDIDsPromise = res.size ? res : undefined);
});
/** @type {import('.').IncrementalMatchCompactPosts} */
let lastMatches = await filterWithTo(await cachedMatchesPromise, toShortDIDsPromise);
const allHistory = await filterWithTo(await allCachedHistoryPromise, toShortDIDsPromise);
// URIs of everything known from the cache; its size is the fallback for processedAllCount.
/** @type {Set<string> | undefined} */
let knownHistoryUri = new Set((allHistory || []).map(rec => rec.uri));
// First report: whatever the cache already has, before any network result arrives.
if (lastMatches?.length) {
lastMatches.cachedOnly = true;
lastMatches.processedAllCount = knownHistoryUri.size;
lastMatches.processedBatch = allHistory.slice();
lastSearchReport = Date.now();
yield lastMatches;
}
const plcDirHistoryRecords = await plcDirHistoryPromise;
dbStore.capturePlcDirectoryEntries(plcDirHistoryRecords);
const profile = await dbStore.getProfile(/** @type {string} */(shortDID));
// Runs several remote sources concurrently and funnels their batches into one stream:
// paginated record listing, a full repository download, and remote searches per word.
const parallelSearch = streamEvery(
/** @param {import('../../../package/akpa').StreamParameters<import('..').CompactPost[] | undefined>} streaming */
streaming => {
// The complete query text is searched first, followed by each entry from breakIntoWords.
const words = breakIntoWords(searchQuery || '');
words.unshift(searchQuery || '');
const waitForAllCompletionPromises = [];
// Set once the full repository download has been indexed; the other sources check it
// and stop forwarding results.
let fullRepoIndexed = false;
// Settles as soon as either the paginated listing or the full download settles.
const waitUntilPageIndexed = Promise.race([
fetchPaginatedAndIndex(),
downloadFullRepoAndIndex()]);
waitForAllCompletionPromises.push(waitUntilPageIndexed);
for (const word of words) {
waitForAllCompletionPromises.push(searchForWord(word));
}
// The stream is completed when all sources have settled; individual failures are ignored.
Promise.all(waitForAllCompletionPromises.map(p => p.catch(() => { })))
.then(() => {
streaming.complete();
});
// Forwards batches from the paginated record listing until the full repo is indexed.
async function fetchPaginatedAndIndex() {
for await (const batch of indexAccountHistoryPostsFromRepository(args)) {
if (fullRepoIndexed) return;
streaming.yield(batch);
}
}
// Runs one remote search restricted to the account's handle, stores the returned
// posts and forwards those that pass the `to` filter.
/** @param {string} word */
async function searchForWord(word) {
const wordSearchQuery =
'from:' + unwrapShortHandle(profile?.handle || '') +
' ' + word;
const searchResult = await agent_searchPosts_throttled(wordSearchQuery, undefined, 'latest');
const batch = [];
if (searchResult?.data?.posts?.length) {
for (const postRaw of searchResult.data.posts) {
// Abandons the batch without forwarding it once the full repo has been indexed.
if (fullRepoIndexed) return;
const post = dbStore.capturePostView(postRaw, Date.now());
if (post) batch.push(post);
}
}
streaming.yield(await filterWithTo(batch, toShortDIDsPromise));
}
// Syncs the repository via syncRepo and forwards only the account's own posts.
async function downloadFullRepoAndIndex() {
const postsAndProfiles = await syncRepo({
...args,
shortDID
});
const ownPostsOnly =
!postsAndProfiles ? [] :
/** @type {import('..').CompactPost[]} */(
postsAndProfiles.filter(post =>
isCompactPost(post) && post.shortDID === shortDID)
);
streaming.yield(await filterWithTo(ownPostsOnly, toShortDIDsPromise));
fullRepoIndexed = true;
}
});
// One pending getPostOnly call per entry of missingLikesAndReposts, in the same order.
/** @type {ReturnType<typeof getPostOnly>[]} */
let queuedMissingLikesAndReposts = [];
// Starts fetches for URIs added to the set since the previous call. Index i of the queue
// corresponds to the i-th URI of the set, which iterates in insertion order.
const addMissingLikesAndRepostsToTheQueue = () => {
if (missingLikesAndReposts && missingLikesAndReposts.size > queuedMissingLikesAndReposts.length) {
const arr = [...missingLikesAndReposts];
for (let i = queuedMissingLikesAndReposts.length; i < arr.length; i++) {
const uri = arr[i];
queuedMissingLikesAndReposts[i] = getPostOnly({
uri,
dbStore,
agent_getRepoRecord_throttled: args.agent_getRepoRecord_throttled
});
}
}
};
// Main loop: accumulate incoming batches and, when enough time has passed since the
// last report, re-run the local search and yield the refreshed matches.
for await (const searchResultRaw of parallelSearch) {
const searchResult = await filterWithTo(searchResultRaw || [], toShortDIDsPromise)
if (searchResult) {
if (!processedBatch) processedBatch = searchResult;
else processedBatch = processedBatch.concat(searchResult);
}
if (Date.now() - lastSearchReport > REPORT_UPDATES_FREQUENCY_MSEC) {
/** @type {import('.').IncrementalMatchCompactPosts} */
const newMatches = await filterWithTo(
await dbStore.searchPosts(shortDID, searchQuery, likesAndReposts, missingLikesAndReposts),
toShortDIDsPromise);
addMissingLikesAndRepostsToTheQueue();
lastMatches = newMatches;
lastSearchReport = Date.now();
anyUpdates = false;
newMatches.processedBatch = processedBatch;
if (!newMatches.processedAllCount)
newMatches.processedAllCount = knownHistoryUri.size;
processedBatch = undefined;
yield newMatches;
// Restart the throttle window from the moment the consumer resumes the generator.
lastSearchReport = Date.now();
}
}
// Let the queued like/repost fetches finish before the final lookup.
if (queuedMissingLikesAndReposts.length) {
await Promise.all(queuedMissingLikesAndReposts);
}
/** @type {import('.').IncrementalMatchCompactPosts} */
const finalMatches = await filterWithTo(
await dbStore.searchPosts(shortDID, searchQuery, likesAndReposts, missingLikesAndReposts),
toShortDIDsPromise);
// NOTE(review): fetches queued by this call are not awaited before the generator ends.
addMissingLikesAndRepostsToTheQueue();
finalMatches.processedBatch = processedBatch;
if (!finalMatches.processedAllCount)
finalMatches.processedAllCount = knownHistoryUri.size;
processedBatch = undefined;
yield finalMatches;
// Resolves a `to` entry to a shortDID: values that look like a DID are returned as-is,
// otherwise the local store is consulted first and the remote profile lookup is the
// fallback. May return a value or a promise; resolves to undefined on failure.
/** @param {string} handle */
function resolveUserLocalOrRemote(handle) {
if (likelyDID(handle)) return handle;
const dbResolve = dbStore.getProfile(unwrapShortHandle(handle.trim().replace(/^@/, '')));
if (dbResolve) {
// dbStore.getProfile may answer synchronously or with a promise.
if (!isPromise(dbResolve)) return dbResolve.shortDID;
return dbResolve.then(
res => {
if (res) return res.shortDID;
return resolveRemote();
},
err => {
// A failed local lookup is not fatal; fall back to the remote lookup.
return resolveRemote();
});
} else {
return resolveRemote();
}
// Fetches the profile remotely and stores it; errors are swallowed and yield undefined.
async function resolveRemote() {
let dt;
try {
dt = await agent_getProfile_throttled(handle);
} catch (getProfileError) {
return;
}
if (!dt.data) return;
const profile = dbStore.captureProfileView(dt.data, Date.now());
return profile.shortDID;
}
}
}
/**
* @param {Args} args
*/
async function* indexAccountHistoryPostsFromRepository(args) {
const { shortDID, dbStore } = args;
const plcDirHistoryPromise = plcDirectoryHistoryRaw(/** @type {string} */(shortDID));
/** @type {import('..').CompactPost[] | undefined} */
let processedBatch;
const plcDirHistoryRecords = await plcDirHistoryPromise;
dbStore.capturePlcDirectoryEntries(plcDirHistoryRecords);
const profile = await dbStore.getProfile(/** @type {string} */(shortDID));
const pdsAgent = new ColdskyAgent({
service: profile?.history?.[0].pds
});
let cursor = '';
const fullDID = unwrapShortDID(/** @type {string} */(shortDID));
while (true) {
const moreData = await pdsAgent.com.atproto.repo.listRecords({
repo: unwrapShortDID(/** @type {string} */(shortDID)),
cursor,
collection: 'app.bsky.feed.post',
limit: Math.random() * 10 + 88
});
if (moreData?.data?.records?.length) {
for (const rec of moreData.data.records) {
/** @type {import('../../firehose').FirehoseRepositoryRecord<'app.bsky.feed.post'>} */
const recEx = /** @type {*} */(rec.value);
recEx.$type = 'app.bsky.feed.post';
recEx.uri = rec.uri;
recEx.repo = fullDID;
const post = /** @type {import('..').CompactPost} */(dbStore.captureRecord(recEx, Date.now()));
if (post) {
if (!processedBatch) processedBatch = [post];
else processedBatch.push(post);
}
}
}
yield processedBatch;
if (!moreData?.data?.cursor) break;
cursor = moreData.data.cursor;
}
}
/**
* @param {Args} args
* @returns {AsyncGenerator<import('.').IncrementalMatchCompactPosts>}
*/
async function* searchAllPostsIncrementally(args) {
const { searchQuery, dbStore, agent_searchPosts_throttled } = args;
const searchStringSanitised = (searchQuery || '')
.trim()
.replace(/[.,\/#!$%\^&\*;:{}=\-_`~()]/g, ' ')
.replace(/\s+/g, ' ');
let remoteSearchLatestPromise = agent_searchPosts_throttled(
searchStringSanitised,
97,
'latest');
const localResultsPromise = dbStore.searchPosts(undefined, searchQuery);
/** @type {import('..').MatchCompactPost[] & { cachedOnly?: boolean }} */
const localResults = await localResultsPromise;
if (localResults?.length) {
localResults.cachedOnly = true;
yield localResults;
}
let cursor = '';
while (true) {
const remoteSearchData = (await remoteSearchLatestPromise).data;
const now = Date.now();
for (const postRaw of remoteSearchData?.posts || []) {
dbStore.capturePostView(postRaw, now);
}
const refreshedResults = await dbStore.searchPosts(undefined, searchQuery);
if (remoteSearchData?.cursor) {
cursor = remoteSearchData.cursor;
remoteSearchLatestPromise = agent_searchPosts_throttled(
searchStringSanitised,
97,
'latest',
cursor);
}
if (refreshedResults?.length) {
yield refreshedResults;
}
if (!remoteSearchData?.cursor) break;
}
}
/**
* @param {import('..').MatchCompactPost[]} matches
* @param {Set<string> | undefined | Promise<Set<string> | undefined} toShortDIDsPromise
*/
function filterWithTo(matches, toShortDIDsPromise) {
if (!toShortDIDsPromise) return matches;
if (isPromise(toShortDIDsPromise)) {
return toShortDIDsPromise.then(shortDIDs => {
if (!shortDIDs) return matches;
else return filterWithToSet(matches, shortDIDs);
});
} else {
return filterWithToSet(matches, toShortDIDsPromise);
}
}
/**
* @param {import('..').MatchCompactPost[]} matches
* @param {Set<string>} toShortDIDs
*/
function filterWithToSet(matches, toShortDIDs) {
const filtered = matches.filter(match => {
if (match.replyTo) {
const ref = breakFeedURI(match.replyTo);
if (ref && toShortDIDs.has(ref.shortDID)) return true;
}
if (match.threadStart) {
const ref = breakFeedURI(match.threadStart);
if (ref && toShortDIDs.has(ref.shortDID)) return true;
}
if (match.quoting?.length) {
for (const q of match.quoting) {
const ref = breakFeedURI(q);
if (ref && toShortDIDs.has(ref.shortDID)) return true;
}
}
if (match.embeds?.length) {
for (const emb of match.embeds) {
if (emb.url) {
const ref = breakFeedURI(emb.url);
if (ref && toShortDIDs.has(ref.shortDID)) return true;
const prof = detectProfileURL(emb.url);
if (prof && toShortDIDs.has(prof)) return true;
}
}
}
if (match.facets?.length) {
for (const fa of match.facets) {
if (fa.url) {
const ref = breakFeedURI(fa.url);
if (ref && toShortDIDs.has(ref.shortDID)) return true;
const prof = detectProfileURL(fa.url);
if (prof && toShortDIDs.has(prof)) return true;
}
}
}
});
return filtered;
}