coldsky
Version:
Library and the app for BlueSky
594 lines (491 loc) • 18.6 kB
JavaScript
// @ts-check
import Dexie from 'dexie';
import { defineStore } from '.';
import { breakFeedURI, breakFeedURIPostOnly, breakPostURL, likelyDID, makeFeedUri, shortenDID, shortenHandle } from '../shorten';
import { createRepoData } from './repo-data';
import { breakIntoWords, detectWordStartsNormalized } from './capture-records/compact-post-words';
import Fuse from 'fuse.js';
import { createSpeculativePost } from './capture-records/speculative-post';
/** Database name used by defineCacheIndexedDBStore when the caller does not pass one. */
export const DEFAULT_DB_NAME = 'coldsky-db-11may2024';
/** Debounce delay passed to setTimeout (milliseconds): a flush runs this long after the latest queued update. */
export const DEFAULT_DB_DEBOUNCE_TIME = 2000;
/** Upper bound passed to setTimeout (milliseconds): a flush runs at most this long after the first queued update, even if updates keep arriving. */
export const UPDATE_DB_MAX_TIME = 10000;
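/**
* Creates a cache store that captures records into an in-memory store (defineStore)
* and writes the resulting compact posts and profiles to an IndexedDB database through Dexie.
* Writes are collected and flushed in batches after a debounce delay.
* @param {string} [dbName] Name of the IndexedDB database; defaults to DEFAULT_DB_NAME.
*/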
export function defineCacheIndexedDBStore(dbName) {
// Dexie database instance, named by the caller or DEFAULT_DB_NAME.
// The JSDoc cast below declares the three tables used throughout this store.
const db =
/**
* @type {Dexie & {
* posts: import('dexie').Table<import('.').CompactPost, string>,
* profiles: import('dexie').Table<import('.').CompactProfile, string>,
* repoSync: import('dexie').Table<{shortDID: string, lastSyncRev: string }>
* }}
*/(new Dexie(dbName || DEFAULT_DB_NAME));
// Schema history. Each version lists the primary key first, then the indexed fields;
// a '*' marks a multi-entry (array) index.
// this is to clean up old posts, with incorrect URI
db.version(3).stores({
posts: 'uri, shortDID, replyTo, threadStart, *quoting, *words',
profiles: 'shortDID, *handle, *words'
});
// incorrect URIs: at:// prefix missing
// (declaring `posts: null` here and re-declaring the table in version 5 discards the previously stored posts)
db.version(4).stores({
posts: null,
profiles: 'shortDID, *handle, *words'
});
db.version(5).stores({
posts: 'uri, shortDID, replyTo, threadStart, *quoting, *words',
profiles: 'shortDID, *handle, *words'
});
// repoSync introduced
db.version(6).stores({
posts: 'uri, shortDID, replyTo, threadStart, *quoting, *words',
profiles: 'shortDID, *handle, *words',
repoSync: 'shortDID' //
});
// incorrect URI: missing a slash in the middle
// (same drop-and-recreate approach as versions 4/5; note repoSync is not listed in this version)
db.version(7).stores({
posts: null,
profiles: 'shortDID, *handle, *words'
});
db.version(8).stores({
posts: 'uri, shortDID, replyTo, threadStart, *quoting, *words',
profiles: 'shortDID, *handle, *words',
repoSync: 'shortDID'
});
// likedBy / repostedBy indexes introduced, used by searchPosts for likes and reposts.
// NOTE(review): `repostedBy*` has the '*' as a suffix while every other multi-entry index
// uses it as a prefix (`*likedBy`) — confirm Dexie treats the trailing form as multi-entry too.
db.version(9).stores({
posts: 'uri, shortDID, replyTo, threadStart, *quoting, *words, *likedBy, repostedBy*',
profiles: 'shortDID, *handle, *words',
repoSync: 'shortDID'
})
// .upgrade(async tr => {
// await tr.table('repoSync').toCollection().modify(rsync => {
// // likes were not being captured, so full re-download is required now
// delete rsync.lastSyncRev;
// });
// // await tr.table('posts').toCollection().modify(post => {
// // if (post.likeCount)
// // post.likedBy = Array(post.likeCount).fill('?');
// // if (post.repostCount)
// // post.repostedBy = Array(post.repostCount).fill('?');
// // delete post.likeCount;
// // delete post.repostCount;
// // });
// });
// Versions 10 and 11 are declared without any schema changes.
db.version(10);
db.version(11);
// In-memory store; it calls back into this module whenever a post or profile changes,
// and those callbacks queue the change for writing to IndexedDB.
const memStore = defineStore({
post: handlePostUpdate,
profile: handleProfileUpdate
});
/**
* Posts waiting to be written to IndexedDB, keyed by post URI.
* @type {Map<string, import('.').CompactPost>}
*/
let outstandingPostUpdatesByURI = new Map();
/**
* Posts belonging to the flush that is currently running (see performUpdate).
* @type {typeof outstandingPostUpdatesByURI}
*/
let outstandingPostUpdatesInProgressByURI = new Map();
/**
* Profiles waiting to be written to IndexedDB, keyed by shortDID.
* @type {Map<string, import('.').CompactProfile>}
*/
let outstandingProfileUpdatesByShortDID = new Map();
/**
* Profiles belonging to the flush that is currently running (see performUpdate).
* @type {typeof outstandingProfileUpdatesByShortDID}
*/
let outstandingProfileUpdatesInProgressByShortDID = new Map();
// Timer handles used by queueUpdate: the debounce timer is restarted on every update,
// the max timer is only started when none is pending.
var queueTimeoutDebounce;
var queueTimeoutMax;
// Public API of the store: capture functions are delegated to the in-memory store,
// lookups and searches are implemented by the functions below.
return {
captureRecord: memStore.captureRecord,
captureThreadView: memStore.captureThreadView,
capturePostView: memStore.capturePostView,
captureProfileView: memStore.captureProfileView,
deleteRecord,
capturePlcDirectoryEntries: memStore.capturePLCDirectoryEntries,
getPostOnly,
getPostThread,
getProfile,
searchPosts,
searchProfiles,
getLastRepoSyncRev,
syncRepoWithData
};
/**
* Exposed as part of the store API but currently does nothing:
* the record is neither removed from the in-memory store nor from IndexedDB.
* @param {*} rec Ignored for now.
*/
function deleteRecord(rec) {
// TODO: reconcile memStore and IndexedDB
}
/**
 * Callback given to the in-memory store for post changes: keeps the latest
 * version of the post (by URI) in the pending map and schedules a flush.
 * @param {import('.').CompactPost} post
 */
function handlePostUpdate(post) {
  const { uri } = post;
  outstandingPostUpdatesByURI.set(uri, post);
  queueUpdate();
}
/**
 * Callback given to the in-memory store for profile changes: keeps the latest
 * version of the profile (by shortDID) in the pending map and schedules a flush.
 * @param {import('.').CompactProfile} profile
 */
function handleProfileUpdate(profile) {
  const { shortDID } = profile;
  outstandingProfileUpdatesByShortDID.set(shortDID, profile);
  queueUpdate();
}
/**
 * Schedules performUpdate for the pending posts/profiles.
 * Does nothing while a flush is already running. Otherwise the debounce timer
 * is restarted, and the max-wait timer is started only if none is pending.
 */
function queueUpdate() {
  const flushInProgress =
    outstandingPostUpdatesInProgressByURI.size > 0 ||
    outstandingProfileUpdatesInProgressByShortDID.size > 0;
  if (flushInProgress) return;

  if (!queueTimeoutMax) {
    queueTimeoutMax = setTimeout(performUpdate, UPDATE_DB_MAX_TIME);
  }

  clearTimeout(queueTimeoutDebounce);
  queueTimeoutDebounce = setTimeout(performUpdate, DEFAULT_DB_DEBOUNCE_TIME);
}
// Promise of the flush that is currently running, if any.
// Must stay `var`: this declaration sits after the enclosing function's `return`
// statement, so only a hoisted binding is usable by the closures.
var currentBulkUpdate;

/**
 * Writes all pending posts and profiles to IndexedDB in batches.
 *
 * Returns immediately when a flush is already running. Otherwise both timers
 * are cancelled, the pending maps are swapped with the (empty) in-progress maps
 * and their content is written with bulkPut, batch by batch. Updates queued
 * while the flush is running are picked up by the next loop iteration.
 *
 * If a write fails, the entries that were not written are moved back to the
 * pending maps (without overwriting newer versions) and the in-progress maps
 * are emptied, so that later updates can schedule a new flush. The failure is
 * still propagated to the caller.
 *
 * @returns {Promise<void>}
 */
async function performUpdate() {
  if (outstandingPostUpdatesInProgressByURI.size || outstandingProfileUpdatesInProgressByShortDID.size) return;

  clearTimeout(queueTimeoutMax);
  clearTimeout(queueTimeoutDebounce);
  queueTimeoutMax = queueTimeoutDebounce = undefined;

  const BULK_UPDATE_BATCH_COUNT = 1023;

  currentBulkUpdate = (async () => {
    try {
      while (outstandingPostUpdatesByURI.size || outstandingProfileUpdatesByShortDID.size) {
        const postUpdates = [...outstandingPostUpdatesByURI.values()];
        const profileUpdates = [...outstandingProfileUpdatesByShortDID.values()];

        {
          // push post updates to in-progress map
          const tmp = outstandingPostUpdatesByURI;
          outstandingPostUpdatesByURI = outstandingPostUpdatesInProgressByURI;
          outstandingPostUpdatesInProgressByURI = tmp;
        }

        {
          // push profile updates to in-progress map
          const tmp = outstandingProfileUpdatesByShortDID;
          outstandingProfileUpdatesByShortDID = outstandingProfileUpdatesInProgressByShortDID;
          outstandingProfileUpdatesInProgressByShortDID = tmp;
        }

        for (let i = 0; i < Math.max(postUpdates.length, profileUpdates.length); i += BULK_UPDATE_BATCH_COUNT) {
          if (i) {
            // short pause between batches
            await new Promise(resolve => setTimeout(resolve, 10));
          }

          const postBatch = postUpdates.slice(i, i + BULK_UPDATE_BATCH_COUNT);
          const profileBatch = profileUpdates.slice(i, i + BULK_UPDATE_BATCH_COUNT);

          const updateReport = {};
          updateReport.postsTotal = postUpdates.length;
          updateReport.profilesTotal = profileUpdates.length;

          let postUpdatePromise;
          if (postBatch.length) {
            postUpdatePromise = db.posts.bulkPut(updateReport.posts = postBatch);
          }

          let profileUpdatePromise;
          if (profileBatch.length) {
            profileUpdatePromise = db.profiles.bulkPut(updateReport.profiles = profileBatch);
          }

          const startBulkUpdate = Date.now();
          // awaited together so that a failure of one write never leaves the other unobserved
          await Promise.all([postUpdatePromise, profileUpdatePromise]);

          console.log('dumping to indexedDB: ', updateReport, ' in ' + (Date.now() - startBulkUpdate).toLocaleString() + 'ms');

          for (const post of postBatch) {
            outstandingPostUpdatesInProgressByURI.delete(post.uri);
          }

          for (const profile of profileBatch) {
            outstandingProfileUpdatesInProgressByShortDID.delete(profile.shortDID);
          }
        }
      }
    } finally {
      // After a successful flush both in-progress maps are already empty.
      // After a failure, hand the unwritten entries back to the pending maps,
      // otherwise the non-empty in-progress maps would block every later flush.
      for (const [uri, post] of outstandingPostUpdatesInProgressByURI) {
        if (!outstandingPostUpdatesByURI.has(uri)) outstandingPostUpdatesByURI.set(uri, post);
      }
      outstandingPostUpdatesInProgressByURI.clear();

      for (const [shortDID, profile] of outstandingProfileUpdatesInProgressByShortDID) {
        if (!outstandingProfileUpdatesByShortDID.has(shortDID)) outstandingProfileUpdatesByShortDID.set(shortDID, profile);
      }
      outstandingProfileUpdatesInProgressByShortDID.clear();

      currentBulkUpdate = undefined;
    }
  })();

  await currentBulkUpdate;
}
/**
 * Looks up a single post, first in the in-memory store and then in IndexedDB.
 * A post found in IndexedDB is added to the in-memory store before it is returned.
 *
 * @param {string | undefined} uri Post URI, or a post URL that breakPostURL can parse.
 * @returns {import('.').CompactPost | Promise<import('.').CompactPost | undefined> | undefined}
 *  The post itself when it is already in memory, a promise when IndexedDB has to be
 *  queried, or undefined when the argument is empty or cannot be parsed.
 */
function getPostOnly(uri) {
  if (!uri) return;

  const parsedURL = breakFeedURIPostOnly(uri) || breakPostURL(uri);
  if (!parsedURL) return;

  // memStore.repos is keyed by shortDID everywhere else in this store, and the parser
  // result is read as { shortDID, postID } elsewhere; `repo` is kept only as a fallback.
  // NOTE(review): confirm breakPostURL exposes the same `shortDID` field.
  const { shortDID, repo: legacyRepoKey } = /** @type {any} */ (parsedURL);
  const repoKey = shortDID ?? legacyRepoKey;

  let repo = memStore.repos.get(repoKey);
  if (repo) {
    const existingPost = repo.posts.get(uri);
    if (existingPost) return existingPost;
  }

  return db.posts.get(uri).then(post => {
    if (!post) return;

    // cache in memory now
    if (!repo) {
      repo = createRepoData(repoKey);
      memStore.repos.set(repoKey, repo);
    }

    repo.posts.set(post.uri, post);
    return post;
  });
}
/**
 * Synchronous entry point for thread lookup: an empty argument yields undefined
 * without creating a promise, anything else is delegated to getPostThreadAsync.
 * @param {string | undefined} url
 * @returns {Promise<import('.').CompactThreadPostSet | undefined> | undefined}
 */
function getPostThread(url) {
  return url ? getPostThreadAsync(url) : undefined;
}
/**
 * Collects the posts of the thread that the given post belongs to.
 *
 * The post is looked up in the pending updates, then in the in-memory store and
 * finally in IndexedDB. The thread consists of the posts sharing the same
 * threadStart (from IndexedDB and from the pending updates), the thread start
 * post itself, and the reply-to ancestors of all of them. Ancestors that are
 * not stored are represented by speculative placeholder posts.
 *
 * @param {string} uri URI of a post.
 * @returns {Promise<import('.').CompactThreadPostSet | undefined>}
 *  undefined when the URI cannot be parsed as a post URI or the post is not known at all.
 */
async function getPostThreadAsync(uri) {
  const currentPostURIParsed = breakFeedURIPostOnly(uri);
  if (!currentPostURIParsed) return;
  const { shortDID, postID: currentPostID } = currentPostURIParsed;

  let currentPost = outstandingPostUpdatesByURI.get(uri) || outstandingPostUpdatesInProgressByURI.get(uri);
  if (!currentPost) {
    // getPostOnly stores posts in the in-memory repo by URI; the lookup by post ID
    // is retained as a fallback for entries stored under that key.
    const repoPosts = memStore.repos.get(shortDID)?.posts;
    currentPost = repoPosts?.get(uri) || repoPosts?.get(currentPostID);
  }
  // the result of the database lookup has to be kept, otherwise posts that only
  // exist in IndexedDB are reported as unknown
  if (!currentPost) currentPost = await db.posts.get(uri);
  if (!currentPost) return;

  const threadStartURI = currentPost.threadStart || uri;

  const [threadStartPost, dbPosts] = await Promise.all([
    db.posts.get(threadStartURI),
    db.posts.where('threadStart').equals(threadStartURI).toArray()
  ]);

  // insertion order: stored thread posts, the current post, the thread start post
  /** @type {Map<string, import('.').CompactPost>} */
  const postsByUri = new Map();
  for (const post of dbPosts) {
    postsByUri.set(post.uri, post);
  }
  if (!postsByUri.has(currentPost.uri)) postsByUri.set(currentPost.uri, currentPost);
  if (threadStartPost && !postsByUri.has(threadStartPost.uri)) postsByUri.set(threadStartPost.uri, threadStartPost);

  // versions that are not written to IndexedDB yet take precedence over stored ones
  const currentPostUri = currentPost.uri;
  const pendingPosts = [
    ...outstandingPostUpdatesByURI.values(),
    ...outstandingPostUpdatesInProgressByURI.values()
  ];
  for (const post of pendingPosts) {
    const belongsToThread =
      post.uri === currentPostUri ||
      post.threadStart === threadStartURI ||
      post.uri === threadStartURI;
    if (belongsToThread) postsByUri.set(post.uri, post);
  }

  const all = [...postsByUri.values()];

  const current = postsByUri.get(uri) || createSpeculativePost(shortDID, uri);
  let root = current?.threadStart ? postsByUri.get(current.threadStart) : undefined;
  if (!root) {
    const rootShortDID = breakFeedURIPostOnly(current.threadStart)?.shortDID;
    if (rootShortDID && current.threadStart) {
      const dbRoot = await db.posts.get(current.threadStart);
      if (dbRoot) root = dbRoot;
      else root = createSpeculativePost(rootShortDID, current.threadStart);
    }

    if (!root) root = current;
  }

  // Complete the reply chains: every post whose parent is not part of the set yet
  // gets the parent loaded from IndexedDB, or a speculative placeholder for it.
  const knownUris = new Set(all.map(p => p.uri));
  const unchecked = all.slice();
  let post;
  while ((post = unchecked.pop())) {
    if (!post.replyTo || knownUris.has(post.replyTo)) continue;

    const parent = await db.posts.get(post.replyTo);
    if (parent) {
      all.push(parent);
      knownUris.add(parent.uri);
      // the loaded parent may itself reply to a post that is not in the set yet
      unchecked.push(parent);
    } else {
      const parentShortDID = breakFeedURIPostOnly(post.replyTo)?.shortDID;
      if (parentShortDID) {
        const speculative = createSpeculativePost(parentShortDID, post.replyTo);
        all.push(speculative);
        knownUris.add(speculative.uri);
      }
    }
  }

  return { all, root, current };
}
/**
 * Searches posts by author and/or text, in IndexedDB and in the updates that
 * are still waiting to be written.
 *
 * - With only an author, all posts of that author are returned, newest first (by asOf).
 * - With text, candidates are selected through the indexed words and then ranked
 *   with Fuse; each result carries the Fuse match data and `searchWords`.
 *   When an author is given too, the result array also gets `processedAllCount`,
 *   the number of stored posts of that author.
 *
 * @param {string | null | undefined} did Author to restrict the search to.
 * @param {string | null | undefined} text Text to search for.
 * @param {boolean} [likesAndReposts] Also consider posts liked or reposted by the author
 *  (only applied when both author and text are given).
 * @param {{ add(uri: string): void }} [missingLikesAndReposts] Receives URIs of liked/reposted
 *  posts that are stored as placeholders only.
 * @returns {Promise<import('.').MatchCompactPost[]>}
 */
async function searchPosts(did, text, likesAndReposts, missingLikesAndReposts) {
  const wordStarts = detectWordStartsNormalized(text, undefined);
  const wordStartList = wordStarts || [];
  const hasWordStarts = wordStartList.length > 0;
  if (!hasWordStarts && !did) return [];

  const words = breakIntoWords(text || '');
  words.push(text || '');

  const shortDID = shortenDID(did);

  /** @param {import('.').CompactPost} post */
  const hasMatchingWord = (post) =>
    !!post.words && post.words.some(w => wordStartList.includes(w));

  /** @param {import('.').CompactPost} post */
  const matchesOrIsWantedPlaceholder = (post) =>
    hasMatchingWord(post) || !!missingLikesAndReposts && !!post.placeholder;

  // search by both shortDID and words
  const dbPostsQuery =
    !shortDID ?
      db.posts.where('words').anyOf(wordStartList) :
      !hasWordStarts ?
        db.posts.where('shortDID').equals(shortDID) :
        db.posts.where('shortDID').equals(shortDID).and(hasMatchingWord);

  const includeLikesAndReposts = !!likesAndReposts && !!shortDID && hasWordStarts;

  const likesQuery = !includeLikesAndReposts ? undefined :
    db.posts.where('likedBy').anyOf([shortDID]).and(matchesOrIsWantedPlaceholder);

  const repostsQuery = !includeLikesAndReposts ? undefined :
    db.posts.where('repostedBy').anyOf([shortDID]).and(matchesOrIsWantedPlaceholder);

  // awaited together, so a failing query never leaves another rejection unobserved
  const [dbPosts, likes, reposts] = await Promise.all([
    dbPostsQuery.toArray(),
    likesQuery?.toArray(),
    repostsQuery?.toArray()
  ]);

  /** @type {Map<string, import('.').CompactPost>} */
  const postsByURI = new Map();

  for (const post of dbPosts) {
    postsByURI.set(post.uri, post);
  }

  for (const likedOrReposted of [likes, reposts]) {
    if (!likedOrReposted) continue;
    for (const post of likedOrReposted) {
      if (post.placeholder) missingLikesAndReposts?.add(post.uri);
      else postsByURI.set(post.uri, post);
    }
  }

  // Posts not written to IndexedDB yet are selected by the same rule as the main
  // query above: author only, words only, or author and words.
  /** @param {import('.').CompactPost} post */
  const pendingPostMatches = (post) =>
    shortDID ?
      post.shortDID === shortDID && (!hasWordStarts || hasMatchingWord(post)) :
      hasWordStarts && hasMatchingWord(post);

  // in-progress first: a newer pending version of the same post replaces it
  for (const pendingPosts of [outstandingPostUpdatesInProgressByURI, outstandingPostUpdatesByURI]) {
    for (const pendingPost of pendingPosts.values()) {
      if (pendingPostMatches(pendingPost)) postsByURI.set(pendingPost.uri, pendingPost);
    }
  }

  const allPosts = [...postsByURI.values()];

  if (!text) {
    allPosts.sort((a1, a2) => (a2.asOf || 0) - (a1.asOf || 0));
    return allPosts;
  }

  // started only on the path that consumes it, so the promise is never left unawaited
  const allPostsForShortDIDPromise = !shortDID ? undefined :
    db.posts.where('shortDID').equals(shortDID).count();

  const FUSE_THRESHOLD = 0.7;
  const fuse = new Fuse(allPosts, {
    includeScore: true,
    keys: ['text'],
    includeMatches: true,
    shouldSort: true,
    findAllMatches: true,
    ignoreLocation: true,
    threshold: FUSE_THRESHOLD
  });

  const matches = fuse.search(text).filter(m => (m.score || 0) <= FUSE_THRESHOLD);

  /**
   * @type {import('.').MatchCompactPost[] & { processedAllCount?: number }}
   */
  const compact = matches.map(fuseMatch => {
    const joined = {
      ...fuseMatch,
      ...fuseMatch.item,
      item: undefined,
      searchWords: words
    };
    return joined;
  });

  if (allPostsForShortDIDPromise)
    compact.processedAllCount = await allPostsForShortDIDPromise;

  return compact;
}
/**
 * Looks up a profile by DID or by handle.
 *
 * DID: the in-memory store is checked first; otherwise IndexedDB is queried and
 * a found profile is cached in memory.
 * Handle: the profiles in memory are scanned for that handle; if none matches,
 * IndexedDB is queried. A handle matching more than one profile yields undefined.
 *
 * @param {string | undefined} did DID or handle.
 * @returns The profile when found in memory, a promise when IndexedDB is queried,
 *  otherwise undefined.
 */
function getProfile(did) {
  if (!likelyDID(did)) {
    const shortHandle = shortenHandle(did);
    if (!shortHandle) return;

    const inMemoryMatches = [...memStore.repos.values()]
      .map(repoData => repoData.profile)
      .filter(profile => profile?.handle === shortHandle);

    if (inMemoryMatches.length > 1) return undefined; // can it happen???
    if (inMemoryMatches.length === 1) return inMemoryMatches[0];

    return db.profiles.where('handle').equals(shortHandle).toArray().then(
      stored => stored.length === 1 ? stored[0] : undefined);
  }

  const shortDID = shortenDID(did);
  if (!shortDID) return;

  let repoData = memStore.repos.get(shortDID);
  if (repoData?.profile) return repoData.profile;

  return db.profiles.get(shortDID).then(stored => {
    if (!stored) return;

    // cache in memory now
    if (!repoData) {
      repoData = createRepoData(shortDID);
      memStore.repos.set(shortDID, repoData);
    }

    repoData.profile = stored;
    return stored;
  });
}
/**
 * Searches profiles by text. Candidates are the stored profiles whose indexed
 * words match the word starts of the text, plus every profile currently held
 * in memory; Fuse then ranks them by handle, display name and description.
 *
 * @param {string} text
 * @param {{ max?: number }} [options] `max` limits the number of results.
 * @returns {Promise<import('..').MatchCompactProfile[] | undefined>}
 *  undefined when the text is empty or yields no word starts.
 */
async function searchProfiles(text, options) {
  if (!text) return;

  const wordStarts = detectWordStartsNormalized(text, undefined);
  if (!wordStarts?.length) return;

  const searchWords = breakIntoWords(text);
  searchWords.push(text);

  const storedMatches = await db.profiles.where('words').anyOf(wordStarts).toArray();

  /** @type {Map<string, import('.').CompactProfile>} */
  const candidates = new Map(storedMatches.map(profile => [profile.shortDID, profile]));

  // in-memory versions replace the stored ones
  for (const { profile } of memStore.repos.values()) {
    if (profile) candidates.set(profile.shortDID, profile);
  }

  const fuse = new Fuse([...candidates.values()], {
    includeScore: true,
    keys: ['handle', 'displayName', 'description'],
    includeMatches: true,
    shouldSort: true,
    findAllMatches: true
  });

  const limit = options?.max;
  const ranked = fuse.search(text, limit ? { limit } : undefined);

  return ranked.map(fuseMatch => ({
    ...fuseMatch,
    ...fuseMatch.item,
    searchWords,
    item: undefined
  }));
}
/**
 * Reads the revision stored by the last sync of a repository.
 * @param {string} shortDID
 * @returns {Promise<string | undefined>} undefined when the repository has no sync entry.
 */
async function getLastRepoSyncRev(shortDID) {
  const syncEntry = await db.repoSync.get(shortDID);
  return syncEntry?.lastSyncRev;
}
/**
 * Captures a set of repository records, flushes the resulting updates to
 * IndexedDB and stores the sync revision of the repository.
 *
 * The revision is the greatest post ID found among the URIs of the like and
 * post records. It is stored under the repo of the first record, and only
 * after the captured data has been written.
 *
 * @param {import('../firehose').FirehoseRecord[]} records
 * @param {number} now
 * @returns The non-empty results of capturing each record, in record order.
 */
async function syncRepoWithData(records, now) {
  let lastSync = '';
  for (const record of records) {
    if (record.$type === 'app.bsky.feed.like' || record.$type === 'app.bsky.feed.post') {
      const parsedURI = breakFeedURI(record.uri);
      if (parsedURI?.postID && parsedURI.postID > lastSync) {
        // only consider POSTs, not other feed URIs
        lastSync = parsedURI.postID;
      }
    }
  }

  const compact = [];
  for (const record of records) {
    const co = memStore.captureRecord(record, now);
    if (co) {
      compact.push(co);
    }
  }

  // let a running flush finish, then write whatever this capture has queued
  await currentBulkUpdate;
  await performUpdate();

  if (lastSync) {
    // awaited, so that a failed write is reported to the caller and the revision
    // is stored by the time this function resolves
    await db.repoSync.put({ shortDID: shortenDID(records[0].repo), lastSyncRev: lastSync });
  }

  return compact;
}
}