UNPKG

@braindb/core

Version:

markdown-graph-content-layer-database

198 lines (197 loc) 7.08 kB
import { readFile, stat } from "node:fs/promises";
import { dirname, resolve } from "node:path";
import { visit, SKIP, EXIT } from "unist-util-visit";
import { parse as parseYaml } from "yaml";
import { eq } from "drizzle-orm";
import { document, link, task } from "./schema.js";
import { mdParser } from "./parser.js";
import {
  cheksum64str,
  cheksumConfig,
  isExternalLink,
  memoizeOnce,
} from "./utils.js";
import { deleteDocument } from "./deleteDocument.js";
import { defaultGetSlug, defaultGetUrl } from "./defaults.js";
import { Repository } from "@napi-rs/simple-git";

/** Placeholder stored in place of the parsed AST when `cfg.storeMarkdown === false`. */
export const emptyAst = {};

// NOTE(review): `memoizeOnce` lives in ./utils.js and is not visible here;
// presumably it caches the result of the wrapped call - confirm.
const getRepo = memoizeOnce((path) => Repository.discover(path));
const getRepoPath = memoizeOnce((repo) => dirname(repo.path()));

/**
 * Builds the query that stamps the current revision on an unchanged document.
 *
 * The builder is returned un-executed; the caller awaits it, exactly as the
 * original inline code did.
 */
function touchRevision(db, idPath, revision) {
  return db
    .update(document)
    .set({ revision /*, updated_at */ })
    .where(eq(document.path, idPath));
}

/**
 * Decodes a link URL, falling back to the raw value when it contains a
 * malformed percent sequence (e.g. `50%.md`), for which `decodeURI` throws.
 */
function safeDecodeURI(url) {
  try {
    return decodeURI(url);
  } catch (e) {
    if (e instanceof URIError) return url;
    throw e;
  }
}

/**
 * Computes the target columns of a `link` row for a markdown link or wikilink.
 *
 * Markdown links yield `to_url` / `to_path`; wikilinks yield `to_slug`.
 * Fields that do not apply to the node type are left `undefined`.
 */
function getLinkTarget(node, idPath, documentUrl) {
  if (node.type !== "link") {
    const label = node.data.alias;
    const [to_slug, to_anchor] = node.value.split("#");
    return { to_slug, to_anchor, label };
  }

  const label = node.children[0]?.value;
  const [rawTarget, to_anchor] = safeDecodeURI(node.url).split("#");
  let to_url = rawTarget;
  let to_path = rawTarget;
  // resolve local link
  if (!to_url.startsWith("/")) {
    to_url = resolve(documentUrl, to_url);
  }
  // normalize url
  if (!to_url.endsWith("/")) {
    to_url = to_url + "/";
  }
  // resolve local path
  if (!to_path.startsWith("/")) {
    to_path = resolve(dirname(idPath), to_path);
  }
  return { to_url, to_path, to_anchor, label };
}

/**
 * Parses one markdown file and (re)indexes it: the document row itself, its
 * internal links / wikilinks, and its task-list items.
 *
 * When `cfg.cache` is set and the file is unchanged (same config hash and
 * either same mtime or same content checksum), only the stored `revision`
 * is updated and the file is not re-parsed.
 *
 * @param db - database handle exposing the query-builder API used below
 *   (`select` / `insert` / `update`).
 * @param {string} idPath - document path as stored in `document.path`;
 *   appended to `cfg.root` to locate the file on disk.
 * @param cfg - configuration; the fields read here are `root`, `cache`,
 *   `git`, `url`, `slug` and `storeMarkdown`.
 * @param revision - value written to the document's `revision` column.
 * @returns {Promise<void>}
 * @throws if the file cannot be stat'ed or read, or if parsing / database
 *   calls throw.
 */
export async function addDocument(db, idPath, cfg, revision) {
  // maybe use prepared statement?
  const [existingDocument] = db
    .select({
      id: document.id,
      path: document.path,
      mtime: document.mtime,
      checksum: document.checksum,
      cfghash: document.cfghash,
    })
    .from(document)
    .where(eq(document.path, idPath))
    .all();

  const absolutePath = cfg.root + idPath;
  // https://nodejs.org/api/fs.html#class-fsstats
  const st = await stat(absolutePath);
  const mtime = st.mtimeMs;
  let checksum = "";
  let markdown = "";
  let cfghash = 0;

  if (cfg.cache) {
    cfghash = cheksumConfig(cfg);
    // The stored mtime is only trusted when it is more than 1000 ms away from
    // "now"; see
    // https://ziglang.org/download/0.4.0/release-notes.html#Build-Artifact-Caching
    const trustedTimestamp =
      existingDocument &&
      Math.abs(existingDocument.mtime - Date.now()) > 1000;
    if (
      existingDocument &&
      trustedTimestamp &&
      existingDocument.cfghash === cfghash &&
      existingDocument.mtime === mtime
    ) {
      await touchRevision(db, idPath, revision);
      return;
    }

    // Timestamp differs or is not trusted: compare content checksums instead.
    markdown = await readFile(absolutePath, { encoding: "utf8" });
    checksum = cheksum64str(markdown);
    if (
      existingDocument &&
      existingDocument.cfghash === cfghash &&
      existingDocument.checksum === checksum
    ) {
      await touchRevision(db, idPath, revision);
      return;
    }
  } else {
    markdown = await readFile(absolutePath, { encoding: "utf8" });
  }

  let updated_at = Math.round(mtime);
  if (cfg.git) {
    // TODO: maybe, if file is modified use `mtimeMs` instead of git date?
    try {
      const repo = getRepo(cfg.root);
      updated_at = await repo.getFileLatestModifiedDateAsync(
        absolutePath.replace(getRepoPath(repo) + "/", "")
      );
    } catch (e) {
      // Best effort: on failure keep the mtime-based value computed above.
      // TODO: maybe config logger?
      // TODO: use LRU or Bloom filter to report warning only once
      console.log(`Warning: ${e}`);
    }
  }

  const ast = mdParser.parse(markdown);
  // The source text is no longer needed once parsed; drop the reference.
  markdown = "";
  const frontmatter = getFrontmatter(ast);
  const getUrl = cfg.url || defaultGetUrl;
  const getSlug = cfg.slug || defaultGetSlug;

  // typeof document.$inferInsert
  const newDocument = {
    id: existingDocument?.id,
    frontmatter,
    path: idPath,
    ast: cfg.storeMarkdown === false ? emptyAst : ast,
    mtime,
    checksum,
    cfghash,
    url: getUrl(idPath, frontmatter),
    slug: getSlug(idPath, frontmatter),
    updated_at,
    revision,
  };

  if (existingDocument) deleteDocument(db, idPath);

  // TODO: should not update frontmatter here,
  // but title may be exception (or not?), because it is required for wikilinks
  // Alternatively: can use first H1 as title
  if (!newDocument.frontmatter.title) {
    newDocument.frontmatter.title = newDocument.slug;
  }

  db.insert(document)
    .values(newDocument)
    .onConflictDoUpdate({ target: document.path, set: newDocument })
    .run();

  visit(ast, (node) => {
    if (node.type === "link" || node.type === "wikiLink") {
      if (node.type === "link" && isExternalLink(node.url)) {
        /**
         * not interested in external links for now
         * in future may be used:
         * - to check if it returns <= 400
         * - to fetch icon
         * - to generate screenshot
         */
        return SKIP;
      }

      const { to_url, to_path, to_slug, to_anchor, label } = getLinkTarget(
        node,
        idPath,
        newDocument.url
      );
      const start = node.position.start.offset;
      const line = node.position.start.line;
      const column = node.position.start.column;
      db.insert(link)
        .values({
          from: idPath,
          start,
          to_url,
          to_path,
          to_slug,
          to_anchor,
          label,
          line,
          column,
        })
        .run();
      return SKIP;
    }

    // Task-list items are list items whose `checked` is an explicit boolean.
    if (node.type === "listItem" && typeof node.checked === "boolean") {
      const start = node.position.start.offset;
      const line = node.position.start.line;
      const column = node.position.start.column;
      const checked = node.checked;
      // Only the first child of the list item is stored as the task's AST.
      const taskAst = node.children[0];
      db.insert(task)
        .values({
          from: idPath,
          start,
          line,
          column,
          checked,
          ast: taskAst,
        })
        .run();
      return SKIP;
    }
  });
}

/**
 * Extracts the frontmatter from the first `yaml` node found in the tree.
 *
 * @param ast - syntax tree to search.
 * @returns the parsed YAML when it is a non-null object; otherwise `{}`.
 *   This covers a missing `yaml` node, an empty frontmatter block (which
 *   parses to `null`) and a scalar document, so callers can safely read and
 *   assign properties on the result.
 * @throws whatever `parseYaml` throws for invalid YAML.
 */
export function getFrontmatter(ast) {
  let frontmatter = {};
  visit(ast, (node) => {
    if (node.type === "yaml") {
      /**
       * can yaml handle non-JSON types? if yes, then this is a bug
       */
      const parsed = parseYaml(node.value);
      if (parsed !== null && typeof parsed === "object") {
        frontmatter = parsed;
      }
      return EXIT;
    }
  });
  return frontmatter;
}