UNPKG

@langchain/community

Version:
1 lines 11.4 kB
{"version":3,"file":"obsidian.cjs","names":["BaseDocumentLoader","yaml","Document","DirectoryLoader","UnknownHandling"],"sources":["../../../src/document_loaders/fs/obsidian.ts"],"sourcesContent":["import type { basename as BasenameT } from \"node:path\";\nimport type { readFile as ReadFileT, stat as StatT } from \"node:fs/promises\";\nimport yaml from \"js-yaml\";\nimport { Document } from \"@langchain/core/documents\";\nimport { getEnv } from \"@langchain/core/utils/env\";\nimport { BaseDocumentLoader } from \"@langchain/core/document_loaders/base\";\nimport {\n DirectoryLoader,\n UnknownHandling,\n} from \"@langchain/classic/document_loaders/fs/directory\";\n\nexport type FrontMatter = {\n title?: string;\n description?: string;\n tags?: string[] | string;\n [key: string]: unknown;\n};\n\nexport interface ObsidianFileLoaderOptions {\n encoding?: BufferEncoding;\n collectMetadata?: boolean;\n}\n\n/**\n * Represents a loader for Obsidian markdown files. This loader extends the BaseDocumentLoader\n * and provides functionality to parse and extract metadata, tags, and dataview fields from\n * Obsidian markdown files.\n */\nclass ObsidianFileLoader extends BaseDocumentLoader {\n private filePath: string;\n\n private encoding: BufferEncoding;\n\n private collectMetadata: boolean;\n\n /**\n * Initializes a new instance of the ObsidianFileLoader class.\n * @param filePath The path to the Obsidian markdown file.\n * @param encoding The character encoding to use when reading the file. Defaults to 'utf-8'.\n * @param collectMetadata Determines whether metadata should be collected from the file. Defaults to true.\n */\n constructor(\n filePath: string,\n {\n encoding = \"utf-8\",\n collectMetadata = true,\n }: ObsidianFileLoaderOptions = {}\n ) {\n super();\n this.filePath = filePath;\n this.encoding = encoding;\n this.collectMetadata = collectMetadata;\n }\n\n private static FRONT_MATTER_REGEX = /^---\\n(.*?)\\n---\\n/s;\n\n /**\n * Parses the YAML front matter from the given content string.\n * @param content The string content of the markdown file.\n * @returns An object representing the parsed front matter.\n */\n private parseFrontMatter(content: string): FrontMatter {\n if (!this.collectMetadata) {\n return {};\n }\n\n const match = content.match(ObsidianFileLoader.FRONT_MATTER_REGEX);\n if (!match) {\n return {};\n }\n\n try {\n const frontMatter = yaml.load(match[1]) as FrontMatter;\n if (frontMatter.tags && typeof frontMatter.tags === \"string\") {\n frontMatter.tags = frontMatter.tags.split(\", \");\n }\n\n return frontMatter;\n } catch {\n console.warn(\"Encountered non-yaml frontmatter\");\n return {};\n }\n }\n\n /**\n * Removes YAML front matter from the given content string.\n * @param content The string content of the markdown file.\n * @returns The content string with the front matter removed.\n */\n private removeFrontMatter(content: string): string {\n if (!this.collectMetadata) {\n return content;\n }\n\n return content.replace(ObsidianFileLoader.FRONT_MATTER_REGEX, \"\");\n }\n\n private static TAG_REGEX = /(?:\\s|^)#([a-zA-Z_][\\w/-]*)/g;\n\n /**\n * Parses Obsidian-style tags from the given content string.\n * @param content The string content of the markdown file.\n * @returns A set of parsed tags.\n */\n private parseObsidianTags(content: string): Set<string> {\n if (!this.collectMetadata) {\n return new Set();\n }\n\n const matches = content.matchAll(ObsidianFileLoader.TAG_REGEX);\n const tags = new Set<string>();\n for (const match of matches) {\n tags.add(match[1]);\n }\n\n return tags;\n }\n\n private static DATAVIEW_LINE_REGEX = /^\\s*(\\w+)::\\s*(.*)$/gm;\n\n private static DATAVIEW_INLINE_BRACKET_REGEX = /\\[(\\w+)::\\s*(.*)\\]/gm;\n\n private static DATAVIEW_INLINE_PAREN_REGEX = /\\((\\w+)::\\s*(.*)\\)/gm;\n\n /**\n * Parses dataview fields from the given content string.\n * @param content The string content of the markdown file.\n * @returns A record object containing key-value pairs of dataview fields.\n */\n private parseObsidianDataviewFields(content: string): Record<string, string> {\n if (!this.collectMetadata) {\n return {};\n }\n\n const fields: Record<string, string> = {};\n const lineMatches = content.matchAll(\n ObsidianFileLoader.DATAVIEW_LINE_REGEX\n );\n for (const [, key, value] of lineMatches) {\n fields[key] = value;\n }\n\n const bracketMatches = content.matchAll(\n ObsidianFileLoader.DATAVIEW_INLINE_BRACKET_REGEX\n );\n for (const [, key, value] of bracketMatches) {\n fields[key] = value;\n }\n\n const parenMatches = content.matchAll(\n ObsidianFileLoader.DATAVIEW_INLINE_PAREN_REGEX\n );\n for (const [, key, value] of parenMatches) {\n fields[key] = value;\n }\n\n return fields;\n }\n\n /**\n * Converts metadata to a format compatible with Langchain.\n * @param metadata The metadata object to convert.\n * @returns A record object containing key-value pairs of Langchain-compatible metadata.\n */\n private toLangchainCompatibleMetadata(metadata: Record<string, unknown>) {\n const result: Record<string, unknown> = {};\n for (const [key, value] of Object.entries(metadata)) {\n if (typeof value === \"string\" || typeof value === \"number\") {\n result[key] = value;\n } else {\n result[key] = JSON.stringify(value);\n }\n }\n return result;\n }\n\n /**\n * It loads the Obsidian file, parses it, and returns a `Document` instance.\n * @returns An array of `Document` instances to comply with the BaseDocumentLoader interface.\n */\n public async load(): Promise<Document[]> {\n const documents: Document[] = [];\n\n const { basename, readFile, stat } = await ObsidianFileLoader.imports();\n const fileName = basename(this.filePath);\n const stats = await stat(this.filePath);\n let content = await readFile(this.filePath, this.encoding);\n\n const frontMatter = this.parseFrontMatter(content);\n const tags = this.parseObsidianTags(content);\n const dataviewFields = this.parseObsidianDataviewFields(content);\n content = this.removeFrontMatter(content);\n\n const metadata: Document[\"metadata\"] = {\n source: fileName,\n path: this.filePath,\n created: stats.birthtimeMs,\n lastModified: stats.mtimeMs,\n lastAccessed: stats.atimeMs,\n ...this.toLangchainCompatibleMetadata(frontMatter),\n ...dataviewFields,\n };\n\n if (tags.size || frontMatter.tags) {\n metadata.tags = Array.from(\n new Set([...tags, ...(frontMatter.tags ?? [])])\n ).join(\",\");\n }\n\n documents.push(\n new Document({\n pageContent: content,\n metadata,\n })\n );\n\n return documents;\n }\n\n /**\n * Imports the necessary functions from the `node:path` and\n * `node:fs/promises` modules. It is used to dynamically import the\n * functions when needed. If the import fails, it throws an error\n * indicating that the modules failed to load.\n * @returns A promise that resolves to an object containing the imported functions.\n */\n static async imports(): Promise<{\n basename: typeof BasenameT;\n readFile: typeof ReadFileT;\n stat: typeof StatT;\n }> {\n try {\n const { basename } = await import(\"node:path\");\n const { readFile, stat } = await import(\"node:fs/promises\");\n return { basename, readFile, stat };\n } catch (e) {\n console.error(e);\n throw new Error(\n `Failed to load fs/promises. ObsidianFileLoader available only on environment 'node'. It appears you are running environment '${getEnv()}'. See https://<link to docs> for alternatives.`\n );\n }\n }\n}\n\n/**\n * Represents a loader for directories containing Obsidian markdown files. This loader extends\n * the DirectoryLoader and provides functionality to load and parse '.md' files with YAML frontmatter,\n * Obsidian tags, and Dataview fields.\n */\nexport class ObsidianLoader extends DirectoryLoader {\n /**\n * Initializes a new instance of the ObsidianLoader class.\n * @param directoryPath The path to the directory containing Obsidian markdown files.\n * @param encoding The character encoding to use when reading files. Defaults to 'utf-8'.\n * @param collectMetadata Determines whether metadata should be collected from the files. Defaults to true.\n */\n constructor(directoryPath: string, options?: ObsidianFileLoaderOptions) {\n super(\n directoryPath,\n {\n \".md\": (filePath) => new ObsidianFileLoader(filePath, options),\n },\n true,\n UnknownHandling.Ignore\n );\n }\n}\n"],"mappings":";;;;;;;;;;;;;;;AA4BA,IAAM,qBAAN,MAAM,2BAA2BA,sCAAAA,mBAAmB;CAClD;CAEA;CAEA;;;;;;;CAQA,YACE,UACA,EACE,WAAW,SACX,kBAAkB,SACW,EAAE,EACjC;AACA,SAAO;AACP,OAAK,WAAW;AAChB,OAAK,WAAW;AAChB,OAAK,kBAAkB;;CAGzB,OAAe,qBAAqB;;;;;;CAOpC,iBAAyB,SAA8B;AACrD,MAAI,CAAC,KAAK,gBACR,QAAO,EAAE;EAGX,MAAM,QAAQ,QAAQ,MAAM,mBAAmB,mBAAmB;AAClE,MAAI,CAAC,MACH,QAAO,EAAE;AAGX,MAAI;GACF,MAAM,cAAcC,QAAAA,QAAK,KAAK,MAAM,GAAG;AACvC,OAAI,YAAY,QAAQ,OAAO,YAAY,SAAS,SAClD,aAAY,OAAO,YAAY,KAAK,MAAM,KAAK;AAGjD,UAAO;UACD;AACN,WAAQ,KAAK,mCAAmC;AAChD,UAAO,EAAE;;;;;;;;CASb,kBAA0B,SAAyB;AACjD,MAAI,CAAC,KAAK,gBACR,QAAO;AAGT,SAAO,QAAQ,QAAQ,mBAAmB,oBAAoB,GAAG;;CAGnE,OAAe,YAAY;;;;;;CAO3B,kBAA0B,SAA8B;AACtD,MAAI,CAAC,KAAK,gBACR,wBAAO,IAAI,KAAK;EAGlB,MAAM,UAAU,QAAQ,SAAS,mBAAmB,UAAU;EAC9D,MAAM,uBAAO,IAAI,KAAa;AAC9B,OAAK,MAAM,SAAS,QAClB,MAAK,IAAI,MAAM,GAAG;AAGpB,SAAO;;CAGT,OAAe,sBAAsB;CAErC,OAAe,gCAAgC;CAE/C,OAAe,8BAA8B;;;;;;CAO7C,4BAAoC,SAAyC;AAC3E,MAAI,CAAC,KAAK,gBACR,QAAO,EAAE;EAGX,MAAM,SAAiC,EAAE;EACzC,MAAM,cAAc,QAAQ,SAC1B,mBAAmB,oBACpB;AACD,OAAK,MAAM,GAAG,KAAK,UAAU,YAC3B,QAAO,OAAO;EAGhB,MAAM,iBAAiB,QAAQ,SAC7B,mBAAmB,8BACpB;AACD,OAAK,MAAM,GAAG,KAAK,UAAU,eAC3B,QAAO,OAAO;EAGhB,MAAM,eAAe,QAAQ,SAC3B,mBAAmB,4BACpB;AACD,OAAK,MAAM,GAAG,KAAK,UAAU,aAC3B,QAAO,OAAO;AAGhB,SAAO;;;;;;;CAQT,8BAAsC,UAAmC;EACvE,MAAM,SAAkC,EAAE;AAC1C,OAAK,MAAM,CAAC,KAAK,UAAU,OAAO,QAAQ,SAAS,CACjD,KAAI,OAAO,UAAU,YAAY,OAAO,UAAU,SAChD,QAAO,OAAO;MAEd,QAAO,OAAO,KAAK,UAAU,MAAM;AAGvC,SAAO;;;;;;CAOT,MAAa,OAA4B;EACvC,MAAM,YAAwB,EAAE;EAEhC,MAAM,EAAE,UAAU,UAAU,SAAS,MAAM,mBAAmB,SAAS;EACvE,MAAM,WAAW,SAAS,KAAK,SAAS;EACxC,MAAM,QAAQ,MAAM,KAAK,KAAK,SAAS;EACvC,IAAI,UAAU,MAAM,SAAS,KAAK,UAAU,KAAK,SAAS;EAE1D,MAAM,cAAc,KAAK,iBAAiB,QAAQ;EAClD,MAAM,OAAO,KAAK,kBAAkB,QAAQ;EAC5C,MAAM,iBAAiB,KAAK,4BAA4B,QAAQ;AAChE,YAAU,KAAK,kBAAkB,QAAQ;EAEzC,MAAM,WAAiC;GACrC,QAAQ;GACR,MAAM,KAAK;GACX,SAAS,MAAM;GACf,cAAc,MAAM;GACpB,cAAc,MAAM;GACpB,GAAG,KAAK,8BAA8B,YAAY;GAClD,GAAG;GACJ;AAED,MAAI,KAAK,QAAQ,YAAY,KAC3B,UAAS,OAAO,MAAM,KACpB,IAAI,IAAI,CAAC,GAAG,MAAM,GAAI,YAAY,QAAQ,EAAE,CAAE,CAAC,CAChD,CAAC,KAAK,IAAI;AAGb,YAAU,KACR,IAAIC,0BAAAA,SAAS;GACX,aAAa;GACb;GACD,CAAC,CACH;AAED,SAAO;;;;;;;;;CAUT,aAAa,UAIV;AACD,MAAI;GACF,MAAM,EAAE,aAAa,MAAM,OAAO;GAClC,MAAM,EAAE,UAAU,SAAS,MAAM,OAAO;AACxC,UAAO;IAAE;IAAU;IAAU;IAAM;WAC5B,GAAG;AACV,WAAQ,MAAM,EAAE;AAChB,SAAM,IAAI,MACR,iIAAA,GAAA,0BAAA,SAAwI,CAAC,iDAC1I;;;;;;;;;AAUP,IAAa,iBAAb,cAAoCC,iDAAAA,gBAAgB;;;;;;;CAOlD,YAAY,eAAuB,SAAqC;AACtE,QACE,eACA,EACE,QAAQ,aAAa,IAAI,mBAAmB,UAAU,QAAQ,EAC/D,EACD,MACAC,iDAAAA,gBAAgB,OACjB"}