UNPKG

jorel

Version:

A unified wrapper for working with LLMs from multiple providers, including streams, images, documents & automatic tool use.

124 lines (123 loc) 5.38 kB
"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
exports.LlmDocumentCollection = void 0;
const document_1 = require("./document");

/**
 * Check if a document type should be used as a semantic XML tag name.
 * Types starting with an ASCII capital letter (e.g., "Product",
 * "CustomerProfile") are used as the tag name directly; every other type
 * falls back to the generic "Document" tag.
 *
 * @param {string} type - The document type to inspect.
 * @returns {boolean} True when the type starts with A-Z.
 */
const isSemanticType = (type) => /^[A-Z]/.test(type);

/**
 * Resolve the tag name for a document: its own type when that type is
 * semantic, otherwise the generic "Document".
 */
const resolveTagName = (document) =>
  document.type && isSemanticType(document.type) ? document.type : "Document";

/**
 * Render a document's extra attributes as space-separated key='value' pairs.
 * Returns an empty string when the document has no attributes.
 */
const renderExtraAttributes = (document) => {
  const entries = document.attributes ? Object.entries(document.attributes) : [];
  return entries.map(([key, value]) => `${key}='${value}'`).join(" ");
};

/**
 * Matches every placeholder supported by custom document templates.
 * The regex is global; String.prototype.replace resets lastIndex on each
 * call, so sharing one instance across calls is safe.
 */
const TEMPLATE_PLACEHOLDER = /\{\{(tagName|id|type|title|content|attributes|source)\}\}/g;

/**
 * Fill a custom template with the fields of a single document.
 *
 * All placeholders are substituted in one pass through a replacer function:
 *  - A replacer's return value is inserted literally, so "$&", "$$", "$1"
 *    etc. inside document fields are not interpreted as special replacement
 *    patterns (which is what happens when the value is passed to
 *    String.prototype.replace as a replacement string).
 *  - Inserted text is never re-scanned, so a document whose content happens
 *    to contain "{{source}}" or another placeholder is emitted unchanged.
 *  - Every occurrence of every placeholder is replaced, not only the first.
 */
const renderWithTemplate = (template, document) => {
  const attributes = renderExtraAttributes(document).trim();
  const values = {
    tagName: resolveTagName(document),
    id: document.id,
    type: document.type,
    title: document.title,
    content: document.content,
    // A leading space lets templates write "<Tag id='..'{{attributes}}>".
    attributes: attributes ? ` ${attributes}` : "",
    source: document.source || "n/a",
  };
  return template.replace(TEMPLATE_PLACEHOLDER, (_match, key) => String(values[key]));
};

/**
 * A collection of LLM documents, like a binder or folder of documents
 * that can be used for grounding LLM generations (either directly or passed to agents).
 *
 * Also provides a system message representation of the documents.
 */
class LlmDocumentCollection {
  /**
   * @param {Array} [documents=[]] - LlmDocument instances, or values that are
   *   passed to the LlmDocument constructor. Documents are keyed by id, so a
   *   later document with the same id replaces an earlier one.
   * @param {object} [config={}]
   * @param {"xml"|"json"|{template: string, separator: string}} [config.documentToText]
   *   How documents are rendered by systemMessageRepresentation. Defaults to "xml".
   */
  constructor(documents = [], config = {}) {
    const wrapped = documents.map((document) =>
      document instanceof document_1.LlmDocument
        ? document
        : new document_1.LlmDocument(document),
    );
    this._documents = new Map(wrapped.map((document) => [document.id, document]));
    this.documentToTextTemplate = config.documentToText || "xml";
  }

  /**
   * The number of documents in the collection
   */
  get length() {
    return this._documents.size;
  }

  /**
   * Get all documents in the collection (as a new array)
   */
  get all() {
    return Array.from(this._documents.values());
  }

  /**
   * Get the definition of all documents in the collection (e.g. for serialization)
   */
  get definition() {
    return this.all.map((document) => document.definition);
  }

  /**
   * Get a system message representation of the documents.
   *
   * - Empty collection: "-".
   * - "json": the JSON-stringified definitions.
   * - "xml" (default): one element per document wrapped in <Documents>.
   * - Otherwise: the configured custom template, rendered once per document
   *   and joined with the configured separator.
   *
   * @returns {string}
   * @throws {Error} When a custom template lacks the '{{id}}' or
   *   '{{content}}' placeholder (only checked for non-empty collections).
   */
  get systemMessageRepresentation() {
    if (this._documents.size === 0) return "-";

    const mode = this.documentToTextTemplate;

    if (mode === "json") return JSON.stringify(this.definition);

    // Default XML mode with semantic tag names
    if (mode === "xml") {
      const rendered = this.all.map((document) => {
        // Use type as tag name if it starts with a capital letter (e.g., "Product", "CustomerProfile")
        // Otherwise use generic "Document" tag with type attribute
        const usesSemanticTag = document.type && isSemanticType(document.type);
        const tagName = usesSemanticTag ? document.type : "Document";

        // Build attributes - only include type when using generic Document tag
        const attrs = [`id='${document.id}'`];
        if (!usesSemanticTag) {
          attrs.push(`type='${document.type}'`);
        }
        attrs.push(`title='${document.title}'`);
        attrs.push(`source='${document.source || "n/a"}'`);

        const extraAttrsString = renderExtraAttributes(document);
        if (extraAttrsString) {
          attrs.push(extraAttrsString);
        }

        // NOTE: values are interpolated verbatim (no XML escaping).
        return `<${tagName} ${attrs.join(" ")}>${document.content}</${tagName}>`;
      });
      return `<Documents>\n${rendered.join("\n")}\n</Documents>`;
    }

    // Custom template handling
    const { template, separator } = mode;

    // The template is the same for every document, so validate it once.
    if (!template.includes("{{id}}")) {
      throw new Error("Document template must include '{{id}}' placeholder.");
    }
    if (!template.includes("{{content}}")) {
      throw new Error("Document template must include '{{content}}' placeholder.");
    }

    // Array.prototype.join falls back to "," when separator is undefined.
    return this.all
      .map((document) => renderWithTemplate(template, document))
      .join(separator);
  }

  /**
   * Create a new collection from a JSON representation.
   * LlmDocument instances are rebuilt from their definition, so the new
   * collection never holds the instances that were passed in.
   */
  static fromJSON(documents = []) {
    return new LlmDocumentCollection(
      documents.map(
        (document) =>
          new document_1.LlmDocument(
            document instanceof document_1.LlmDocument ? document.definition : document,
          ),
      ),
    );
  }

  /**
   * Add a document to the collection (replaces any document with the same id)
   */
  add(document) {
    this._documents.set(document.id, document);
  }

  /**
   * Remove a document from the collection
   */
  remove(id) {
    this._documents.delete(id);
  }

  /**
   * Get a document by its ID (undefined when no such document exists)
   */
  get(id) {
    return this._documents.get(id);
  }
}
exports.LlmDocumentCollection = LlmDocumentCollection;