UNPKG

llamaindex

Version:

<p align="center"> <img height="100" width="100" alt="LlamaIndex logo" src="https://ts.llamaindex.ai/square.svg" /> </p> <h1 align="center">LlamaIndex.TS</h1> <h3 align="center"> Data framework for your LLM application. </h3>

561 lines (552 loc) 21.8 kB
import { Settings as Settings$1, DEFAULT_PERSIST_DIR, DEFAULT_DOC_STORE_PERSIST_FILENAME, DEFAULT_NAMESPACE } from '@llamaindex/core/global';
export { CallbackManager, DEFAULT_BASE_URL, DEFAULT_CHUNK_OVERLAP, DEFAULT_CHUNK_OVERLAP_RATIO, DEFAULT_CHUNK_SIZE, DEFAULT_COLLECTION, DEFAULT_CONTEXT_WINDOW, DEFAULT_DOC_STORE_PERSIST_FILENAME, DEFAULT_GRAPH_STORE_PERSIST_FILENAME, DEFAULT_INDEX_STORE_PERSIST_FILENAME, DEFAULT_NAMESPACE, DEFAULT_NUM_OUTPUTS, DEFAULT_PADDING, DEFAULT_PERSIST_DIR, DEFAULT_PROJECT_NAME, DEFAULT_VECTOR_STORE_PERSIST_FILENAME } from '@llamaindex/core/global';
import { PromptHelper } from '@llamaindex/core/indices';
export * from '@llamaindex/core/indices';
import { SentenceSplitter } from '@llamaindex/core/node-parser';
import { AsyncLocalStorage, fs, path, consoleLogger } from '@llamaindex/env';
export { LlamaParseReader } from '@llamaindex/cloud/reader';
export * from '@llamaindex/core/agent';
export * from '@llamaindex/core/chat-engine';
export * from '@llamaindex/core/data-structs';
import { getTopKMMREmbeddings, getTopKEmbeddings } from '@llamaindex/core/embeddings';
export * from '@llamaindex/core/embeddings';
export * from '@llamaindex/core/llms';
export * from '@llamaindex/core/memory';
export * from '@llamaindex/core/postprocessor';
import { PromptMixin, defaultSubQuestionPrompt } from '@llamaindex/core/prompts';
export * from '@llamaindex/core/prompts';
export * from '@llamaindex/core/query-engine';
export * from '@llamaindex/core/response-synthesizers';
export * from '@llamaindex/core/retriever';
import { ModalityType, ObjectType } from '@llamaindex/core/schema';
export * from '@llamaindex/core/schema';
export * from '@llamaindex/core/storage/chat-store';
import { KVDocumentStore } from '@llamaindex/core/storage/doc-store';
export * from '@llamaindex/core/storage/doc-store';
export { BaseDocumentStore } from '@llamaindex/core/storage/doc-store';
import { SimpleIndexStore } from '@llamaindex/core/storage/index-store';
export * from '@llamaindex/core/storage/index-store';
import { SimpleKVStore, BaseInMemoryKVStore } from '@llamaindex/core/storage/kv-store';
export * from '@llamaindex/core/storage/kv-store';
import { toToolDescriptions, extractText } from '@llamaindex/core/utils';
export * from '@llamaindex/core/utils';
export * from '../agent/dist/index.js';
export * from '../cloud/dist/index.js';
export * from '../engines/dist/index.js';
export * from '../evaluation/dist/index.js';
export * from '../extractors/dist/index.js';
export * from '../indices/dist/index.js';
export * from '../ingestion/dist/index.js';
export * from '../node-parser/dist/index.js';
export * from '../objects/dist/index.js';
export * from '../postprocessors/dist/index.js';
export * from '../selectors/dist/index.js';
import { VectorStoreQueryMode, FilterOperator, parsePrimitiveValue, parseArrayValue, BaseVectorStore, nodeToMetadata } from '@llamaindex/core/vector-store';
import _ from 'lodash';
export * from '../tools/dist/index.js';
export * from '../storage/dist/index.js';
export * from '../vector-store/dist/index.js';

/**
 * @internal
 * Process-wide settings facade. The LLM/embedding/callback/chunk-size knobs
 * delegate to `Settings` from `@llamaindex/core/global`; the remaining state
 * (prompt, promptHelper, nodeParser, chunkOverlap) lives here. Each locally
 * stored knob is backed by an `AsyncLocalStorage` so the `withX(value, fn)`
 * helpers can scope an override to the async execution of `fn` without
 * touching the global value.
 */
class GlobalSettings {
  #prompt;
  #promptHelper;
  #nodeParser;
  // Undefined until `chunkOverlap` is first assigned a number.
  #chunkOverlap;
  #promptHelperAsyncLocalStorage;
  #nodeParserAsyncLocalStorage;
  #chunkOverlapAsyncLocalStorage;
  #promptAsyncLocalStorage;

  constructor() {
    this.#prompt = {};
    // Lazily constructed on first access (see the getters below).
    this.#promptHelper = null;
    this.#nodeParser = null;
    this.#promptHelperAsyncLocalStorage = new AsyncLocalStorage();
    this.#nodeParserAsyncLocalStorage = new AsyncLocalStorage();
    this.#chunkOverlapAsyncLocalStorage = new AsyncLocalStorage();
    this.#promptAsyncLocalStorage = new AsyncLocalStorage();
  }

  get debug() {
    return Settings$1.debug;
  }

  get llm() {
    return Settings$1.llm;
  }

  set llm(llm) {
    Settings$1.llm = llm;
  }

  /** Runs `fn` with `llm` as the active LLM for the async scope. */
  withLLM(llm, fn) {
    return Settings$1.withLLM(llm, fn);
  }

  get promptHelper() {
    // Lazy default; an async-scoped override (withPromptHelper) wins.
    if (this.#promptHelper === null) {
      this.#promptHelper = new PromptHelper();
    }
    return this.#promptHelperAsyncLocalStorage.getStore() ?? this.#promptHelper;
  }

  set promptHelper(promptHelper) {
    this.#promptHelper = promptHelper;
  }

  withPromptHelper(promptHelper, fn) {
    return this.#promptHelperAsyncLocalStorage.run(promptHelper, fn);
  }

  get embedModel() {
    return Settings$1.embedModel;
  }

  set embedModel(embedModel) {
    Settings$1.embedModel = embedModel;
  }

  withEmbedModel(embedModel, fn) {
    return Settings$1.withEmbedModel(embedModel, fn);
  }

  get nodeParser() {
    // Lazy default built from the current chunk settings; an async-scoped
    // override (withNodeParser) wins.
    if (this.#nodeParser === null) {
      this.#nodeParser = new SentenceSplitter({
        chunkSize: this.chunkSize,
        chunkOverlap: this.chunkOverlap,
      });
    }
    return this.#nodeParserAsyncLocalStorage.getStore() ?? this.#nodeParser;
  }

  set nodeParser(nodeParser) {
    this.#nodeParser = nodeParser;
  }

  withNodeParser(nodeParser, fn) {
    return this.#nodeParserAsyncLocalStorage.run(nodeParser, fn);
  }

  get callbackManager() {
    return Settings$1.callbackManager;
  }

  set callbackManager(callbackManager) {
    Settings$1.callbackManager = callbackManager;
  }

  withCallbackManager(callbackManager, fn) {
    return Settings$1.withCallbackManager(callbackManager, fn);
  }

  set chunkSize(chunkSize) {
    Settings$1.chunkSize = chunkSize;
  }

  get chunkSize() {
    return Settings$1.chunkSize;
  }

  withChunkSize(chunkSize, fn) {
    return Settings$1.withChunkSize(chunkSize, fn);
  }

  get chunkOverlap() {
    return this.#chunkOverlapAsyncLocalStorage.getStore() ?? this.#chunkOverlap;
  }

  set chunkOverlap(chunkOverlap) {
    // Silently ignores non-numeric assignments (preserved legacy behavior).
    if (typeof chunkOverlap === "number") {
      this.#chunkOverlap = chunkOverlap;
    }
  }

  withChunkOverlap(chunkOverlap, fn) {
    return this.#chunkOverlapAsyncLocalStorage.run(chunkOverlap, fn);
  }

  get prompt() {
    return this.#promptAsyncLocalStorage.getStore() ?? this.#prompt;
  }

  set prompt(prompt) {
    this.#prompt = prompt;
  }

  withPrompt(prompt, fn) {
    return this.#promptAsyncLocalStorage.run(prompt, fn);
  }
}

/** Package-wide singleton settings instance. */
const Settings = new GlobalSettings();

/**
 * Error class for output parsing. Due to the nature of LLMs, anytime we use LLM
 * to generate structured output, it's possible that it will hallucinate something
 * that doesn't match the expected output format. So make sure to catch these
 * errors in production.
 */
class OutputParserError extends Error {
  constructor(message, options = {}) {
    super(message, options);
    // https://github.com/tc39/proposal-error-cause
    this.name = "OutputParserError";
    if (!this.cause) {
      // Runtimes that have not implemented the error-cause proposal ignore the
      // option passed to super(); set it by hand so `cause` is always present.
      this.cause = options.cause;
    }
    // The raw LLM output that failed to parse, for diagnostics.
    this.output = options.output;
    // Maintain a proper stack trace in V8 (https://v8.dev/docs/stack-trace-api).
    if (Error.captureStackTrace) {
      Error.captureStackTrace(this, OutputParserError);
    }
  }
}

/**
 * Extracts and parses the JSON payload embedded in a markdown block.
 *
 * @param text A markdown block containing a JSON object or JSON array
 * @returns the parsed array, or a single parsed object wrapped in a
 *   one-element array (so callers always receive an array)
 * @throws {OutputParserError} when no parsable JSON span is found
 */
function parseJsonMarkdown(text) {
  text = text.trim();
  const leftSquare = text.indexOf("[");
  const leftBrace = text.indexOf("{");
  // Fix: the payload is an array whenever "[" exists and either precedes "{"
  // or "{" is absent entirely. The previous `leftSquare < leftBrace` test
  // failed for inputs like "[1, 2]" because indexOf("{") returns -1 there,
  // sending valid arrays down the object branch with an empty substring.
  const isArrayPayload =
    leftSquare !== -1 && (leftBrace === -1 || leftSquare < leftBrace);
  let left;
  let right;
  if (isArrayPayload) {
    left = leftSquare;
    right = text.lastIndexOf("]");
  } else {
    left = leftBrace;
    right = text.lastIndexOf("}");
  }
  const jsonText = text.substring(left, right + 1);
  try {
    const parsed = JSON.parse(jsonText);
    // Single-object payloads are normalized to a one-element array.
    return isArrayPayload ? parsed : [parsed];
  } catch (e) {
    throw new OutputParserError("Not a json markdown", { output: text });
  }
}

/**
 * SubQuestionOutputParser is used to parse the output of the SubQuestionGenerator.
 */
class SubQuestionOutputParser {
  /** Parses the raw LLM output into `{ rawOutput, parsedOutput }`. */
  parse(output) {
    const parsed = parseJsonMarkdown(output);
    return {
      rawOutput: output,
      parsedOutput: parsed,
    };
  }

  /** Pass-through: no formatting is applied before sending to the LLM. */
  format(output) {
    return output;
  }
}

/**
 * LLMQuestionGenerator uses the LLM to generate new questions for the LLM using
 * tools and a user query.
 */
class LLMQuestionGenerator extends PromptMixin {
  constructor(init) {
    super();
    this.llm = init?.llm ?? Settings.llm;
    this.prompt = init?.prompt ?? defaultSubQuestionPrompt;
    this.outputParser = init?.outputParser ?? new SubQuestionOutputParser();
  }

  _getPrompts() {
    return {
      subQuestion: this.prompt,
    };
  }

  _updatePrompts(promptsDict) {
    if ("subQuestion" in promptsDict) {
      this.prompt = promptsDict.subQuestion;
    }
  }

  /**
   * Asks the LLM to decompose `query` into sub-questions that target `tools`.
   *
   * @param tools tool descriptions offered to the LLM
   * @param query the user query (string or message content)
   * @returns the parsed sub-question list produced by the output parser
   * @throws {OutputParserError} when the LLM output is not valid JSON markdown
   */
  async generate(tools, query) {
    const toolsStr = toToolDescriptions(tools);
    const queryStr = extractText(query);
    const completion = await this.llm.complete({
      prompt: this.prompt.format({ toolsStr, queryStr }),
    });
    const structuredOutput = this.outputParser.parse(completion.text);
    return structuredOutput.parsedOutput;
  }

  _getPromptModules() {
    return {};
  }
}

// FS utility helpers

/**
 * Checks if a file exists.
 * Analogous to the os.path.exists function from Python.
 * @param path The path to the file to check.
 * @returns A promise that resolves to true if the file exists, false otherwise.
 */
async function exists(path) {
  try {
    await fs.access(path);
    return true;
  } catch {
    return false;
  }
}

// Query modes that require a trained learner; not supported by SimpleVectorStore.
const LEARNER_MODES = new Set([
  VectorStoreQueryMode.SVM,
  VectorStoreQueryMode.LINEAR_REGRESSION,
  VectorStoreQueryMode.LOGISTIC_REGRESSION,
]);

const MMR_MODE = VectorStoreQueryMode.MMR;

// Mapping of filter operators to metadata filter predicates. Each predicate
// receives the filter `{ key, value }` and the node's metadata record.
const OPERATOR_TO_FILTER = {
  [FilterOperator.EQ]: ({ key, value }, metadata) => {
    return metadata[key] === parsePrimitiveValue(value);
  },
  [FilterOperator.NE]: ({ key, value }, metadata) => {
    return metadata[key] !== parsePrimitiveValue(value);
  },
  [FilterOperator.IN]: ({ key, value }, metadata) => {
    return !!parseArrayValue(value).find((v) => metadata[key] === v);
  },
  [FilterOperator.NIN]: ({ key, value }, metadata) => {
    return !parseArrayValue(value).find((v) => metadata[key] === v);
  },
  [FilterOperator.ANY]: ({ key, value }, metadata) => {
    if (!Array.isArray(metadata[key])) return false;
    return parseArrayValue(value).some((v) => metadata[key].includes(v));
  },
  [FilterOperator.ALL]: ({ key, value }, metadata) => {
    if (!Array.isArray(metadata[key])) return false;
    return parseArrayValue(value).every((v) => metadata[key].includes(v));
  },
  [FilterOperator.TEXT_MATCH]: ({ key, value }, metadata) => {
    // NOTE(review): assumes metadata[key] is a string (or array); a missing
    // key is screened out by the caller, but other types would throw.
    return metadata[key].includes(parsePrimitiveValue(value));
  },
  [FilterOperator.CONTAINS]: ({ key, value }, metadata) => {
    if (!Array.isArray(metadata[key])) return false;
    return !!parseArrayValue(metadata[key]).find((v) => v === value);
  },
  [FilterOperator.GT]: ({ key, value }, metadata) => {
    return metadata[key] > parsePrimitiveValue(value);
  },
  [FilterOperator.LT]: ({ key, value }, metadata) => {
    return metadata[key] < parsePrimitiveValue(value);
  },
  [FilterOperator.GTE]: ({ key, value }, metadata) => {
    return metadata[key] >= parsePrimitiveValue(value);
  },
  [FilterOperator.LTE]: ({ key, value }, metadata) => {
    return metadata[key] <= parsePrimitiveValue(value);
  },
};

/**
 * Builds the result of applying `preFilters` to one node's metadata.
 *
 * @param metadata the node's metadata record (may be undefined)
 * @param preFilters the `{ filters, condition }` spec, or undefined for none
 * @returns true when the node passes all (condition "and") or any
 *   (condition "or") of the filters; no filters means pass
 */
const buildFilterFn = (metadata, preFilters) => {
  if (!preFilters) return true;
  if (!metadata) return false;
  const { filters, condition } = preFilters;
  const queryCondition = condition || "and"; // default to and

  const itemFilterFn = (filter) => {
    if (filter.operator === FilterOperator.IS_EMPTY) {
      // For `is_empty`, pass when the key is absent or the value is empty.
      const value = metadata[filter.key];
      return (
        value === undefined ||
        value === null ||
        value === "" ||
        (Array.isArray(value) && value.length === 0)
      );
    }
    if (metadata[filter.key] === undefined) {
      // For every other operator, a missing key never matches.
      return false;
    }
    const metadataLookupFn = OPERATOR_TO_FILTER[filter.operator];
    if (!metadataLookupFn)
      throw new Error(`Unsupported operator: ${filter.operator}`);
    return metadataLookupFn(filter, metadata);
  };

  if (queryCondition === "and") return filters.every(itemFilterFn);
  return filters.some(itemFilterFn);
};

/** In-memory backing data for SimpleVectorStore (all plain JSON-able maps). */
class SimpleVectorStoreData {
  constructor() {
    // nodeId -> embedding vector
    this.embeddingDict = {};
    // nodeId -> source (ref) document id
    this.textIdToRefDocId = {};
    // nodeId -> metadata record
    this.metadataDict = {};
  }
}

/**
 * A minimal in-memory vector store, optionally persisted to a single JSON
 * file. Does not store node text (`storesText = false`); only embeddings,
 * ref-doc links, and metadata.
 */
class SimpleVectorStore extends BaseVectorStore {
  constructor(init) {
    super(init);
    this.storesText = false;
    this.data = init?.data || new SimpleVectorStoreData();
  }

  /** Loads (or starts) a store persisted at `<persistDir>/vector_store.json`. */
  static async fromPersistDir(persistDir = DEFAULT_PERSIST_DIR, embedModel, options) {
    const persistPath = path.join(persistDir, "vector_store.json");
    return await SimpleVectorStore.fromPersistPath(persistPath, embedModel, options);
  }

  /** No external client: everything is in-process. */
  client() {
    return null;
  }

  /** Returns the stored embedding for `textId` (undefined if absent). */
  async get(textId) {
    return this.data.embeddingDict[textId];
  }

  /**
   * Adds embedded nodes to the store and persists if a persistPath is set.
   * @returns the ids of the added nodes
   */
  async add(embeddingResults) {
    for (const node of embeddingResults) {
      this.data.embeddingDict[node.id_] = node.getEmbedding();
      if (!node.sourceNode) {
        continue;
      }
      this.data.textIdToRefDocId[node.id_] = node.sourceNode?.nodeId;
      // Record queryable metadata, minus the serialized node content blob.
      const metadata = nodeToMetadata(node, true, undefined, false);
      delete metadata["_node_content"];
      this.data.metadataDict[node.id_] = metadata;
    }
    if (this.persistPath) {
      await this.persist(this.persistPath);
    }
    return embeddingResults.map((result) => result.id_);
  }

  /** Deletes every node derived from `refDocId`, persisting if configured. */
  async delete(refDocId) {
    const textIdsToDelete = Object.keys(this.data.textIdToRefDocId).filter(
      (textId) => this.data.textIdToRefDocId[textId] === refDocId,
    );
    for (const textId of textIdsToDelete) {
      delete this.data.embeddingDict[textId];
      delete this.data.textIdToRefDocId[textId];
      if (this.data.metadataDict) delete this.data.metadataDict[textId];
    }
    if (this.persistPath) {
      await this.persist(this.persistPath);
    }
    return Promise.resolve();
  }

  /**
   * Applies the query's docId allow-list and metadata filters, returning the
   * surviving `{ nodeIds, embeddings }` in matching order.
   */
  async filterNodes(query) {
    const items = Object.entries(this.data.embeddingDict);
    const queryFilterFn = (nodeId) => {
      const metadata = this.data.metadataDict[nodeId];
      return buildFilterFn(metadata, query.filters);
    };
    const nodeFilterFn = (nodeId) => {
      if (!query.docIds) return true;
      const availableIds = new Set(query.docIds);
      return availableIds.has(nodeId);
    };
    const queriedItems = items.filter(
      (item) => nodeFilterFn(item[0]) && queryFilterFn(item[0]),
    );
    const nodeIds = queriedItems.map((item) => item[0]);
    const embeddings = queriedItems.map((item) => item[1]);
    return {
      nodeIds,
      embeddings,
    };
  }

  /**
   * Runs a similarity query over the filtered nodes.
   * Supports DEFAULT (top-k) and MMR modes; learner modes are unimplemented.
   * @throws on learner modes and unknown modes
   */
  async query(query) {
    const { nodeIds, embeddings } = await this.filterNodes(query);
    const queryEmbedding = query.queryEmbedding;
    let topSimilarities, topIds;
    if (LEARNER_MODES.has(query.mode)) {
      // fixme: unfinished
      throw new Error("Learner modes not implemented for SimpleVectorStore yet.");
    } else if (query.mode === MMR_MODE) {
      const mmrThreshold = query.mmrThreshold;
      [topSimilarities, topIds] = getTopKMMREmbeddings(
        queryEmbedding,
        embeddings,
        null,
        query.similarityTopK,
        nodeIds,
        mmrThreshold,
      );
    } else if (query.mode === VectorStoreQueryMode.DEFAULT) {
      [topSimilarities, topIds] = getTopKEmbeddings(
        queryEmbedding,
        embeddings,
        query.similarityTopK,
        nodeIds,
      );
    } else {
      throw new Error(`Invalid query mode: ${query.mode}`);
    }
    return Promise.resolve({
      similarities: topSimilarities,
      ids: topIds,
    });
  }

  /** Persists the store's data to `persistPath` as JSON. */
  async persist(persistPath = path.join(DEFAULT_PERSIST_DIR, "vector_store.json")) {
    await SimpleVectorStore.persistData(persistPath, this.data);
  }

  /** Writes `data` to `persistPath`, creating the directory tree if needed. */
  static async persistData(persistPath, data) {
    const dirPath = path.dirname(persistPath);
    if (!(await exists(dirPath))) {
      // Fix: create intermediate directories too; the non-recursive mkdir
      // failed for nested paths and was inconsistent with fromPersistPath.
      await fs.mkdir(dirPath, { recursive: true });
    }
    await fs.writeFile(persistPath, JSON.stringify(data));
  }

  /**
   * Loads a store from `persistPath`, starting an empty one if the file does
   * not exist. Malformed persisted JSON is surfaced as an Error with `cause`.
   */
  static async fromPersistPath(persistPath, embedModel, options) {
    const logger = options?.logger ?? consoleLogger;
    const dirPath = path.dirname(persistPath);
    if (!(await exists(dirPath))) {
      await fs.mkdir(dirPath, { recursive: true });
    }
    let dataDict = {};
    if (!(await exists(persistPath))) {
      logger.log(`Starting new store from path: ${persistPath}`);
    } else {
      try {
        const fileData = await fs.readFile(persistPath);
        dataDict = JSON.parse(fileData.toString());
      } catch (e) {
        throw new Error(`Failed to load data from path: ${persistPath}`, {
          cause: e,
        });
      }
    }
    const data = new SimpleVectorStoreData();
    data.embeddingDict = dataDict.embeddingDict ?? {};
    data.textIdToRefDocId = dataDict.textIdToRefDocId ?? {};
    data.metadataDict = dataDict.metadataDict ?? {};
    const store = new SimpleVectorStore({ data, embedModel });
    store.persistPath = persistPath;
    return store;
  }

  /** Rebuilds a store from a plain-object dump produced by `toDict`. */
  static fromDict(saveDict, embedModel) {
    const data = new SimpleVectorStoreData();
    data.embeddingDict = saveDict.embeddingDict;
    data.textIdToRefDocId = saveDict.textIdToRefDocId;
    data.metadataDict = saveDict.metadataDict;
    return new SimpleVectorStore({ data, embedModel });
  }

  /** Serializes the store's data to a plain object. */
  toDict() {
    return {
      embeddingDict: this.data.embeddingDict,
      textIdToRefDocId: this.data.textIdToRefDocId,
      metadataDict: this.data.metadataDict,
    };
  }
}

/**
 * Document store backed by a (by default in-memory) key-value store,
 * persisted to a single JSON file.
 */
class SimpleDocumentStore extends KVDocumentStore {
  constructor(kvStore, namespace) {
    kvStore = kvStore || new SimpleKVStore();
    namespace = namespace || DEFAULT_NAMESPACE;
    super(kvStore, namespace);
    this.kvStore = kvStore;
  }

  /** Loads a store persisted under `persistDir` with the default filename. */
  static async fromPersistDir(persistDir = DEFAULT_PERSIST_DIR, namespace, options) {
    const persistPath = path.join(persistDir, DEFAULT_DOC_STORE_PERSIST_FILENAME);
    return await SimpleDocumentStore.fromPersistPath(persistPath, namespace, options);
  }

  /** Loads a store whose KV backing file lives at `persistPath`. */
  static async fromPersistPath(persistPath, namespace, options) {
    const simpleKVStore = await SimpleKVStore.fromPersistPath(persistPath, options);
    return new SimpleDocumentStore(simpleKVStore, namespace);
  }

  /** Persists the underlying KV store when it is an in-memory store; no-op otherwise. */
  async persist(persistPath = path.join(DEFAULT_PERSIST_DIR, DEFAULT_DOC_STORE_PERSIST_FILENAME)) {
    if (_.isObject(this.kvStore) && this.kvStore instanceof BaseInMemoryKVStore) {
      await this.kvStore.persist(persistPath);
    }
  }

  /** Rebuilds a store from a plain-object dump produced by `toDict`. */
  static fromDict(saveDict, namespace) {
    const simpleKVStore = SimpleKVStore.fromDict(saveDict);
    return new SimpleDocumentStore(simpleKVStore, namespace);
  }

  /**
   * Serializes the underlying KV store.
   * @throws when the KV store is not a SimpleKVStore (nothing to serialize)
   */
  toDict() {
    if (_.isObject(this.kvStore) && this.kvStore instanceof SimpleKVStore) {
      return this.kvStore.toDict();
    }
    throw new Error("KVStore is not a SimpleKVStore");
  }
}

/**
 * Builds a storage context, filling any of docStore / indexStore / the TEXT
 * vector store that the caller did not supply. Without `persistDir` the
 * defaults are fresh in-memory stores; with it they are loaded from disk.
 */
async function storageContextFromDefaults({ docStore, indexStore, vectorStore, vectorStores, persistDir }) {
  vectorStores = vectorStores ?? {};
  if (!persistDir) {
    docStore = docStore ?? new SimpleDocumentStore();
    indexStore = indexStore ?? new SimpleIndexStore();
    if (!(ModalityType.TEXT in vectorStores)) {
      vectorStores[ModalityType.TEXT] = vectorStore ?? new SimpleVectorStore();
    }
  } else {
    const embedModel = Settings.embedModel;
    docStore =
      docStore ||
      (await SimpleDocumentStore.fromPersistDir(persistDir, DEFAULT_NAMESPACE));
    indexStore =
      indexStore || (await SimpleIndexStore.fromPersistDir(persistDir));
    // Fix: test the same ModalityType.TEXT key that is written below — the
    // original checked ObjectType.TEXT here, inconsistent with the in-memory
    // branch above and with the assignment on the next line.
    if (!(ModalityType.TEXT in vectorStores)) {
      vectorStores[ModalityType.TEXT] =
        vectorStore ??
        (await SimpleVectorStore.fromPersistDir(persistDir, embedModel));
    }
  }
  return {
    docStore,
    indexStore,
    vectorStores,
  };
}

export { LLMQuestionGenerator, Settings, SubQuestionOutputParser, parseJsonMarkdown, storageContextFromDefaults };