// llamaindex (bundled dist output)
// LlamaIndex.TS — Data framework for your LLM application.
// Logo: https://ts.llamaindex.ai/square.svg
// 561 lines (552 loc) • 21.8 kB — JavaScript
import { Settings as Settings$1, DEFAULT_PERSIST_DIR, DEFAULT_DOC_STORE_PERSIST_FILENAME, DEFAULT_NAMESPACE } from '@llamaindex/core/global';
export { CallbackManager, DEFAULT_BASE_URL, DEFAULT_CHUNK_OVERLAP, DEFAULT_CHUNK_OVERLAP_RATIO, DEFAULT_CHUNK_SIZE, DEFAULT_COLLECTION, DEFAULT_CONTEXT_WINDOW, DEFAULT_DOC_STORE_PERSIST_FILENAME, DEFAULT_GRAPH_STORE_PERSIST_FILENAME, DEFAULT_INDEX_STORE_PERSIST_FILENAME, DEFAULT_NAMESPACE, DEFAULT_NUM_OUTPUTS, DEFAULT_PADDING, DEFAULT_PERSIST_DIR, DEFAULT_PROJECT_NAME, DEFAULT_VECTOR_STORE_PERSIST_FILENAME } from '@llamaindex/core/global';
import { PromptHelper } from '@llamaindex/core/indices';
export * from '@llamaindex/core/indices';
import { SentenceSplitter } from '@llamaindex/core/node-parser';
import { AsyncLocalStorage, fs, path, consoleLogger } from '@llamaindex/env';
export { LlamaParseReader } from '@llamaindex/cloud/reader';
export * from '@llamaindex/core/agent';
export * from '@llamaindex/core/chat-engine';
export * from '@llamaindex/core/data-structs';
import { getTopKMMREmbeddings, getTopKEmbeddings } from '@llamaindex/core/embeddings';
export * from '@llamaindex/core/embeddings';
export * from '@llamaindex/core/llms';
export * from '@llamaindex/core/memory';
export * from '@llamaindex/core/postprocessor';
import { PromptMixin, defaultSubQuestionPrompt } from '@llamaindex/core/prompts';
export * from '@llamaindex/core/prompts';
export * from '@llamaindex/core/query-engine';
export * from '@llamaindex/core/response-synthesizers';
export * from '@llamaindex/core/retriever';
import { ModalityType, ObjectType } from '@llamaindex/core/schema';
export * from '@llamaindex/core/schema';
export * from '@llamaindex/core/storage/chat-store';
import { KVDocumentStore } from '@llamaindex/core/storage/doc-store';
export * from '@llamaindex/core/storage/doc-store';
export { BaseDocumentStore } from '@llamaindex/core/storage/doc-store';
import { SimpleIndexStore } from '@llamaindex/core/storage/index-store';
export * from '@llamaindex/core/storage/index-store';
import { SimpleKVStore, BaseInMemoryKVStore } from '@llamaindex/core/storage/kv-store';
export * from '@llamaindex/core/storage/kv-store';
import { toToolDescriptions, extractText } from '@llamaindex/core/utils';
export * from '@llamaindex/core/utils';
export * from '../agent/dist/index.js';
export * from '../cloud/dist/index.js';
export * from '../engines/dist/index.js';
export * from '../evaluation/dist/index.js';
export * from '../extractors/dist/index.js';
export * from '../indices/dist/index.js';
export * from '../ingestion/dist/index.js';
export * from '../node-parser/dist/index.js';
export * from '../objects/dist/index.js';
export * from '../postprocessors/dist/index.js';
export * from '../selectors/dist/index.js';
import { VectorStoreQueryMode, FilterOperator, parsePrimitiveValue, parseArrayValue, BaseVectorStore, nodeToMetadata } from '@llamaindex/core/vector-store';
import _ from 'lodash';
export * from '../tools/dist/index.js';
export * from '../storage/dist/index.js';
export * from '../vector-store/dist/index.js';
/**
* @internal
*/ class GlobalSettings {
#prompt;
#promptHelper;
#nodeParser;
#chunkOverlap;
#promptHelperAsyncLocalStorage;
#nodeParserAsyncLocalStorage;
#chunkOverlapAsyncLocalStorage;
#promptAsyncLocalStorage;
get debug() {
return Settings$1.debug;
}
get llm() {
return Settings$1.llm;
}
set llm(llm) {
Settings$1.llm = llm;
}
withLLM(llm, fn) {
return Settings$1.withLLM(llm, fn);
}
get promptHelper() {
if (this.#promptHelper === null) {
this.#promptHelper = new PromptHelper();
}
return this.#promptHelperAsyncLocalStorage.getStore() ?? this.#promptHelper;
}
set promptHelper(promptHelper) {
this.#promptHelper = promptHelper;
}
withPromptHelper(promptHelper, fn) {
return this.#promptHelperAsyncLocalStorage.run(promptHelper, fn);
}
get embedModel() {
return Settings$1.embedModel;
}
set embedModel(embedModel) {
Settings$1.embedModel = embedModel;
}
withEmbedModel(embedModel, fn) {
return Settings$1.withEmbedModel(embedModel, fn);
}
get nodeParser() {
if (this.#nodeParser === null) {
this.#nodeParser = new SentenceSplitter({
chunkSize: this.chunkSize,
chunkOverlap: this.chunkOverlap
});
}
return this.#nodeParserAsyncLocalStorage.getStore() ?? this.#nodeParser;
}
set nodeParser(nodeParser) {
this.#nodeParser = nodeParser;
}
withNodeParser(nodeParser, fn) {
return this.#nodeParserAsyncLocalStorage.run(nodeParser, fn);
}
get callbackManager() {
return Settings$1.callbackManager;
}
set callbackManager(callbackManager) {
Settings$1.callbackManager = callbackManager;
}
withCallbackManager(callbackManager, fn) {
return Settings$1.withCallbackManager(callbackManager, fn);
}
set chunkSize(chunkSize) {
Settings$1.chunkSize = chunkSize;
}
get chunkSize() {
return Settings$1.chunkSize;
}
withChunkSize(chunkSize, fn) {
return Settings$1.withChunkSize(chunkSize, fn);
}
get chunkOverlap() {
return this.#chunkOverlapAsyncLocalStorage.getStore() ?? this.#chunkOverlap;
}
set chunkOverlap(chunkOverlap) {
if (typeof chunkOverlap === "number") {
this.#chunkOverlap = chunkOverlap;
}
}
withChunkOverlap(chunkOverlap, fn) {
return this.#chunkOverlapAsyncLocalStorage.run(chunkOverlap, fn);
}
get prompt() {
return this.#promptAsyncLocalStorage.getStore() ?? this.#prompt;
}
set prompt(prompt) {
this.#prompt = prompt;
}
withPrompt(prompt, fn) {
return this.#promptAsyncLocalStorage.run(prompt, fn);
}
constructor(){
this.#prompt = {};
this.#promptHelper = null;
this.#nodeParser = null;
this.#promptHelperAsyncLocalStorage = new AsyncLocalStorage();
this.#nodeParserAsyncLocalStorage = new AsyncLocalStorage();
this.#chunkOverlapAsyncLocalStorage = new AsyncLocalStorage();
this.#promptAsyncLocalStorage = new AsyncLocalStorage();
}
}
const Settings = new GlobalSettings();
/**
 * Error class for output parsing. Due to the nature of LLMs, anytime we use LLM
 * to generate structured output, it's possible that it will hallucinate something
 * that doesn't match the expected output format. So make sure to catch these
 * errors in production.
 */ class OutputParserError extends Error {
    /**
     * @param message human-readable description of the parse failure
     * @param options may carry `cause` (underlying error) and `output`
     *                (the raw LLM text that failed to parse)
     */ constructor(message, options = {}){
        // Forward the options bag so runtimes implementing the error-cause
        // proposal pick up `cause` natively.
        // https://github.com/tc39/proposal-error-cause
        super(message, options);
        this.name = "OutputParserError";
        const { cause, output } = options;
        // Runtimes without the proposal ignore super()'s second argument,
        // so attach the cause manually when it did not stick.
        this.cause ??= cause;
        this.output = output;
        // Keep this constructor frame out of V8 stack traces
        // (https://v8.dev/docs/stack-trace-api).
        Error.captureStackTrace?.(this, OutputParserError);
    }
}
/**
 * Extracts and parses the JSON value embedded in a markdown/text block.
 *
 * Finds the outermost JSON array or object (whichever delimiter appears
 * first) and parses the span between it and its matching closer. A single
 * object is wrapped in a one-element array so callers always receive an
 * array.
 *
 * @param text A markdown block containing JSON
 * @returns parsed JSON as an array
 * @throws {OutputParserError} when no valid JSON can be extracted
 */ function parseJsonMarkdown(text) {
    text = text.trim();
    const leftSquare = text.indexOf("[");
    const leftBrace = text.indexOf("{");
    // Pick whichever delimiter occurs first; either may be absent (-1).
    // (Previously an array with no surrounding object fell into the brace
    // branch and produced an empty parse span, raising a spurious error.)
    const isArray = leftSquare !== -1 && (leftBrace === -1 || leftSquare < leftBrace);
    const left = isArray ? leftSquare : leftBrace;
    const right = isArray ? text.lastIndexOf("]") : text.lastIndexOf("}");
    const jsonText = text.substring(left, right + 1);
    try {
        // Single JSON object case: wrap so the return type is always an
        // array. (Decided by the branch taken, not by whether a "[" merely
        // appears somewhere inside the object.)
        if (!isArray) {
            return [
                JSON.parse(jsonText)
            ];
        }
        // JSON array case: parse as-is.
        return JSON.parse(jsonText);
    } catch (e) {
        throw new OutputParserError("Not a json markdown", {
            output: text,
            cause: e
        });
    }
}
/**
 * SubQuestionOutputParser is used to parse the output of the SubQuestionGenerator.
 */ class SubQuestionOutputParser {
    /**
     * Parse the raw LLM completion into a structured result.
     * @returns an object holding the raw text and the parsed JSON output
     */ parse(output) {
        return {
            rawOutput: output,
            parsedOutput: parseJsonMarkdown(output)
        };
    }
    /** Formatting is a pass-through: the output is returned unchanged. */
    format(output) {
        return output;
    }
}
/**
 * LLMQuestionGenerator uses the LLM to generate new questions for the LLM using tools and a user query.
 */ class LLMQuestionGenerator extends PromptMixin {
    /**
     * @param init optional overrides: `llm`, `prompt`, `outputParser`;
     *             each falls back to a sensible default
     */ constructor(init){
        super();
        const { llm, prompt, outputParser } = init ?? {};
        this.llm = llm ?? Settings.llm;
        this.prompt = prompt ?? defaultSubQuestionPrompt;
        this.outputParser = outputParser ?? new SubQuestionOutputParser();
    }
    /** Expose the sub-question prompt to the PromptMixin machinery. */
    _getPrompts() {
        return {
            subQuestion: this.prompt
        };
    }
    /** Accept an updated sub-question prompt from the PromptMixin machinery. */
    _updatePrompts(promptsDict) {
        if ("subQuestion" in promptsDict) {
            this.prompt = promptsDict.subQuestion;
        }
    }
    /**
     * Ask the LLM to decompose `query` into sub-questions over `tools`.
     * @returns the parsed sub-question output
     */ async generate(tools, query) {
        const prompt = this.prompt.format({
            toolsStr: toToolDescriptions(tools),
            queryStr: extractText(query)
        });
        const completion = await this.llm.complete({
            prompt
        });
        const structuredOutput = this.outputParser.parse(completion.text);
        return structuredOutput.parsedOutput;
    }
    /** No nested prompt modules. */
    _getPromptModules() {
        return {};
    }
}
// FS utility helpers
/**
 * Checks if a file exists.
 * Analogous to the os.path.exists function from Python.
 * @param path The path to the file to check.
 * @returns A promise that resolves to true if the file exists, false otherwise.
 */ async function exists(path) {
    let accessible = true;
    try {
        await fs.access(path);
    } catch {
        // Any access failure (missing path, permissions, ...) counts as absent.
        accessible = false;
    }
    return accessible;
}
// Query modes that require a trained model; rejected by SimpleVectorStore.query.
const LEARNER_MODES = new Set([
    VectorStoreQueryMode.SVM,
    VectorStoreQueryMode.LINEAR_REGRESSION,
    VectorStoreQueryMode.LOGISTIC_REGRESSION
]);
// Maximal Marginal Relevance mode, routed to getTopKMMREmbeddings in query().
const MMR_MODE = VectorStoreQueryMode.MMR;
// Mapping of filter operators to metadata filter functions
// Each entry has the shape ({ key, value }, metadata) => boolean, where
// `key` names the metadata field and `value` is the filter operand.
// parsePrimitiveValue / parseArrayValue normalize the operand before comparison.
const OPERATOR_TO_FILTER = {
    // Strict equality / inequality against the normalized operand.
    [FilterOperator.EQ]: ({ key, value }, metadata)=>{
        return metadata[key] === parsePrimitiveValue(value);
    },
    [FilterOperator.NE]: ({ key, value }, metadata)=>{
        return metadata[key] !== parsePrimitiveValue(value);
    },
    // IN: the metadata value equals at least one operand element.
    [FilterOperator.IN]: ({ key, value }, metadata)=>{
        return !!parseArrayValue(value).find((v)=>metadata[key] === v);
    },
    // NIN: the metadata value equals none of the operand elements.
    [FilterOperator.NIN]: ({ key, value }, metadata)=>{
        return !parseArrayValue(value).find((v)=>metadata[key] === v);
    },
    // ANY: metadata must be an array containing at least one operand element.
    [FilterOperator.ANY]: ({ key, value }, metadata)=>{
        if (!Array.isArray(metadata[key])) return false;
        return parseArrayValue(value).some((v)=>metadata[key].includes(v));
    },
    // ALL: metadata must be an array containing every operand element.
    [FilterOperator.ALL]: ({ key, value }, metadata)=>{
        if (!Array.isArray(metadata[key])) return false;
        return parseArrayValue(value).every((v)=>metadata[key].includes(v));
    },
    // NOTE(review): assumes metadata[key] has an .includes method (string or
    // array); a non-string scalar here would throw — confirm upstream guards.
    [FilterOperator.TEXT_MATCH]: ({ key, value }, metadata)=>{
        return metadata[key].includes(parsePrimitiveValue(value));
    },
    // CONTAINS: metadata array holds the raw operand.
    // NOTE(review): compares against the raw `value` rather than
    // parsePrimitiveValue(value), unlike EQ/NE — verify this asymmetry is intended.
    [FilterOperator.CONTAINS]: ({ key, value }, metadata)=>{
        if (!Array.isArray(metadata[key])) return false;
        return !!parseArrayValue(metadata[key]).find((v)=>v === value);
    },
    // Numeric/lexicographic comparisons against the normalized operand.
    [FilterOperator.GT]: ({ key, value }, metadata)=>{
        return metadata[key] > parsePrimitiveValue(value);
    },
    [FilterOperator.LT]: ({ key, value }, metadata)=>{
        return metadata[key] < parsePrimitiveValue(value);
    },
    [FilterOperator.GTE]: ({ key, value }, metadata)=>{
        return metadata[key] >= parsePrimitiveValue(value);
    },
    [FilterOperator.LTE]: ({ key, value }, metadata)=>{
        return metadata[key] <= parsePrimitiveValue(value);
    }
};
// Build a filter function based on the metadata and the preFilters
// Decides whether a node's metadata record passes the query's filters.
const buildFilterFn = (metadata, preFilters)=>{
    // No filters configured: every node passes.
    if (!preFilters) return true;
    // Filters exist but the node has no metadata: it cannot match.
    if (!metadata) return false;
    const { filters, condition } = preFilters;
    const matchesFilter = (filter)=>{
        const current = metadata[filter.key];
        if (filter.operator === FilterOperator.IS_EMPTY) {
            // `is_empty` passes when the key is absent or holds an empty value.
            return current === undefined || current === null || current === "" || Array.isArray(current) && current.length === 0;
        }
        // Every other operator fails when the key is absent.
        if (current === undefined) return false;
        const operatorFn = OPERATOR_TO_FILTER[filter.operator];
        if (!operatorFn) throw new Error(`Unsupported operator: ${filter.operator}`);
        return operatorFn(filter, metadata);
    };
    // Combine per-filter results; AND semantics unless "or" is requested.
    return (condition || "and") === "and" ? filters.every(matchesFilter) : filters.some(matchesFilter);
};
/**
 * Plain in-memory backing data for SimpleVectorStore: embeddings keyed by
 * node id, the node-id → source-doc-id mapping, and per-node metadata.
 */ class SimpleVectorStoreData {
    // node id -> embedding vector
    embeddingDict = {};
    // node id -> ref doc id of the node's source
    textIdToRefDocId = {};
    // node id -> metadata record
    metadataDict = {};
}
/**
 * A simple in-memory vector store backed by plain dictionaries, with
 * optional JSON persistence to disk. Node text is not stored here
 * (storesText = false); only embeddings, per-node metadata, and the
 * mapping from node id to source-document id.
 */ class SimpleVectorStore extends BaseVectorStore {
    /** @param init optional `data` (SimpleVectorStoreData) plus BaseVectorStore init */
    constructor(init){
        super(init);
        this.storesText = false;
        this.data = init?.data || new SimpleVectorStoreData();
    }
    /** Load (or initialize) a store persisted at `<persistDir>/vector_store.json`. */
    static async fromPersistDir(persistDir = DEFAULT_PERSIST_DIR, embedModel, options) {
        const persistPath = path.join(persistDir, "vector_store.json");
        return await SimpleVectorStore.fromPersistPath(persistPath, embedModel, options);
    }
    /** No backing client; the store lives entirely in memory. */
    client() {
        return null;
    }
    /** Return the embedding stored under the given text id (undefined if absent). */
    async get(textId) {
        return this.data.embeddingDict[textId];
    }
    /**
     * Add embedded nodes to the store, then persist when a persistPath is set.
     * @returns the ids of the added nodes
     */ async add(embeddingResults) {
        for (const node of embeddingResults){
            this.data.embeddingDict[node.id_] = node.getEmbedding();
            // The ref-doc mapping and metadata are only recorded for nodes
            // that have a source node.
            if (!node.sourceNode) {
                continue;
            }
            this.data.textIdToRefDocId[node.id_] = node.sourceNode?.nodeId;
            // Add metadata to the metadataDict, dropping the serialized node
            // content to keep the persisted JSON small.
            const metadata = nodeToMetadata(node, true, undefined, false);
            delete metadata["_node_content"];
            this.data.metadataDict[node.id_] = metadata;
        }
        if (this.persistPath) {
            await this.persist(this.persistPath);
        }
        return embeddingResults.map((result)=>result.id_);
    }
    /**
     * Delete every node whose source document is `refDocId`, then persist
     * when a persistPath is set.
     */ async delete(refDocId) {
        const textIdsToDelete = Object.keys(this.data.textIdToRefDocId).filter((textId)=>this.data.textIdToRefDocId[textId] === refDocId);
        for (const textId of textIdsToDelete){
            delete this.data.embeddingDict[textId];
            delete this.data.textIdToRefDocId[textId];
            if (this.data.metadataDict) delete this.data.metadataDict[textId];
        }
        if (this.persistPath) {
            await this.persist(this.persistPath);
        }
        return Promise.resolve();
    }
    /**
     * Apply the query's docId allow-list and metadata filters, returning the
     * surviving node ids alongside their embeddings.
     */ async filterNodes(query) {
        const items = Object.entries(this.data.embeddingDict);
        const queryFilterFn = (nodeId)=>{
            const metadata = this.data.metadataDict[nodeId];
            return buildFilterFn(metadata, query.filters);
        };
        const nodeFilterFn = (nodeId)=>{
            // No docIds restriction means every node is eligible.
            if (!query.docIds) return true;
            const availableIds = new Set(query.docIds);
            return availableIds.has(nodeId);
        };
        const queriedItems = items.filter((item)=>nodeFilterFn(item[0]) && queryFilterFn(item[0]));
        const nodeIds = queriedItems.map((item)=>item[0]);
        const embeddings = queriedItems.map((item)=>item[1]);
        return {
            nodeIds,
            embeddings
        };
    }
    /**
     * Run a similarity query over the (filtered) store.
     * Supports DEFAULT and MMR modes; learner modes are not implemented.
     * @throws {Error} for learner modes and unknown query modes
     */ async query(query) {
        const { nodeIds, embeddings } = await this.filterNodes(query);
        const queryEmbedding = query.queryEmbedding;
        let topSimilarities, topIds;
        if (LEARNER_MODES.has(query.mode)) {
            // fixme: unfinished
            throw new Error("Learner modes not implemented for SimpleVectorStore yet.");
        } else if (query.mode === MMR_MODE) {
            const mmrThreshold = query.mmrThreshold;
            [topSimilarities, topIds] = getTopKMMREmbeddings(queryEmbedding, embeddings, null, query.similarityTopK, nodeIds, mmrThreshold);
        } else if (query.mode === VectorStoreQueryMode.DEFAULT) {
            [topSimilarities, topIds] = getTopKEmbeddings(queryEmbedding, embeddings, query.similarityTopK, nodeIds);
        } else {
            throw new Error(`Invalid query mode: ${query.mode}`);
        }
        return Promise.resolve({
            similarities: topSimilarities,
            ids: topIds
        });
    }
    /** Persist this store's data as JSON to `persistPath`. */
    async persist(persistPath = path.join(DEFAULT_PERSIST_DIR, "vector_store.json")) {
        await SimpleVectorStore.persistData(persistPath, this.data);
    }
    /**
     * Write `data` as JSON to `persistPath`, creating the parent directory
     * when missing.
     */ static async persistData(persistPath, data) {
        const dirPath = path.dirname(persistPath);
        if (!await exists(dirPath)) {
            // recursive: true so multi-level persist dirs are created; the
            // previous bare mkdir failed for nested paths (fromPersistPath
            // already used the recursive form).
            await fs.mkdir(dirPath, {
                recursive: true
            });
        }
        await fs.writeFile(persistPath, JSON.stringify(data));
    }
    /**
     * Load a store from `persistPath`, creating a fresh one when no file
     * exists yet. The returned store remembers the path for auto-persist.
     * @throws {Error} when the file exists but cannot be read or parsed
     */ static async fromPersistPath(persistPath, embedModel, options) {
        const logger = options?.logger ?? consoleLogger;
        const dirPath = path.dirname(persistPath);
        if (!await exists(dirPath)) {
            await fs.mkdir(dirPath, {
                recursive: true
            });
        }
        let dataDict = {};
        if (!await exists(persistPath)) {
            logger.log(`Starting new store from path: ${persistPath}`);
        } else {
            try {
                const fileData = await fs.readFile(persistPath);
                dataDict = JSON.parse(fileData.toString());
            } catch (e) {
                throw new Error(`Failed to load data from path: ${persistPath}`, {
                    cause: e
                });
            }
        }
        const data = new SimpleVectorStoreData();
        // Missing sections in an older/partial file default to empty dicts.
        // @ts-expect-error TS2322
        data.embeddingDict = dataDict.embeddingDict ?? {};
        // @ts-expect-error TS2322
        data.textIdToRefDocId = dataDict.textIdToRefDocId ?? {};
        // @ts-expect-error TS2322
        data.metadataDict = dataDict.metadataDict ?? {};
        const store = new SimpleVectorStore({
            data,
            embedModel
        });
        store.persistPath = persistPath;
        return store;
    }
    /** Rebuild a store from a previously serialized dictionary (see toDict). */
    static fromDict(saveDict, embedModel) {
        const data = new SimpleVectorStoreData();
        data.embeddingDict = saveDict.embeddingDict;
        data.textIdToRefDocId = saveDict.textIdToRefDocId;
        data.metadataDict = saveDict.metadataDict;
        return new SimpleVectorStore({
            data,
            embedModel
        });
    }
    /** Serialize the store's backing data to a plain dictionary. */
    toDict() {
        return {
            embeddingDict: this.data.embeddingDict,
            textIdToRefDocId: this.data.textIdToRefDocId,
            metadataDict: this.data.metadataDict
        };
    }
}
/**
 * Document store built on a key-value store (SimpleKVStore by default),
 * with JSON persistence for in-memory backends.
 */ class SimpleDocumentStore extends KVDocumentStore {
    /**
     * @param kvStore backing KV store; defaults to a fresh SimpleKVStore
     * @param namespace key namespace; defaults to DEFAULT_NAMESPACE
     */ constructor(kvStore, namespace){
        kvStore = kvStore || new SimpleKVStore();
        namespace = namespace || DEFAULT_NAMESPACE;
        super(kvStore, namespace);
        this.kvStore = kvStore;
    }
    /** Load a store persisted at `<persistDir>/<doc store filename>`. */
    static async fromPersistDir(persistDir = DEFAULT_PERSIST_DIR, namespace, options) {
        const persistPath = path.join(persistDir, DEFAULT_DOC_STORE_PERSIST_FILENAME);
        return await SimpleDocumentStore.fromPersistPath(persistPath, namespace, options);
    }
    /** Load a store from an explicit persist path. */
    static async fromPersistPath(persistPath, namespace, options) {
        const simpleKVStore = await SimpleKVStore.fromPersistPath(persistPath, options);
        return new SimpleDocumentStore(simpleKVStore, namespace);
    }
    /**
     * Persist the backing store to disk. Only in-memory KV stores support
     * (and need) this; other backends are a silent no-op.
     */ async persist(persistPath = path.join(DEFAULT_PERSIST_DIR, DEFAULT_DOC_STORE_PERSIST_FILENAME)) {
        // instanceof already implies an object, so the former lodash
        // _.isObject guard was redundant and has been dropped.
        if (this.kvStore instanceof BaseInMemoryKVStore) {
            await this.kvStore.persist(persistPath);
        }
    }
    /** Rebuild a store from a serialized SimpleKVStore dictionary. */
    static fromDict(saveDict, namespace) {
        const simpleKVStore = SimpleKVStore.fromDict(saveDict);
        return new SimpleDocumentStore(simpleKVStore, namespace);
    }
    /**
     * Serialize the backing SimpleKVStore to a plain dictionary.
     * @throws {Error} when the backing store is not a SimpleKVStore
     */ toDict() {
        if (this.kvStore instanceof SimpleKVStore) {
            return this.kvStore.toDict();
        }
        // Non-serializable backends cannot be exported.
        throw new Error("KVStore is not a SimpleKVStore");
    }
}
/**
 * Build a storage context with sensible defaults. Without `persistDir`
 * everything is in-memory; with it, missing stores are loaded from (and
 * persisted to) that directory.
 *
 * @param docStore optional document store override
 * @param indexStore optional index store override
 * @param vectorStore optional default (TEXT-modality) vector store
 * @param vectorStores optional map of modality -> vector store
 * @param persistDir optional directory for on-disk persistence
 * @returns { docStore, indexStore, vectorStores }
 */ async function storageContextFromDefaults({ docStore, indexStore, vectorStore, vectorStores, persistDir }) {
    vectorStores = vectorStores ?? {};
    if (!persistDir) {
        docStore = docStore ?? new SimpleDocumentStore();
        indexStore = indexStore ?? new SimpleIndexStore();
        if (!(ModalityType.TEXT in vectorStores)) {
            vectorStores[ModalityType.TEXT] = vectorStore ?? new SimpleVectorStore();
        }
    } else {
        const embedModel = Settings.embedModel;
        docStore = docStore || await SimpleDocumentStore.fromPersistDir(persistDir, DEFAULT_NAMESPACE);
        indexStore = indexStore || await SimpleIndexStore.fromPersistDir(persistDir);
        // Check membership with ModalityType.TEXT, matching both the key
        // written below and the in-memory branch above (previously this
        // inconsistently tested ObjectType.TEXT).
        if (!(ModalityType.TEXT in vectorStores)) {
            vectorStores[ModalityType.TEXT] = vectorStore ?? await SimpleVectorStore.fromPersistDir(persistDir, embedModel);
        }
    }
    return {
        docStore,
        indexStore,
        vectorStores
    };
}
export { LLMQuestionGenerator, Settings, SubQuestionOutputParser, parseJsonMarkdown, storageContextFromDefaults };