llamaindex
Version:
<p align="center"> <img height="100" width="100" alt="LlamaIndex logo" src="https://ts.llamaindex.ai/square.svg" /> </p> <h1 align="center">LlamaIndex.TS</h1> <h3 align="center"> Data framework for your LLM application. </h3>
1,474 lines (1,453 loc) • 97.5 kB
JavaScript
Object.defineProperty(exports, '__esModule', { value: true });
var indices = require('@llamaindex/core/indices');
var schema = require('@llamaindex/core/schema');
var docStore = require('@llamaindex/core/storage/doc-store');
var kvStore = require('@llamaindex/core/storage/kv-store');
var env = require('@llamaindex/env');
var global = require('@llamaindex/core/global');
var nodeParser = require('@llamaindex/core/node-parser');
var queryEngine = require('@llamaindex/core/query-engine');
var responseSynthesizers = require('@llamaindex/core/response-synthesizers');
var utils = require('@llamaindex/core/utils');
require('../../selectors/dist/index.cjs');
var prompts = require('@llamaindex/core/prompts');
require('@llamaindex/openai');
var indexStore = require('@llamaindex/core/storage/index-store');
var embeddings = require('@llamaindex/core/embeddings');
var vectorStore = require('@llamaindex/core/vector-store');
var _ = require('lodash');
var dataStructs = require('@llamaindex/core/data-structs');
var retriever = require('@llamaindex/core/retriever');
var chatEngine = require('@llamaindex/core/chat-engine');
var decorator = require('@llamaindex/core/decorator');
var memory = require('@llamaindex/core/memory');
/**
 * Normalizes a required module so it can always be read through `.default`.
 *
 * @param e The value returned by `require`.
 * @returns `e` itself when it is truthy and flagged with `__esModule`,
 * otherwise a fresh `{ default: e }` wrapper.
 */
function _interopDefault (e) {
  if (e && e.__esModule) {
    return e;
  }
  return { default: e };
}
var ___default = /*#__PURE__*/_interopDefault(_);
/**
 * Serializes a value to a JSON string without throwing on circular references.
 *
 * Every object is emitted the first time it is encountered; any later
 * occurrence of the same object (circular or merely shared) is omitted from
 * the output because the replacer returns `undefined` for it.
 *
 * @param obj The value to serialize.
 * @returns The JSON string, or `undefined` when `JSON.stringify` yields no
 * output for `obj` (for example when `obj` is a function).
 */
const transformToJSON = (obj) => {
  // A Set gives constant-time membership checks; the previous array +
  // indexOf scan was quadratic in the number of nested objects.
  const seen = new Set();
  const replacer = (key, value) => {
    if (value !== null && typeof value === "object") {
      if (seen.has(value)) {
        // Already serialized once: drop it to break the cycle.
        return undefined;
      }
      seen.add(value);
    }
    return value;
  };
  return JSON.stringify(obj, replacer);
};
/**
 * Computes the cache key for running `transform` over `nodes`.
 *
 * The hashed input is the concatenation of every node's content (requested
 * with `MetadataMode.ALL`), the JSON form of the transform and the
 * transform's `id`.
 *
 * @param nodes Nodes about to be transformed; each must expose `getContent`.
 * @param transform The transformation; its `id` is part of the key.
 * @returns Whatever `digest()` of the hasher created by `env.createSHA256()` returns.
 */
function getTransformationHash(nodes, transform) {
  const joinedContent = nodes
    .map((entry) => entry.getContent(schema.MetadataMode.ALL))
    .join("");
  const serializedTransform = transformToJSON(transform);
  const hasher = env.createSHA256();
  hasher.update(joinedContent + serializedTransform + transform.id);
  return hasher.digest();
}
/**
 * Compares incoming nodes with the doc store and sorts them into buckets.
 *
 * A node is identified by its source node's `nodeId` when available,
 * otherwise by its own `id_`.
 *
 * @param docStore Store exposing `getAllDocumentHashes` and `getDocumentHash`.
 * @param nodes Nodes to classify.
 * @returns `dedupedNodes`: nodes that are new or whose hash changed;
 * `unusedDocs`: ids whose stored hash differs from the incoming node's hash;
 * `missingDocs`: stored ids that none of the incoming nodes refer to.
 */
async function classify(docStore, nodes) {
  const storedHashes = await docStore.getAllDocumentHashes();
  const existingDocIds = Object.values(storedHashes);
  const incomingIds = new Set();
  const dedupedNodes = [];
  const unusedDocs = [];
  for (const node of nodes) {
    const refDocId = node.sourceNode?.nodeId || node.id_;
    incomingIds.add(refDocId);
    const storedHash = await docStore.getDocumentHash(refDocId);
    if (storedHash && storedHash === node.hash) {
      // Known document with an identical hash: nothing to do.
      continue;
    }
    if (storedHash) {
      // Known document whose content changed: the stored copy is stale.
      unusedDocs.push(refDocId);
    }
    dedupedNodes.push(node);
  }
  const missingDocs = existingDocIds.filter((id) => !incomingIds.has(id));
  return { dedupedNodes, missingDocs, unusedDocs };
}
/**
 * Transform component that can undo part of its doc store changes.
 */
class RollbackableTransformComponent extends schema.TransformComponent {
  /**
   * Remove unused docs from the doc store. It is useful in case
   * generating embeddings fails and we want to remove the unused docs.
   * TODO: override this in UpsertsStrategy if we want to revert removed docs also
   *
   * @param docStore Doc store to clean up.
   * @param nodes Nodes of the failed run; they are re-classified to find the unused docs.
   * @returns A promise that settles once the deletions and the persist call have finished.
   */
  async rollback(docStore, nodes) {
    const { unusedDocs } = await classify(docStore, nodes);
    for (const docId of unusedDocs) {
      await docStore.deleteDocument(docId, false);
    }
    // Await persistence so a failure rejects this call instead of becoming an
    // unhandled floating promise; awaiting a synchronous persist is harmless.
    await docStore.persist();
  }
}
/**
 * Handle doc store duplicates by checking all hashes.
 *
 * A node is kept only when its hash is neither a key of the doc store's hash
 * map nor already seen earlier in the same batch. Kept nodes get their hash
 * recorded and are added to the doc store.
 */
class DuplicatesStrategy extends RollbackableTransformComponent {
  /**
   * @param docStore Doc store used to look up and record hashes.
   */
  constructor(docStore) {
    super(async (nodes) => {
      const knownHashes = await this.docStore.getAllDocumentHashes();
      const batchHashes = new Set();
      const freshNodes = [];
      for (const node of nodes) {
        const alreadyStored = node.hash in knownHashes;
        if (alreadyStored || batchHashes.has(node.hash)) {
          continue;
        }
        await this.docStore.setDocumentHash(node.id_, node.hash);
        freshNodes.push(node);
        batchHashes.add(node.hash);
      }
      await this.docStore.addDocuments(freshNodes, true);
      return freshNodes;
    });
    this.docStore = docStore;
  }
}
/**
 * Handle docstore upserts by checking hashes and ids.
 * Identify missing docs and delete them from docstore and vector store.
 */
class UpsertsAndDeleteStrategy extends RollbackableTransformComponent {
  /**
   * @param docStore Doc store holding the known documents.
   * @param vectorStores Optional vector stores to purge alongside the doc store.
   */
  constructor(docStore, vectorStores) {
    super(async (nodes) => {
      const classified = await classify(this.docStore, nodes);
      // Deletes one id from every configured vector store, in order.
      const purgeFromVectorStores = async (id) => {
        if (!this.vectorStores) {
          return;
        }
        for (const store of this.vectorStores) {
          await store.delete(id);
        }
      };
      // Stale documents: the stored hash differs from the incoming one.
      for (const refDocId of classified.unusedDocs) {
        await this.docStore.deleteRefDoc(refDocId, false);
        await purgeFromVectorStores(refDocId);
      }
      // Documents present in the store but absent from this run.
      for (const docId of classified.missingDocs) {
        await this.docStore.deleteDocument(docId, true);
        await purgeFromVectorStores(docId);
      }
      await this.docStore.addDocuments(classified.dedupedNodes, true);
      return classified.dedupedNodes;
    });
    this.docStore = docStore;
    this.vectorStores = vectorStores;
  }
}
/**
 * Handles doc store upserts by checking hashes and ids.
 *
 * Documents whose hash changed are removed from the doc store and from every
 * configured vector store before the new or changed nodes are added.
 */
class UpsertsStrategy extends RollbackableTransformComponent {
  /**
   * @param docStore Doc store holding the known documents.
   * @param vectorStores Optional vector stores to purge alongside the doc store.
   */
  constructor(docStore, vectorStores) {
    super(async (nodes) => {
      const classified = await classify(this.docStore, nodes);
      for (const staleId of classified.unusedDocs) {
        await this.docStore.deleteRefDoc(staleId, false);
        if (!this.vectorStores) {
          continue;
        }
        for (const store of this.vectorStores) {
          await store.delete(staleId);
        }
      }
      // Only the non-duplicate nodes are written and passed on.
      await this.docStore.addDocuments(classified.dedupedNodes, true);
      return classified.dedupedNodes;
    });
    this.docStore = docStore;
    this.vectorStores = vectorStores;
  }
}
/**
 * Document de-duplication strategies work by comparing the hashes or ids
 * stored in the document store. They require a document store to be set,
 * which must be persisted across pipeline runs.
 */
var DocStoreStrategy = /*#__PURE__*/ ((members) => {
  // Upserts: a document is processed when its id is unknown to the doc store
  // or when its hash changed; the stored document is then updated.
  members.UPSERTS = "upserts";
  // Duplicates only: a document is processed only when its hash is not
  // already present in the doc store.
  members.DUPLICATES_ONLY = "duplicates_only";
  // Upserts and delete: like upserts, and documents that no longer exist in
  // the input are also deleted from the doc store.
  members.UPSERTS_AND_DELETE = "upserts_and_delete";
  // None: no doc store based de-duplication.
  members.NONE = "none";
  return members;
})({});
/**
 * Strategy that leaves the nodes untouched: its transform resolves to the
 * very array it was given.
 */
class NoOpStrategy extends RollbackableTransformComponent {
  constructor() {
    super(async (nodes) => {
      return nodes;
    });
  }
}
/**
 * Builds the transform component implementing a doc store strategy.
 *
 * @param docStoreStrategy One of the `DocStoreStrategy` string values.
 * @param docStore Doc store backing the strategy; required unless the strategy is "none".
 * @param vectorStores Vector stores kept in sync by the upsert strategies.
 * @returns The strategy instance. Without vector stores every strategy other
 * than "none" falls back to `DuplicatesStrategy` (with a warning for the two
 * upsert variants).
 * @throws Error when `docStore` is missing, or when vector stores are given
 * and the strategy name is not recognized.
 */
function createDocStoreStrategy(docStoreStrategy, docStore, vectorStores = []) {
  if (docStoreStrategy === "none") {
    return new NoOpStrategy();
  }
  if (!docStore) {
    throw new Error("docStore is required to create a doc store strategy.");
  }
  const hasVectorStores = vectorStores.length > 0;
  if (!hasVectorStores) {
    switch (docStoreStrategy) {
      case "upserts":
        console.warn("Docstore strategy set to upserts, but no vector store. Switching to duplicates_only strategy.");
        break;
      case "upserts_and_delete":
        console.warn("Docstore strategy set to upserts and delete, but no vector store. Switching to duplicates_only strategy.");
        break;
    }
    return new DuplicatesStrategy(docStore);
  }
  switch (docStoreStrategy) {
    case "upserts":
      return new UpsertsStrategy(docStore, vectorStores);
    case "upserts_and_delete":
      return new UpsertsAndDeleteStrategy(docStore, vectorStores);
    case "duplicates_only":
      return new DuplicatesStrategy(docStore);
    default:
      throw new Error(`Invalid docstore strategy: ${docStoreStrategy}`);
  }
}
/**
 * Pipes nodes through a list of transformations, one after another.
 *
 * @param nodesToRun Input nodes.
 * @param transformations Callables invoked as `transform(nodes, transformOptions)`.
 * @param transformOptions Options forwarded to every transformation.
 * @param options.inPlace When false, work starts from a shallow copy of `nodesToRun`.
 * @param options.cache Optional cache with async `get(hash)` / `put(hash, nodes)`;
 * a truthy cached value replaces running the transformation.
 * @param options.docStoreStrategy Optional callable applied to the nodes before any transformation.
 * @returns The nodes produced by the last step.
 */
async function runTransformations(nodesToRun, transformations, transformOptions = {}, { inPlace = true, cache, docStoreStrategy } = {}) {
  let current = inPlace ? nodesToRun : [...nodesToRun];
  if (docStoreStrategy) {
    current = await docStoreStrategy(current);
  }
  for (const transform of transformations) {
    if (!cache) {
      current = await transform(current, transformOptions);
      continue;
    }
    const cacheKey = getTransformationHash(current, transform);
    const hit = await cache.get(cacheKey);
    if (hit) {
      current = hit;
      continue;
    }
    current = await transform(current, transformOptions);
    await cache.put(cacheKey, current);
  }
  return current;
}
/**
 * Routes nodes to the vector store registered for their type.
 *
 * Nodes are grouped with `schema.splitNodesByType`; each non-empty group is
 * added to `vectorStores[type]`.
 *
 * @param nodes Nodes to insert.
 * @param vectorStores Map from node type to vector store.
 * @param nodesAdded Optional async callback invoked as `(newIds, nodes, vectorStore)` after each insert.
 * @throws Error when a group has no vector store assigned to its type.
 */
async function addNodesToVectorStores(nodes, vectorStores, nodesAdded) {
  const nodesByType = schema.splitNodesByType(nodes);
  for (const type in nodesByType) {
    const typedNodes = nodesByType[type];
    if (!typedNodes) {
      continue;
    }
    const targetStore = vectorStores[type];
    if (!targetStore) {
      throw new Error(`Cannot insert nodes of type ${type} without assigned vector store`);
    }
    const newIds = await targetStore.add(typedNodes);
    if (nodesAdded) {
      await nodesAdded(newIds, typedNodes, targetStore);
    }
  }
}
/**
 * Settings facade. LLM, embed model, callback manager, chunk size and debug
 * are delegated to `global.Settings`; prompt helper, node parser, chunk
 * overlap and prompt are kept here, each with a stored value plus an
 * `AsyncLocalStorage` override that takes precedence while a `with*` call runs.
 *
 * @internal
 */
class GlobalSettings {
  // Stored values.
  #prompt = {};
  #promptHelper = null;
  #nodeParser = null;
  #chunkOverlap;
  // Scoped overrides installed by the `with*` helpers.
  #promptHelperAsyncLocalStorage = new env.AsyncLocalStorage();
  #nodeParserAsyncLocalStorage = new env.AsyncLocalStorage();
  #chunkOverlapAsyncLocalStorage = new env.AsyncLocalStorage();
  #promptAsyncLocalStorage = new env.AsyncLocalStorage();

  get debug() {
    return global.Settings.debug;
  }

  get llm() {
    return global.Settings.llm;
  }
  set llm(llm) {
    global.Settings.llm = llm;
  }
  /** Runs `fn` with `llm` as the active LLM (delegated to `global.Settings`). */
  withLLM(llm, fn) {
    return global.Settings.withLLM(llm, fn);
  }

  /** Scoped prompt helper if any, else the stored one (created on first read while still `null`). */
  get promptHelper() {
    if (this.#promptHelper === null) {
      this.#promptHelper = new indices.PromptHelper();
    }
    const scoped = this.#promptHelperAsyncLocalStorage.getStore();
    return scoped ?? this.#promptHelper;
  }
  set promptHelper(promptHelper) {
    this.#promptHelper = promptHelper;
  }
  /** Runs `fn` with `promptHelper` as the scoped override. */
  withPromptHelper(promptHelper, fn) {
    return this.#promptHelperAsyncLocalStorage.run(promptHelper, fn);
  }

  get embedModel() {
    return global.Settings.embedModel;
  }
  set embedModel(embedModel) {
    global.Settings.embedModel = embedModel;
  }
  /** Runs `fn` with `embedModel` active (delegated to `global.Settings`). */
  withEmbedModel(embedModel, fn) {
    return global.Settings.withEmbedModel(embedModel, fn);
  }

  /**
   * Scoped node parser if any, else the stored one. While the stored value is
   * still `null`, a `SentenceSplitter` is built from the current chunk size
   * and chunk overlap and stored.
   */
  get nodeParser() {
    if (this.#nodeParser === null) {
      const { chunkSize, chunkOverlap } = this;
      this.#nodeParser = new nodeParser.SentenceSplitter({
        chunkSize,
        chunkOverlap
      });
    }
    const scoped = this.#nodeParserAsyncLocalStorage.getStore();
    return scoped ?? this.#nodeParser;
  }
  set nodeParser(nodeParser) {
    this.#nodeParser = nodeParser;
  }
  /** Runs `fn` with `nodeParser` as the scoped override. */
  withNodeParser(nodeParser, fn) {
    return this.#nodeParserAsyncLocalStorage.run(nodeParser, fn);
  }

  get callbackManager() {
    return global.Settings.callbackManager;
  }
  set callbackManager(callbackManager) {
    global.Settings.callbackManager = callbackManager;
  }
  /** Runs `fn` with `callbackManager` active (delegated to `global.Settings`). */
  withCallbackManager(callbackManager, fn) {
    return global.Settings.withCallbackManager(callbackManager, fn);
  }

  set chunkSize(chunkSize) {
    global.Settings.chunkSize = chunkSize;
  }
  get chunkSize() {
    return global.Settings.chunkSize;
  }
  /** Runs `fn` with `chunkSize` active (delegated to `global.Settings`). */
  withChunkSize(chunkSize, fn) {
    return global.Settings.withChunkSize(chunkSize, fn);
  }

  get chunkOverlap() {
    const scoped = this.#chunkOverlapAsyncLocalStorage.getStore();
    return scoped ?? this.#chunkOverlap;
  }
  /** Stores the overlap; values that are not of type number are ignored. */
  set chunkOverlap(chunkOverlap) {
    if (typeof chunkOverlap !== "number") {
      return;
    }
    this.#chunkOverlap = chunkOverlap;
  }
  /** Runs `fn` with `chunkOverlap` as the scoped override. */
  withChunkOverlap(chunkOverlap, fn) {
    return this.#chunkOverlapAsyncLocalStorage.run(chunkOverlap, fn);
  }

  get prompt() {
    const scoped = this.#promptAsyncLocalStorage.getStore();
    return scoped ?? this.#prompt;
  }
  set prompt(prompt) {
    this.#prompt = prompt;
  }
  /** Runs `fn` with `prompt` as the scoped override. */
  withPrompt(prompt, fn) {
    return this.#promptAsyncLocalStorage.run(prompt, fn);
  }
}
// Singleton instance used throughout this module.
const Settings = new GlobalSettings();
// Fallback tool metadata used when the caller does not supply its own.
const DEFAULT_NAME = "query_engine_tool";
const DEFAULT_DESCRIPTION = "Useful for running a natural language query against a knowledge base and get back a natural language response.";
const DEFAULT_PARAMETERS = {
  type: "object",
  properties: {
    query: {
      type: "string",
      description: "The query to search for"
    }
  },
  required: ["query"]
};
/**
 * Exposes a query engine as a tool with `metadata` and a `call` method.
 */
class QueryEngineTool {
  /**
   * @param init.queryEngine Engine whose `query({ query })` is invoked by `call`.
   * @param init.metadata Optional name / description / parameters; missing fields use the defaults.
   * @param init.includeSourceNodes Whether `call` also returns the response's source nodes (default false).
   */
  constructor({ queryEngine, metadata, includeSourceNodes }) {
    this.queryEngine = queryEngine;
    const { name, description, parameters } = metadata ?? {};
    this.metadata = {
      name: name ?? DEFAULT_NAME,
      description: description ?? DEFAULT_DESCRIPTION,
      parameters: parameters ?? DEFAULT_PARAMETERS
    };
    this.includeSourceNodes = includeSourceNodes ?? false;
  }
  /**
   * Runs the query and returns the message content of the response.
   *
   * @param input.query The natural-language query.
   * @returns `{ content }`, plus `sourceNodes` when `includeSourceNodes` is set.
   */
  async call({ query }) {
    const response = await this.queryEngine.query({ query });
    const output = { content: response.message.content };
    if (this.includeSourceNodes) {
      output.sourceNodes = response.sourceNodes;
    }
    return output;
  }
}
/**
 * Indexes are the data structure that we store our nodes and embeddings in so
 * they can be retrieved for our queries.
 *
 * This base class calls `asRetriever`, `asQueryEngine` and `insertNodes`
 * without defining them, so it relies on subclasses to provide them.
 */
class BaseIndex {
  /**
   * @param init Object carrying `storageContext`, `docStore`, `indexStore` and `indexStruct`.
   */
  constructor(init) {
    this.storageContext = init.storageContext;
    this.docStore = init.docStore;
    this.indexStore = init.indexStore;
    this.indexStruct = init.indexStruct;
  }
  /**
   * Returns a query tool by calling asQueryEngine.
   * Either options or retriever can be passed, but not both.
   * If options are provided, they are passed to generate a retriever.
   *
   * @param params Optional settings: `options`, `retriever`, `metadata`,
   * `includeSourceNodes`, plus anything `asQueryEngine` accepts. May be omitted.
   * @returns A `QueryEngineTool` wrapping the generated query engine.
   */
  asQueryTool(params) {
    const resolved = params ?? {};
    // Work on a copy when a retriever has to be injected so the caller's
    // object is never mutated.
    const engineParams = resolved.options
      ? { ...resolved, retriever: this.asRetriever(resolved.options) }
      : resolved;
    return new QueryEngineTool({
      queryEngine: this.asQueryEngine(engineParams),
      metadata: resolved.metadata,
      includeSourceNodes: resolved.includeSourceNodes ?? false
    });
  }
  /**
   * Insert a document into the index.
   *
   * The document is run through `Settings.nodeParser`, the resulting nodes
   * are inserted, and the document hash is recorded in the doc store.
   * @param document
   */
  async insert(document) {
    const nodes = await runTransformations([document], [Settings.nodeParser]);
    await this.insertNodes(nodes);
    await this.docStore.setDocumentHash(document.id_, document.hash);
  }
  /**
   * Alias for asRetriever
   * @param options
   */
  retriever(options) {
    return this.asRetriever(options);
  }
  /**
   * Alias for asQueryEngine
   * @param options you can supply your own custom Retriever and ResponseSynthesizer
   */
  queryEngine(options) {
    return this.asQueryEngine(options);
  }
  /**
   * Alias for asQueryTool
   * Either options or retriever can be passed, but not both.
   * If options are provided, they are passed to generate a retriever.
   */
  queryTool(params) {
    return this.asQueryTool(params);
  }
}
// FS utility helpers
/**
 * Tells whether a path can be accessed through `env.fs.access`.
 * Analogous to the os.path.exists function from Python.
 * @param path The path to check.
 * @returns A promise resolving to true when the access call succeeds and to
 * false when it throws or rejects, whatever the reason.
 */
async function exists(path) {
  let reachable = true;
  try {
    await env.fs.access(path);
  } catch {
    reachable = false;
  }
  return reachable;
}
// Query modes that SimpleVectorStore.query rejects with a
// "Learner modes not implemented" error.
const LEARNER_MODES = new Set([
vectorStore.VectorStoreQueryMode.SVM,
vectorStore.VectorStoreQueryMode.LINEAR_REGRESSION,
vectorStore.VectorStoreQueryMode.LOGISTIC_REGRESSION
]);
// Query mode that SimpleVectorStore.query serves through getTopKMMREmbeddings.
const MMR_MODE = vectorStore.VectorStoreQueryMode.MMR;
// Mapping of filter operators to metadata filter functions.
// Each function receives the filter (`key`, `value`) and the node metadata
// and returns whether the metadata satisfies the filter.
// Membership tests use `some`, which returns a boolean, so that a match on a
// falsy element (0, "", false) is still reported as a match.
const OPERATOR_TO_FILTER = {
  [vectorStore.FilterOperator.EQ]: ({ key, value }, metadata) => {
    return metadata[key] === vectorStore.parsePrimitiveValue(value);
  },
  [vectorStore.FilterOperator.NE]: ({ key, value }, metadata) => {
    return metadata[key] !== vectorStore.parsePrimitiveValue(value);
  },
  // True when the metadata value equals one of the filter values.
  [vectorStore.FilterOperator.IN]: ({ key, value }, metadata) => {
    return vectorStore.parseArrayValue(value).some((v) => metadata[key] === v);
  },
  // True when the metadata value equals none of the filter values.
  [vectorStore.FilterOperator.NIN]: ({ key, value }, metadata) => {
    return !vectorStore.parseArrayValue(value).some((v) => metadata[key] === v);
  },
  // Metadata must be an array sharing at least one element with the filter values.
  [vectorStore.FilterOperator.ANY]: ({ key, value }, metadata) => {
    if (!Array.isArray(metadata[key])) return false;
    return vectorStore.parseArrayValue(value).some((v) => metadata[key].includes(v));
  },
  // Metadata must be an array containing every filter value.
  [vectorStore.FilterOperator.ALL]: ({ key, value }, metadata) => {
    if (!Array.isArray(metadata[key])) return false;
    return vectorStore.parseArrayValue(value).every((v) => metadata[key].includes(v));
  },
  // Delegates to the metadata value's own `includes` method.
  [vectorStore.FilterOperator.TEXT_MATCH]: ({ key, value }, metadata) => {
    return metadata[key].includes(vectorStore.parsePrimitiveValue(value));
  },
  // Metadata must be an array with an element strictly equal to the raw filter value.
  [vectorStore.FilterOperator.CONTAINS]: ({ key, value }, metadata) => {
    if (!Array.isArray(metadata[key])) return false;
    return vectorStore.parseArrayValue(metadata[key]).some((v) => v === value);
  },
  [vectorStore.FilterOperator.GT]: ({ key, value }, metadata) => {
    return metadata[key] > vectorStore.parsePrimitiveValue(value);
  },
  [vectorStore.FilterOperator.LT]: ({ key, value }, metadata) => {
    return metadata[key] < vectorStore.parsePrimitiveValue(value);
  },
  [vectorStore.FilterOperator.GTE]: ({ key, value }, metadata) => {
    return metadata[key] >= vectorStore.parsePrimitiveValue(value);
  },
  [vectorStore.FilterOperator.LTE]: ({ key, value }, metadata) => {
    return metadata[key] <= vectorStore.parsePrimitiveValue(value);
  }
};
/**
 * Evaluates metadata pre-filters against one node's metadata.
 *
 * @param metadata Metadata of the node (may be missing).
 * @param preFilters Object with `filters` and an optional `condition`
 * ("and" by default; any other value combines the filters with "or").
 * @returns true when no pre-filters are given; false when filters are given
 * but metadata is missing; otherwise the combined filter result.
 * @throws Error when a filter uses an operator without a registered function.
 */
const buildFilterFn = (metadata, preFilters) => {
  if (!preFilters) return true;
  if (!metadata) return false;
  const { filters, condition } = preFilters;
  const matches = (filter) => {
    const current = metadata[filter.key];
    if (filter.operator === vectorStore.FilterOperator.IS_EMPTY) {
      // `is_empty` holds when the key is absent or the value is empty.
      if (current === undefined || current === null || current === "") {
        return true;
      }
      return Array.isArray(current) && current.length === 0;
    }
    if (current === undefined) {
      // Every other operator fails when the key is absent.
      return false;
    }
    const lookup = OPERATOR_TO_FILTER[filter.operator];
    if (!lookup) throw new Error(`Unsupported operator: ${filter.operator}`);
    return lookup(filter, metadata);
  };
  const mode = condition || "and";
  return mode === "and" ? filters.every(matches) : filters.some(matches);
};
/**
 * Plain data container behind SimpleVectorStore: three dictionaries keyed by node id.
 */
class SimpleVectorStoreData {
  // node id -> embedding
  embeddingDict = {};
  // node id -> id of the source (ref) document
  textIdToRefDocId = {};
  // node id -> metadata used for filtering
  metadataDict = {};
}
/**
 * Vector store that keeps embeddings, ref-doc links and metadata in plain
 * in-memory dictionaries and can persist them as a JSON file.
 */
class SimpleVectorStore extends vectorStore.BaseVectorStore {
  /**
   * @param init Optional init object; `init.data` supplies existing store
   * data, everything else is handed to the base class.
   */
  constructor(init) {
    super(init);
    this.storesText = false;
    this.data = init?.data || new SimpleVectorStoreData();
  }
  /**
   * Loads the store from `<persistDir>/vector_store.json`.
   * @param persistDir Directory holding the persisted store.
   * @param embedModel Embedding model forwarded to the created store.
   */
  static async fromPersistDir(persistDir = global.DEFAULT_PERSIST_DIR, embedModel) {
    const persistPath = env.path.join(persistDir, "vector_store.json");
    return await SimpleVectorStore.fromPersistPath(persistPath, embedModel);
  }
  /** This store has no underlying client; always returns null. */
  client() {
    return null;
  }
  /**
   * @param textId Node id.
   * @returns The stored embedding for the id, if any.
   */
  async get(textId) {
    return this.data.embeddingDict[textId];
  }
  /**
   * Stores the embeddings of the given nodes and persists when a persist
   * path is configured.
   *
   * Ref-doc link and metadata are recorded only for nodes that have a
   * `sourceNode`; other nodes get their embedding stored and nothing else.
   * @param embeddingResults Nodes carrying embeddings.
   * @returns The ids of the given nodes.
   */
  async add(embeddingResults) {
    for (const node of embeddingResults) {
      this.data.embeddingDict[node.id_] = node.getEmbedding();
      if (!node.sourceNode) {
        continue;
      }
      this.data.textIdToRefDocId[node.id_] = node.sourceNode?.nodeId;
      // Keep the metadata without the serialized node content.
      const metadata = vectorStore.nodeToMetadata(node, true, undefined, false);
      delete metadata["_node_content"];
      this.data.metadataDict[node.id_] = metadata;
    }
    if (this.persistPath) {
      await this.persist(this.persistPath);
    }
    return embeddingResults.map((result) => result.id_);
  }
  /**
   * Removes every node linked to the given ref document and persists when a
   * persist path is configured.
   * @param refDocId Id of the source document.
   */
  async delete(refDocId) {
    const textIdsToDelete = Object.keys(this.data.textIdToRefDocId).filter((textId) => this.data.textIdToRefDocId[textId] === refDocId);
    for (const textId of textIdsToDelete) {
      delete this.data.embeddingDict[textId];
      delete this.data.textIdToRefDocId[textId];
      if (this.data.metadataDict) delete this.data.metadataDict[textId];
    }
    if (this.persistPath) {
      await this.persist(this.persistPath);
    }
  }
  /**
   * Selects the stored nodes allowed by `query.docIds` and `query.filters`.
   * @param query Vector store query.
   * @returns Parallel arrays `nodeIds` and `embeddings`.
   */
  async filterNodes(query) {
    // Build the id lookup once instead of once per stored node.
    const allowedIds = query.docIds ? new Set(query.docIds) : null;
    const nodeIds = [];
    const vectors = [];
    for (const [nodeId, embedding] of Object.entries(this.data.embeddingDict)) {
      if (allowedIds && !allowedIds.has(nodeId)) {
        continue;
      }
      // metadataDict may be absent in data that was supplied without it.
      const metadata = this.data.metadataDict?.[nodeId];
      if (!buildFilterFn(metadata, query.filters)) {
        continue;
      }
      nodeIds.push(nodeId);
      vectors.push(embedding);
    }
    return {
      nodeIds,
      embeddings: vectors
    };
  }
  /**
   * Runs a similarity query over the filtered nodes.
   * @param query Vector store query (`queryEmbedding`, `similarityTopK`, `mode`, ...).
   * @returns `{ similarities, ids }` of the top matches.
   * @throws Error for learner modes (not implemented) and unknown modes.
   */
  async query(query) {
    const { nodeIds, embeddings: embeddings$1 } = await this.filterNodes(query);
    const queryEmbedding = query.queryEmbedding;
    let topSimilarities;
    let topIds;
    if (LEARNER_MODES.has(query.mode)) {
      // fixme: unfinished
      throw new Error("Learner modes not implemented for SimpleVectorStore yet.");
    } else if (query.mode === MMR_MODE) {
      const mmrThreshold = query.mmrThreshold;
      [topSimilarities, topIds] = embeddings.getTopKMMREmbeddings(queryEmbedding, embeddings$1, null, query.similarityTopK, nodeIds, mmrThreshold);
    } else if (query.mode === vectorStore.VectorStoreQueryMode.DEFAULT) {
      [topSimilarities, topIds] = embeddings.getTopKEmbeddings(queryEmbedding, embeddings$1, query.similarityTopK, nodeIds);
    } else {
      throw new Error(`Invalid query mode: ${query.mode}`);
    }
    return {
      similarities: topSimilarities,
      ids: topIds
    };
  }
  /**
   * Writes the store data as JSON.
   * @param persistPath Target file; defaults to `vector_store.json` in the default persist dir.
   */
  async persist(persistPath = env.path.join(global.DEFAULT_PERSIST_DIR, "vector_store.json")) {
    await SimpleVectorStore.persistData(persistPath, this.data);
  }
  /**
   * Writes `data` as JSON to `persistPath`, creating the parent directory first.
   * @param persistPath Target file.
   * @param data Store data to serialize.
   */
  static async persistData(persistPath, data) {
    const dirPath = env.path.dirname(persistPath);
    if (!await exists(dirPath)) {
      // Recursive, like fromPersistPath, so nested persist dirs can be created.
      await env.fs.mkdir(dirPath, {
        recursive: true
      });
    }
    await env.fs.writeFile(persistPath, JSON.stringify(data));
  }
  /**
   * Loads a store from a JSON file. When the file cannot be read or parsed,
   * an empty store is written to that path and returned.
   * @param persistPath File to load from; also becomes the store's persist path.
   * @param embeddingModel Embedding model forwarded to the created store.
   */
  static async fromPersistPath(persistPath, embeddingModel) {
    const dirPath = env.path.dirname(persistPath);
    if (!await exists(dirPath)) {
      await env.fs.mkdir(dirPath, {
        recursive: true
      });
    }
    let dataDict = {};
    try {
      const fileData = await env.fs.readFile(persistPath);
      dataDict = JSON.parse(fileData.toString());
    } catch {
      console.error(`No valid data found at path: ${persistPath} starting new store.`);
      // persist empty data, to ignore this error in the future
      await SimpleVectorStore.persistData(persistPath, new SimpleVectorStoreData());
    }
    const data = new SimpleVectorStoreData();
    data.embeddingDict = dataDict.embeddingDict ?? {};
    data.textIdToRefDocId = dataDict.textIdToRefDocId ?? {};
    data.metadataDict = dataDict.metadataDict ?? {};
    const store = new SimpleVectorStore({
      data,
      embeddingModel
    });
    store.persistPath = persistPath;
    return store;
  }
  /**
   * Builds a store from a dictionary produced by `toDict`.
   * Missing dictionaries default to empty objects, as in `fromPersistPath`.
   * @param saveDict Object with `embeddingDict`, `textIdToRefDocId`, `metadataDict`.
   * @param embeddingModel Embedding model forwarded to the created store.
   */
  static fromDict(saveDict, embeddingModel) {
    const data = new SimpleVectorStoreData();
    data.embeddingDict = saveDict.embeddingDict ?? {};
    data.textIdToRefDocId = saveDict.textIdToRefDocId ?? {};
    data.metadataDict = saveDict.metadataDict ?? {};
    return new SimpleVectorStore({
      data,
      embeddingModel
    });
  }
  /**
   * @returns The three data dictionaries (by reference, not copied).
   */
  toDict() {
    return {
      embeddingDict: this.data.embeddingDict,
      textIdToRefDocId: this.data.textIdToRefDocId,
      metadataDict: this.data.metadataDict
    };
  }
}
/**
 * Document store backed by a key-value store, a `SimpleKVStore` unless
 * another one is supplied.
 */
class SimpleDocumentStore extends docStore.KVDocumentStore {
  /**
   * @param kvStore$1 Key-value store to use; a new `SimpleKVStore` when falsy.
   * @param namespace Namespace; `DEFAULT_NAMESPACE` when falsy.
   */
  constructor(kvStore$1, namespace) {
    const backingStore = kvStore$1 || new kvStore.SimpleKVStore();
    const effectiveNamespace = namespace || global.DEFAULT_NAMESPACE;
    super(backingStore, effectiveNamespace);
    this.kvStore = backingStore;
  }
  /**
   * Loads the store from the default doc store file inside `persistDir`.
   */
  static async fromPersistDir(persistDir = global.DEFAULT_PERSIST_DIR, namespace) {
    const persistPath = env.path.join(persistDir, global.DEFAULT_DOC_STORE_PERSIST_FILENAME);
    return await SimpleDocumentStore.fromPersistPath(persistPath, namespace);
  }
  /**
   * Loads the store from a file via `SimpleKVStore.fromPersistPath`.
   */
  static async fromPersistPath(persistPath, namespace) {
    const loadedStore = await kvStore.SimpleKVStore.fromPersistPath(persistPath);
    return new SimpleDocumentStore(loadedStore, namespace);
  }
  /**
   * Persists the key-value store when it is a `BaseInMemoryKVStore`;
   * does nothing for any other kind of store.
   */
  async persist(persistPath = env.path.join(global.DEFAULT_PERSIST_DIR, global.DEFAULT_DOC_STORE_PERSIST_FILENAME)) {
    const backingStore = this.kvStore;
    if (!___default.default.isObject(backingStore)) {
      return;
    }
    if (backingStore instanceof kvStore.BaseInMemoryKVStore) {
      await backingStore.persist(persistPath);
    }
  }
  /**
   * Builds a store from a dictionary via `SimpleKVStore.fromDict`.
   */
  static fromDict(saveDict, namespace) {
    const restoredStore = kvStore.SimpleKVStore.fromDict(saveDict);
    return new SimpleDocumentStore(restoredStore, namespace);
  }
  /**
   * @returns The dictionary form of the underlying `SimpleKVStore`.
   * @throws Error when the key-value store is not a `SimpleKVStore`.
   */
  toDict() {
    const backingStore = this.kvStore;
    const isSimpleStore = ___default.default.isObject(backingStore) && backingStore instanceof kvStore.SimpleKVStore;
    if (!isSimpleStore) {
      throw new Error("KVStore is not a SimpleKVStore");
    }
    return backingStore.toDict();
  }
}
/**
 * Assembles a storage context, filling every store that was not supplied.
 *
 * Without `persistDir`, missing stores are created empty in memory. With
 * `persistDir`, missing stores are loaded from that directory and the text
 * vector store receives `Settings.embedModel`.
 *
 * @param init Optional object with `docStore`, `indexStore`, `vectorStore`,
 * `vectorStores` and `persistDir`; may be omitted entirely.
 * @returns `{ docStore, indexStore, vectorStores }`. A supplied `vectorStores`
 * object is returned as-is, with the text entry added when it was missing.
 */
async function storageContextFromDefaults({ docStore, indexStore: indexStore$1, vectorStore, vectorStores, persistDir } = {}) {
  vectorStores = vectorStores ?? {};
  if (!persistDir) {
    docStore = docStore ?? new SimpleDocumentStore();
    indexStore$1 = indexStore$1 ?? new indexStore.SimpleIndexStore();
    if (!(schema.ModalityType.TEXT in vectorStores)) {
      vectorStores[schema.ModalityType.TEXT] = vectorStore ?? new SimpleVectorStore();
    }
  } else {
    const embedModel = Settings.embedModel;
    docStore = docStore || await SimpleDocumentStore.fromPersistDir(persistDir, global.DEFAULT_NAMESPACE);
    indexStore$1 = indexStore$1 || await indexStore.SimpleIndexStore.fromPersistDir(persistDir);
    // Check the same key that is written below (ModalityType, as in the in-memory branch).
    if (!(schema.ModalityType.TEXT in vectorStores)) {
      vectorStores[schema.ModalityType.TEXT] = vectorStore ?? await SimpleVectorStore.fromPersistDir(persistDir, embedModel);
    }
  }
  return {
    docStore,
    indexStore: indexStore$1,
    vectorStores
  };
}
// Generated by "tsup ./src/index.js --format esm"
var __getOwnPropNames = Object.getOwnPropertyNames;
/**
 * Wraps a CommonJS-style module factory in a lazy, memoizing loader.
 *
 * @param cb Object whose first own property is the factory `(exports, module) => void`.
 * @param mod Optional pre-existing module record; when given, the factory never runs.
 * @returns A `__require` function that runs the factory on its first call and
 * returns `module.exports` on every call.
 */
var __commonJS = (cb, mod) => function __require() {
  if (!mod) {
    // Create the module record before invoking the factory, so the record is
    // already cached while (and after) the factory runs.
    mod = {
      exports: {}
    };
    const factory = cb[__getOwnPropNames(cb)[0]];
    factory(mod.exports, mod);
  }
  return mod.exports;
};
// src/stopwords.js
// Lazily evaluated module (wrapped with __commonJS) whose export is an object
// with a single `stopwords` array of lowercase entries. Besides common English
// words and single letters, the list ends with a few domain-specific entries,
// one two-word entry ("mu sigma") and a lone "-".
var require_stopwords = __commonJS({
"src/stopwords.js" (exports, module) {
module.exports = {
stopwords: [
"a",
"about",
"above",
"across",
"after",
"again",
"against",
"all",
"almost",
"alone",
"along",
"already",
"also",
"although",
"always",
"among",
"an",
"and",
"another",
"any",
"anybody",
"anyone",
"anything",
"anywhere",
"are",
"area",
"areas",
"around",
"as",
"ask",
"asked",
"asking",
"asks",
"at",
"away",
"b",
"back",
"backed",
"backing",
"backs",
"be",
"because",
"become",
"becomes",
"became",
"been",
"before",
"began",
"behind",
"being",
"beings",
"best",
"better",
"between",
"big",
"both",
"but",
"by",
"c",
"came",
"can",
"cannot",
"case",
"cases",
"certain",
"certainly",
"clear",
"clearly",
"come",
"contains",
"could",
"d",
"did",
"differ",
"different",
"differently",
"do",
"does",
"done",
"down",
"downed",
"downing",
"downs",
"during",
"e",
"each",
"early",
"either",
"end",
"ended",
"ending",
"ends",
"enough",
"even",
"evenly",
"ever",
"every",
"everybody",
"everyone",
"everything",
"everywhere",
"f",
"face",
"faces",
"fact",
"facts",
"far",
"felt",
"few",
"find",
"finds",
"first",
"for",
"four",
"from",
"full",
"fully",
"further",
"furthered",
"furthering",
"furthers",
"g",
"gave",
"general",
"generally",
"get",
"gets",
"give",
"given",
"gives",
"go",
"going",
"good",
"goods",
"got",
"great",
"greater",
"greatest",
"group",
"grouped",
"grouping",
"groups",
"h",
"had",
"has",
"have",
"having",
"he",
"her",
"herself",
"here",
"high",
"higher",
"highest",
"him",
"himself",
"his",
"how",
"however",
"i",
"if",
"important",
"in",
"interest",
"interested",
"interesting",
"interests",
"into",
"is",
"it",
"its",
"itself",
"j",
"just",
"k",
"keep",
"keeps",
"kind",
"knew",
"know",
"known",
"knows",
"l",
"large",
"largely",
"last",
"later",
"latest",
"least",
"less",
"let",
"lets",
"like",
"likely",
"long",
"longer",
"longest",
"m",
"made",
"make",
"making",
"man",
"many",
"may",
"me",
"member",
"members",
"men",
"might",
"more",
"most",
"mostly",
"mr",
"mrs",
"much",
"must",
"my",
"myself",
"n",
"necessary",
"need",
"needed",
"needing",
"needs",
"never",
"new",
"newer",
"newest",
"next",
"no",
"non",
"not",
"nobody",
"noone",
"nothing",
"now",
"nowhere",
"number",
"numbers",
"o",
"of",
"off",
"often",
"old",
"older",
"oldest",
"on",
"once",
"one",
"only",
"open",
"opened",
"opening",
"opens",
"or",
"order",
"ordered",
"ordering",
"orders",
"other",
"others",
"our",
"out",
"over",
"p",
"part",
"parted",
"parting",
"parts",
"per",
"perhaps",
"place",
"places",
"point",
"pointed",
"pointing",
"points",
"possible",
"present",
"presented",
"presenting",
"presents",
"problem",
"problems",
"put",
"puts",
"q",
"quite",
"r",
"rather",
"really",
"right",
"room",
"rooms",
"s",
"said",
"same",
"saw",
"say",
"says",
"second",
"seconds",
"see",
"sees",
"seem",
"seemed",
"seeming",
"seems",
"several",
"shall",
"she",
"should",
"show",
"showed",
"showing",
"shows",
"side",
"sides",
"since",
"small",
"smaller",
"smallest",
"so",
"some",
"somebody",
"someone",
"something",
"somewhere",
"state",
"states",
"still",
"such",
"sure",
"t",
"take",
"taken",
"than",
"that",
"the",
"their",
"them",
"then",
"there",
"therefore",
"these",
"they",
"thing",
"things",
"think",
"thinks",
"this",
"those",
"though",
"thought",
"thoughts",
"three",
"through",
"thus",
"to",
"today",
"together",
"too",
"took",
"toward",
"turn",
"turned",
"turning",
"turns",
"two",
"u",
"under",
"until",
"up",
"upon",
"us",
"use",
"uses",
"used",
"v",
"very",
"w",
"want",
"wanted",
"wanting",
"wants",
"was",
"way",
"ways",
"we",
"well",
"wells",
"went",
"were",
"what",
"when",
"where",
"whether",
"which",
"while",
"who",
"whole",
"whose",
"why",
"will",
"with",
"within",
"without",
"work",
"worked",
"working",
"works",
"would",
"y",
"year",
"years",
"yet",
"you",
"young",
"younger",
"youngest",
"your",
"yours",
"eoc",
"mu",
"sigma",
"mu sigma",
"musigma",
"client",
"clients",
"capabilities",
"capability",
"firm",
"firms",
"biggest",
"-"
]
};
}
});
// Lodash helpers used by `rake` to order the scored keywords.
const { fromPairs, sortBy, toPairs } = ___default.default;
// Evaluate the bundled stop-word module; `stopwords.stopwords` is the word array.
var stopwords = require_stopwords();
/**
 * Reports whether the input contains at least one decimal digit.
 * Despite the name, the whole input does not have to be numeric:
 * "b2b" yields true as well.
 *
 * @param str Value to inspect (coerced to a string by the regex test).
 * @returns true when any character 0-9 occurs in the input.
 */
function isNumber(str) {
  const anyDigit = /[0-9]/;
  return anyDigit.test(str);
}
/**
 * Decides whether a candidate phrase qualifies as a keyword.
 *
 * A phrase is rejected when it is shorter than `minCharLength` characters,
 * has more than `maxWordsLength` space-separated words, contains no ASCII
 * letter, or contains more digits than ASCII letters.
 *
 * @param phrase Candidate phrase.
 * @param minCharLength Minimum number of characters.
 * @param maxWordsLength Maximum number of space-separated words.
 * @returns true when the phrase passes every check.
 */
function isAcceptable(phrase, minCharLength, maxWordsLength) {
  // Compare the phrase LENGTH with the minimum; comparing the string itself
  // with a number coerces it to NaN and never rejects anything alphabetic.
  if (phrase.length < minCharLength) {
    return false;
  }
  if (phrase.split(" ").length > maxWordsLength) {
    return false;
  }
  let digits = 0;
  let alpha = 0;
  for (const ch of phrase) {
    if (/\d/.test(ch)) digits += 1;
    if (/[a-zA-Z]/.test(ch)) alpha += 1;
  }
  return alpha > 0 && digits <= alpha;
}
/**
 * Counts how many elements of `haystack` are strictly equal to `needle`.
 *
 * @param haystack Array to scan.
 * @param needle Value to look for.
 * @returns The number of strictly equal elements.
 */
function countOccurances(haystack, needle) {
  let total = 0;
  haystack.forEach((value) => {
    if (value === needle) {
      total += 1;
    }
  });
  return total;
}
/**
 * Scores every candidate phrase as the sum of the scores of its words.
 *
 * @param phraseList Candidate phrases (repetitions allowed).
 * @param wordScore Map from word to score.
 * @param minKeywordFrequency When above 1, phrases occurring fewer times than
 * this in `phraseList` are left out.
 * @returns Map from phrase to score, keyed in order of first occurrence.
 * A phrase that yields no words keeps the score 0.
 */
function generateCandidateKeywordScores(phraseList, wordScore, minKeywordFrequency = 1) {
  const keywordCandidates = {};
  for (const phrase of phraseList) {
    const tooRare = minKeywordFrequency > 1 && countOccurances(phraseList, phrase) < minKeywordFrequency;
    if (tooRare) {
      continue;
    }
    if (!(phrase in keywordCandidates)) {
      keywordCandidates[phrase] = 0;
    }
    const words = separateWords(phrase, 0);
    if (words.length > 0) {
      keywordCandidates[phrase] = words.reduce((sum, word) => sum + wordScore[word], 0);
    }
  }
  return keywordCandidates;
}
/**
 * Splits text into lowercase words.
 *
 * The text is cut at every character that is not a letter, digit, `_`, `+`,
 * `-` or `/`. A piece is kept when it is longer than `minWordReturnSize`,
 * non-empty and free of digits (per `isNumber`).
 *
 * @param text Text to split.
 * @param minWordReturnSize Words must be strictly longer than this.
 * @returns The retained words, in order of appearance.
 */
function separateWords(text, minWordReturnSize) {
  const words = [];
  for (const piece of text.split(/[^a-zA-Z0-9_\+\-/]/)) {
    const candidate = piece.trim().toLowerCase();
    const longEnough = candidate.length > minWordReturnSize;
    if (!longEnough || candidate === "" || isNumber(candidate)) {
      continue;
    }
    words.push(candidate);
  }
  return words;
}
/**
 * Computes a score for each word as (degree + frequency) / frequency.
 *
 * Frequency is the number of occurrences of the word across all phrases.
 * Degree accumulates, for every occurrence, the number of other words in the
 * same phrase.
 *
 * @param phraseList Candidate phrases.
 * @returns Map from word to score, keyed in order of first occurrence.
 */
function calculateWordScores(phraseList) {
  const frequency = {};
  const degree = {};
  for (const phrase of phraseList) {
    const words = separateWords(phrase, 0);
    const companions = words.length - 1;
    for (const word of words) {
      frequency[word] = (word in frequency ? frequency[word] : 0) + 1;
      degree[word] = (word in degree ? degree[word] : 0) + companions;
    }
  }
  const wordScore = {};
  for (const word of Object.keys(frequency)) {
    const occurrences = frequency[word];
    wordScore[word] = (degree[word] + occurrences) / (occurrences * 1);
  }
  return wordScore;
}
/**
 * Cuts sentences into candidate phrases at every stop-word match.
 *
 * Each match of `stopWordPattern` is replaced by "|", the sentence is split
 * on "|", and every trimmed, lowercased, non-empty piece accepted by
 * `isAcceptable` becomes a candidate.
 *
 * @param sentenceList Sentences to process.
 * @param stopWordPattern Regular expression matching stop words.
 * @param minCharLength Forwarded to `isAcceptable`.
 * @param maxWordsLength Forwarded to `isAcceptable`.
 * @returns Candidate phrases in order of appearance (duplicates kept).
 */
function generateCandidateKeywords(sentenceList, stopWordPattern, minCharLength = 1, maxWordsLength = 5) {
  const phraseList = [];
  for (const sentence of sentenceList) {
    const marked = stopWordPattern[Symbol.replace](sentence, "|");
    for (const fragment of marked.split("|")) {
      const phrase = fragment.trim().toLowerCase();
      if (phrase === "") {
        continue;
      }
      if (isAcceptable(phrase, minCharLength, maxWordsLength)) {
        phraseList.push(phrase);
      }
    }
  }
  return phraseList;
}
/**
 * Builds one case-insensitive, global RegExp that matches any stop word as a
 * whole word (`\bword\b`).
 *
 * Stop words come from `loadStopWords()`; entries without any `\w` character
 * are skipped. Words are interpolated into the pattern as-is (not escaped).
 *
 * @param {string} [path] - Not used by this implementation.
 * @returns {RegExp} Alternation of all stop words, with flags "ig".
 */
function buildStopWordRegex(path) {
    const alternatives = loadStopWords()
        .filter((word)=>/\w+/.test(word))
        .map((word)=>`\\b${word}\\b`);
    return new RegExp(alternatives.join("|"), "ig");
}
/**
 * Splits text into sentence-like fragments.
 *
 * Delimiters are: `[`, `]`, newline, `.`, `!`, `?`, `,`, `;`, `:`, tab,
 * backslash, `"`, `(`, `)`, `'`, U+2019 and U+2013. Note that `\\-\\` inside
 * the character class is a range from backslash to backslash, so the ASCII
 * hyphen "-" is NOT a delimiter.
 *
 * @param {string} text - Text to split.
 * @returns {string[]} Fragments between delimiters (may include empty strings).
 */
function splitSentences(text) {
    return text.split(/[\[\]\n.!?,;:\t\\-\\"\\(\\)\\\'\u2019\u2013]/);
}
/**
 * Returns the stop-word list exposed as `stopwords.stopwords` by the
 * module-level `stopwords` binding.
 *
 * @param {string} [path] - Not used by this implementation.
 * @returns {*} The value of `stopwords.stopwords`.
 */
function loadStopWords(path) {
    return stopwords.stopwords;
}
/**
 * Runs the keyword-extraction pipeline over `text`:
 * stop-word pattern -> sentences -> candidate phrases -> word scores ->
 * phrase scores, then orders the (phrase, score) pairs with `sortBy` on the
 * score, reverses that order, and rebuilds an object with `fromPairs`.
 *
 * NOTE(review): `toPairs`, `sortBy` and `fromPairs` are defined elsewhere in
 * this file; presumably lodash-style helpers - confirm.
 *
 * @param {string} text - Text to extract keywords from.
 * @param {string} [stopWordsPath] - Not used by this implementation.
 * @param {number} [minCharLength=3] - Forwarded to `generateCandidateKeywords`.
 * @param {number} [maxWordsLength=5] - Forwarded to `generateCandidateKeywords`.
 * @param {number} [minKeywordFrequency=1] - Forwarded to `generateCandidateKeywordScores`.
 * @returns {Object} Result of `fromPairs` over the reversed, score-sorted pairs.
 */
function rake(text, stopWordsPath, minCharLength = 3, maxWordsLength = 5, minKeywordFrequency = 1) {
    const stopWordPattern = buildStopWordRegex();
    const phrases = generateCandidateKeywords(splitSentences(text), stopWordPattern, minCharLength, maxWordsLength);
    const scoresByPhrase = generateCandidateKeywordScores(phrases, calculateWordScores(phrases), minKeywordFrequency);
    const rankedPairs = sortBy(toPairs(scoresByPhrase), (pair)=>pair[1]);
    rankedPairs.reverse();
    return fromPairs(rankedPairs);
}
/**
 * Expands a collection of tokens with their `\w+` sub-tokens.
 *
 * Every token is kept as-is. When a token contains more than one run of word
 * characters (for example "foo bar" or "a-b"), each run is added as well.
 *
 * @param {Iterable<string>} tokens - Tokens to expand.
 * @returns {Set<string>} The original tokens plus their sub-tokens.
 */
function expandTokensWithSubtokens(tokens) {
    const expanded = new Set();
    for (const token of tokens){
        expanded.add(token);
        const pieces = token.match(/\w+/g) || [];
        // A single run adds nothing beyond the token's own word.
        if (pieces.length > 1) {
            pieces.forEach((piece)=>expanded.add(piece));
        }
    }
    return expanded;
}
/**
 * Parses a comma-separated keyword response into a set of keywords.
 *
 * The response is trimmed, a leading `startToken` is removed when present, and
 * the remainder is split on ",". Each piece is optionally lower-cased and then
 * trimmed; the resulting set is passed through `expandTokensWithSubtokens`.
 *
 * @param {string} response - Raw response text.
 * @param {string} [startToken=""] - Prefix to strip from the response.
 * @param {boolean} [lowercase=true] - Whether to lower-case each keyword.
 * @returns {Set<string>} Keywords together with their sub-tokens.
 */
function extractKeywordsGivenResponse(response, startToken = "", lowercase = true) {
    const trimmed = response.trim();
    const body = trimmed.startsWith(startToken) ? trimmed.substring(startToken.length) : trimmed;
    const keywords = body.split(",").map((part)=>(lowercase ? part.toLowerCase() : part).trim());
    return expandTokensWithSubtokens(new Set(keywords));
}
/**
 * Extracts keywords from text by raw frequency.
 *
 * The text is tokenised into runs of word characters (`\w+`), lower-cased and
 * counted; tokens are returned most frequent first.
 *
 * @param {string} textChunk - Text to analyse.
 * @param {number} [maxKeywords] - Maximum number of keywords to return; any
 *   falsy value (including 0) returns all of them.
 * @returns {Set<string>} Keywords ordered by descending frequency.
 */
function simpleExtractKeywords(textChunk, maxKeywords) {
    // Null-prototype tally: with a plain `{}` a token such as "constructor"
    // resolves to an inherited member, so `|| 0` never fires, the "count"
    // becomes a string, and the comparator below returns NaN.
    const valueCounts = Object.create(null);
    for (const match of textChunk.matchAll(/\w+/g)){
        const token = match[0].toLowerCase();
        valueCounts[token] = (valueCounts[token] || 0) + 1;
    }
    // Sorting tokens by frequency, highest first
    const sortedTokens = Object.keys(valueCounts).sort((a, b)=>valueCounts[b] - valueCounts[a]);
    const keywords = maxKeywords ? sortedTokens.slice(0, maxKeywords) : sortedTokens;
    return new Set(keywords);
}
/**
 * Extracts keywords from text using `rake`.
 *
 * @param {string} textChunk - Text to analyse.
 * @param {number} [maxKeywords] - Maximum number of keywords to return; any
 *   falsy value (including 0) returns all of them.
 * @returns {Set<string>} The keys of the object returned by `rake`, in key
 *   order, truncated to `maxKeywords` when given.
 */
function rakeExtractKeywords(textChunk, maxKeywords) {
    const ranked = Object.keys(rake(textChunk));
    if (maxKeywords) {
        return new Set(ranked.slice(0, maxKeywords));
    }
    return new Set(ranked);
}
function applyDecs2203RFactory() {
function createAddInitializerMethod(initializers, decoratorFinishedRef) {
return function addInitializer(initializer) {
assertNotFinished(decoratorFinishedRef, "addInitializer");
assertCallable(initializer, "An initializer");
initializers.push(initializer);
};
}
function memberDec(dec, name, desc, initializers, kind, isStatic, isPrivate, metadata, value) {
var kindStr;
switch(kind){
case 1:
kindStr = "accessor";
break;
case 2:
kindStr = "method";
break;
case 3:
kindStr = "getter";
break;
case 4:
kindStr = "setter";
break;
default:
kindStr = "field";
}
var ctx = {
kind: kindStr,
name: isPrivate ? "#" + name : name,
static: isStatic,
private: isPrivate,
metadata: metadata
};
var decoratorFinishedRef = {
v: false
};
ctx.addInitializer = createAddInitializerMethod(initializers, decoratorFinishedRef);
var get, set;
if (kind === 0) {
if (isPrivate) {
get = desc.get;
set = desc.set;
} else {
get = function() {
return this[name];
};
set = function(v) {
this[name] = v;
};
}
} else if (kind === 2) {
get = function() {
return desc.value;
};
} else {
if (kind === 1 || kind === 3) {
get = function() {
return desc.get.call(this);
};
}
if (kind === 1 || kind === 4) {
set = function(v) {
desc.set.call(this, v);
};
}
}
ctx.access = get && set ? {
get: get,
set: set
} : get ? {
get: get
} : {
set: set
};
try {
return dec(value, ctx);
} finally{
decoratorFinishedRef.v = true;
}
}
function assertNotFinished(decoratorFinishedRef, fnName) {
if (decoratorFinishedRef