llamaindex
import {
client,
searchPipelinesApiV1PipelinesGet,
listProjectsApiV1ProjectsGet,
uploadFileApiV1FilesPost,
addFilesToPipelineApiApiV1PipelinesPipelineIdFilesPut,
getPipelineFileStatusApiV1PipelinesPipelineIdFilesFileIdStatusGet,
listPipelineFilesApiV1PipelinesPipelineIdFilesGet,
readFileContentApiV1FilesIdContentGet,
runSearchApiV1PipelinesPipelineIdRetrievePost,
getPipelineStatusApiV1PipelinesPipelineIdStatusGet,
getPipelineDocumentStatusApiV1PipelinesPipelineIdDocumentsDocumentIdStatusGet,
upsertBatchPipelineDocumentsApiV1PipelinesPipelineIdDocumentsPut,
createBatchPipelineDocumentsApiV1PipelinesPipelineIdDocumentsPost,
deletePipelineDocumentApiV1PipelinesPipelineIdDocumentsDocumentIdDelete,
upsertPipelineApiV1PipelinesPut
} from '@llamaindex/cloud/api';
import { DEFAULT_BASE_URL, DEFAULT_PROJECT_NAME, Settings as Settings$1 } from '@llamaindex/core/global';
import { getEnv, AsyncLocalStorage } from '@llamaindex/env';
import { RetrieverQueryEngine } from '@llamaindex/core/query-engine';
import { BaseRetriever } from '@llamaindex/core/retriever';
import { jsonToNode, ObjectType } from '@llamaindex/core/schema';
import { extractText } from '@llamaindex/core/utils';
import { PromptHelper } from '@llamaindex/core/indices';
import { SentenceSplitter } from '@llamaindex/core/node-parser';
function getBaseUrl(baseUrl) {
return baseUrl ?? getEnv("LLAMA_CLOUD_BASE_URL") ?? DEFAULT_BASE_URL;
}
function getAppBaseUrl() {
return client.getConfig().baseUrl?.replace(/api\./, "") ?? "";
}
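// Derives the web-app origin from the API origin by stripping the "api."
// prefix, e.g. (with the default base URL) "https://api.cloud.llamaindex.ai"
// -> "https://cloud.llamaindex.ai".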
// fixme: refactor this to init at the top level or module level
let initOnce = false;
function initService({ apiKey, baseUrl } = {}) {
if (initOnce) {
return;
}
// Resolve the API key up front so a missing key fails fast, before the
// client is configured and the service is marked as initialized.
const token = apiKey ?? getEnv("LLAMA_CLOUD_API_KEY");
if (!token) {
throw new Error("API Key is required for LlamaCloudIndex. Please pass the apiKey parameter or set the LLAMA_CLOUD_API_KEY environment variable.");
}
initOnce = true;
client.setConfig({
baseUrl: getBaseUrl(baseUrl),
throwOnError: true
});
client.interceptors.request.use((request)=>{
request.headers.set("Authorization", `Bearer ${token}`);
return request;
});
client.interceptors.error.use((error)=>{
throw new Error(`LlamaCloud API request failed. Error details: ${JSON.stringify(error)}`);
});
}
async function getProjectId(projectName, organizationId) {
const { data: projects } = await listProjectsApiV1ProjectsGet({
query: {
project_name: projectName,
organization_id: organizationId ?? null
},
throwOnError: true
});
if (projects.length === 0) {
throw new Error(`Unknown project name ${projectName}. Please confirm a managed project with this name exists.`);
} else if (projects.length > 1) {
throw new Error(`Multiple projects found with name ${projectName}. Please specify organization_id.`);
}
const project = projects[0];
if (!project.id) {
throw new Error(`Project ${projectName} was found but has no id`);
}
return project.id;
}
async function getPipelineId(name, projectName, organizationId) {
const { data: pipelines } = await searchPipelinesApiV1PipelinesGet({
query: {
project_id: await getProjectId(projectName, organizationId),
pipeline_name: name
},
throwOnError: true
});
if (pipelines.length === 0 || !pipelines[0].id) {
throw new Error(`No pipeline found with name ${name} in project ${projectName}`);
}
return pipelines[0].id;
}
class LLamaCloudFileService {
/**
* Get the list of projects; each project contains its list of pipelines.
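*
* A minimal usage sketch (assuming LLAMA_CLOUD_API_KEY is set in the environment):
* @example
* const projects = await LLamaCloudFileService.getAllProjectsWithPipelines();
* for (const project of projects) {
*   console.log(project.name, project.pipelines.map((p) => p.name));
* }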
*/
static async getAllProjectsWithPipelines() {
initService();
try {
const { data: projects } = await listProjectsApiV1ProjectsGet({
throwOnError: true
});
const { data: pipelines } = await searchPipelinesApiV1PipelinesGet({
throwOnError: true
});
return projects.map((project)=>({
...project,
pipelines: pipelines.filter((p)=>p.project_id === project.id)
}));
} catch (error) {
console.error("Error listing projects and pipelines:", error);
return [];
}
}
/**
* Upload a file to a pipeline in LlamaCloud
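*
* A minimal sketch (ids are placeholders; `File` requires a runtime with the
* Web File API, e.g. Node 20+ or a browser):
* @example
* const file = new File(["hello world"], "hello.txt", { type: "text/plain" });
* const fileId = await LLamaCloudFileService.addFileToPipeline("<project-id>", "<pipeline-id>", file, { source: "example" });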
*/
static async addFileToPipeline(projectId, pipelineId, uploadFile, customMetadata = {}) {
initService();
const { data: file } = await uploadFileApiV1FilesPost({
query: {
project_id: projectId
},
body: {
upload_file: uploadFile
},
throwOnError: true
});
const files = [
{
file_id: file.id,
custom_metadata: {
file_id: file.id,
...customMetadata
}
}
];
await addFilesToPipelineApiApiV1PipelinesPipelineIdFilesPut({
path: {
pipeline_id: pipelineId
},
body: files
});
// Poll the file status for up to 2s (20 attempts x 100ms)
const maxAttempts = 20;
let attempt = 0;
while(attempt < maxAttempts){
const { data: result } = await getPipelineFileStatusApiV1PipelinesPipelineIdFilesFileIdStatusGet({
path: {
pipeline_id: pipelineId,
file_id: file.id
},
throwOnError: true
});
if (result.status === "ERROR") {
throw new Error(`File processing failed: ${JSON.stringify(result)}`);
}
if (result.status === "SUCCESS") {
// File is ingested - return the file id
return file.id;
}
attempt += 1;
await new Promise((resolve)=>setTimeout(resolve, 100)); // Sleep for 100ms
}
throw new Error(`File processing did not complete after ${maxAttempts} attempts. Check your LlamaCloud index at https://cloud.llamaindex.ai/project/${projectId}/deploy/${pipelineId} for more details.`);
}
/**
* Get download URL for a file in LlamaCloud
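*
* A minimal sketch (placeholder id; resolves to null if the file is not found):
* @example
* const url = await LLamaCloudFileService.getFileUrl("<pipeline-id>", "hello.txt");
* if (url) console.log(`Download: ${url}`);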
*/
static async getFileUrl(pipelineId, filename) {
initService();
const { data: allPipelineFiles } = await listPipelineFilesApiV1PipelinesPipelineIdFilesGet({
path: {
pipeline_id: pipelineId
},
throwOnError: true
});
const file = allPipelineFiles.find((file)=>file.name === filename);
if (!file?.file_id) return null;
const { data: fileContent } = await readFileContentApiV1FilesIdContentGet({
path: {
id: file.file_id
},
query: {
project_id: file.project_id
},
throwOnError: true
});
return fileContent.url;
}
}
class LlamaCloudRetriever extends BaseRetriever {
resultNodesToNodeWithScore(nodes) {
return nodes.map((node)=>{
const textNode = jsonToNode(node.node, ObjectType.TEXT);
textNode.metadata = {
...textNode.metadata,
...node.node.extra_info
};
return {
// Currently LlamaCloud only supports text nodes
node: textNode,
score: node.score ?? undefined
};
});
}
// LlamaCloud expects null values for filters, but LlamaIndexTS uses undefined for empty values
// This function converts the undefined values to null
convertFilter(filters) {
if (!filters) return null;
const processFilter = (filter)=>{
if ("filters" in filter) {
// type MetadataFilters
return {
...filter,
filters: filter.filters.map(processFilter)
};
}
return {
...filter,
value: filter.value ?? null
};
};
return {
...filters,
filters: filters.filters.map(processFilter)
};
}
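// Illustration (hypothetical filter): an input of
//   { filters: [{ key: "author", value: undefined, operator: "==" }] }
// is converted to
//   { filters: [{ key: "author", value: null, operator: "==" }] }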
constructor(params){
super();
this.projectName = DEFAULT_PROJECT_NAME;
this.clientParams = {
apiKey: params.apiKey,
baseUrl: params.baseUrl
};
initService(this.clientParams);
this.retrieveParams = params;
this.pipelineName = params.name;
if (params.projectName) {
this.projectName = params.projectName;
}
if (params.organizationId) {
this.organizationId = params.organizationId;
}
}
async _retrieve(query) {
const pipelineId = await getPipelineId(this.pipelineName, this.projectName, this.organizationId);
const filters = this.convertFilter(this.retrieveParams.filters);
const { data: results } = await runSearchApiV1PipelinesPipelineIdRetrievePost({
throwOnError: true,
path: {
pipeline_id: pipelineId
},
body: {
...this.retrieveParams,
query: extractText(query),
search_filters: filters,
dense_similarity_top_k: this.retrieveParams.similarityTopK
}
});
return this.resultNodesToNodeWithScore(results.retrieval_nodes);
}
}
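// A minimal retrieval sketch (placeholder pipeline name; assumes the pipeline
// exists in LlamaCloud and LLAMA_CLOUD_API_KEY is set):
//   const retriever = new LlamaCloudRetriever({ name: "my-pipeline", similarityTopK: 3 });
//   const nodesWithScore = await retriever.retrieve("What is LlamaCloud?");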
/**
* @internal
*/
class GlobalSettings {
#prompt;
#promptHelper;
#nodeParser;
#chunkOverlap;
#promptHelperAsyncLocalStorage;
#nodeParserAsyncLocalStorage;
#chunkOverlapAsyncLocalStorage;
#promptAsyncLocalStorage;
get debug() {
return Settings$1.debug;
}
get llm() {
return Settings$1.llm;
}
set llm(llm) {
Settings$1.llm = llm;
}
withLLM(llm, fn) {
return Settings$1.withLLM(llm, fn);
}
get promptHelper() {
if (this.#promptHelper === null) {
this.#promptHelper = new PromptHelper();
}
return this.#promptHelperAsyncLocalStorage.getStore() ?? this.#promptHelper;
}
set promptHelper(promptHelper) {
this.#promptHelper = promptHelper;
}
withPromptHelper(promptHelper, fn) {
return this.#promptHelperAsyncLocalStorage.run(promptHelper, fn);
}
get embedModel() {
return Settings$1.embedModel;
}
set embedModel(embedModel) {
Settings$1.embedModel = embedModel;
}
withEmbedModel(embedModel, fn) {
return Settings$1.withEmbedModel(embedModel, fn);
}
get nodeParser() {
if (this.#nodeParser === null) {
this.#nodeParser = new SentenceSplitter({
chunkSize: this.chunkSize,
chunkOverlap: this.chunkOverlap
});
}
return this.#nodeParserAsyncLocalStorage.getStore() ?? this.#nodeParser;
}
set nodeParser(nodeParser) {
this.#nodeParser = nodeParser;
}
withNodeParser(nodeParser, fn) {
return this.#nodeParserAsyncLocalStorage.run(nodeParser, fn);
}
get callbackManager() {
return Settings$1.callbackManager;
}
set callbackManager(callbackManager) {
Settings$1.callbackManager = callbackManager;
}
withCallbackManager(callbackManager, fn) {
return Settings$1.withCallbackManager(callbackManager, fn);
}
set chunkSize(chunkSize) {
Settings$1.chunkSize = chunkSize;
}
get chunkSize() {
return Settings$1.chunkSize;
}
withChunkSize(chunkSize, fn) {
return Settings$1.withChunkSize(chunkSize, fn);
}
get chunkOverlap() {
return this.#chunkOverlapAsyncLocalStorage.getStore() ?? this.#chunkOverlap;
}
set chunkOverlap(chunkOverlap) {
if (typeof chunkOverlap === "number") {
this.#chunkOverlap = chunkOverlap;
}
}
withChunkOverlap(chunkOverlap, fn) {
return this.#chunkOverlapAsyncLocalStorage.run(chunkOverlap, fn);
}
get prompt() {
return this.#promptAsyncLocalStorage.getStore() ?? this.#prompt;
}
set prompt(prompt) {
this.#prompt = prompt;
}
withPrompt(prompt, fn) {
return this.#promptAsyncLocalStorage.run(prompt, fn);
}
constructor(){
this.#prompt = {};
this.#promptHelper = null;
this.#nodeParser = null;
this.#promptHelperAsyncLocalStorage = new AsyncLocalStorage();
this.#nodeParserAsyncLocalStorage = new AsyncLocalStorage();
this.#chunkOverlapAsyncLocalStorage = new AsyncLocalStorage();
this.#promptAsyncLocalStorage = new AsyncLocalStorage();
}
}
const Settings = new GlobalSettings();
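// Scoped-settings sketch: the `with*` helpers bind a value for the duration of
// the callback (via AsyncLocalStorage) without mutating the global default.
// Hypothetical usage:
//   Settings.withChunkSize(512, () => {
//     // code in here observes Settings.chunkSize === 512
//   });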
const DEFAULT_NAME = "query_engine_tool";
const DEFAULT_DESCRIPTION = "Useful for running a natural language query against a knowledge base and getting back a natural language response.";
const DEFAULT_PARAMETERS = {
type: "object",
properties: {
query: {
type: "string",
description: "The query to search for"
}
},
required: [
"query"
]
};
class QueryEngineTool {
constructor({ queryEngine, metadata, includeSourceNodes }){
this.queryEngine = queryEngine;
this.metadata = {
name: metadata?.name ?? DEFAULT_NAME,
description: metadata?.description ?? DEFAULT_DESCRIPTION,
parameters: metadata?.parameters ?? DEFAULT_PARAMETERS
};
this.includeSourceNodes = includeSourceNodes ?? false;
}
async call({ query }) {
const response = await this.queryEngine.query({
query
});
if (!this.includeSourceNodes) {
return {
content: response.message.content
};
}
return {
content: response.message.content,
sourceNodes: response.sourceNodes
};
}
}
class LlamaCloudIndex {
constructor(params){
this.params = params;
initService(this.params);
}
async waitForPipelineIngestion(verbose = Settings.debug, raiseOnError = false) {
const pipelineId = await this.getPipelineId();
if (verbose) {
console.log("Waiting for pipeline ingestion: ");
}
while(true){
const { data: pipelineStatus } = await getPipelineStatusApiV1PipelinesPipelineIdStatusGet({
path: {
pipeline_id: pipelineId
},
throwOnError: true
});
if (pipelineStatus.status === "SUCCESS") {
if (verbose) {
console.log("Pipeline ingestion completed successfully");
}
break;
}
if (pipelineStatus.status === "ERROR") {
if (verbose) {
console.error("Pipeline ingestion failed");
}
if (raiseOnError) {
throw new Error("Pipeline ingestion failed");
}
}
if (verbose) {
process.stdout.write(".");
}
await new Promise((resolve)=>setTimeout(resolve, 1000));
}
}
async waitForDocumentIngestion(docIds, verbose = Settings.debug, raiseOnError = false) {
const pipelineId = await this.getPipelineId();
if (verbose) {
console.log("Loading data: ");
}
const pendingDocs = new Set(docIds);
while(pendingDocs.size){
const docsToRemove = new Set();
for (const doc of pendingDocs){
const { data: { status } } = await getPipelineDocumentStatusApiV1PipelinesPipelineIdDocumentsDocumentIdStatusGet({
path: {
pipeline_id: pipelineId,
document_id: doc
},
throwOnError: true
});
if (status === "NOT_STARTED" || status === "IN_PROGRESS") {
continue;
}
if (status === "ERROR") {
if (verbose) {
console.error(`Document ingestion failed for ${doc}`);
}
if (raiseOnError) {
throw new Error(`Document ingestion failed for ${doc}`);
}
}
docsToRemove.add(doc);
}
for (const doc of docsToRemove){
pendingDocs.delete(doc);
}
if (pendingDocs.size) {
if (verbose) {
process.stdout.write(".");
}
await new Promise((resolve)=>setTimeout(resolve, 500));
}
}
if (verbose) {
console.log("Done!");
}
await this.waitForPipelineIngestion(verbose, raiseOnError);
}
async getPipelineId(name, projectName, organizationId) {
return await getPipelineId(name ?? this.params.name, projectName ?? this.params.projectName, organizationId ?? this.params.organizationId);
}
async getProjectId(projectName, organizationId) {
return await getProjectId(projectName ?? this.params.projectName, organizationId ?? this.params.organizationId);
}
/**
* Adds documents to the given index parameters. If the index does not exist, it will be created.
*
* @param params - An object containing the following properties:
* - documents: An array of Document objects to be added to the index.
* - verbose: Optional boolean to enable verbose logging.
* - Additional properties from CloudConstructorParams.
* @returns A Promise that resolves to a new LlamaCloudIndex instance.
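*
* A minimal sketch (placeholder pipeline name; assumes LLAMA_CLOUD_API_KEY is
* set, plus the embedding environment variables used by ensureIndex when no
* embedding config is passed):
* @example
* import { Document } from "@llamaindex/core/schema";
* const index = await LlamaCloudIndex.fromDocuments({
*   name: "my-pipeline",
*   documents: [new Document({ text: "LlamaCloud is a managed ingestion and retrieval service." })],
* });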
*/
static async fromDocuments(params, config) {
const index = new LlamaCloudIndex({
...params
});
await index.ensureIndex({
...config,
verbose: params.verbose ?? false
});
await index.addDocuments(params.documents, params.verbose);
return index;
}
async addDocuments(documents, verbose) {
const apiUrl = getAppBaseUrl();
const projectId = await this.getProjectId();
const pipelineId = await this.getPipelineId();
await upsertBatchPipelineDocumentsApiV1PipelinesPipelineIdDocumentsPut({
path: {
pipeline_id: pipelineId
},
body: documents.map((doc)=>({
metadata: doc.metadata,
text: doc.text,
excluded_embed_metadata_keys: doc.excludedEmbedMetadataKeys,
excluded_llm_metadata_keys: doc.excludedLlmMetadataKeys,
id: doc.id_
}))
});
while(true){
const { data: pipelineStatus } = await getPipelineStatusApiV1PipelinesPipelineIdStatusGet({
path: {
pipeline_id: pipelineId
},
throwOnError: true
});
if (pipelineStatus.status === "SUCCESS") {
console.info("Documents ingested successfully, pipeline is ready to use");
break;
}
if (pipelineStatus.status === "ERROR") {
console.error(`Some documents failed to ingest, check your pipeline logs at ${apiUrl}/project/${projectId}/deploy/${pipelineId}`);
throw new Error("Some documents failed to ingest");
}
if (pipelineStatus.status === "PARTIAL_SUCCESS") {
console.info(`Document ingestion partially succeeded; for complete status, check your pipeline at ${apiUrl}/project/${projectId}/deploy/${pipelineId}`);
break;
}
if (verbose) {
process.stdout.write(".");
}
await new Promise((resolve)=>setTimeout(resolve, 1000));
}
if (verbose) {
console.info(`Ingestion completed, find your index at ${apiUrl}/project/${projectId}/deploy/${pipelineId}`);
}
}
asRetriever(params = {}) {
return new LlamaCloudRetriever({
...this.params,
...params
});
}
asQueryEngine(params) {
const retriever = new LlamaCloudRetriever({
...this.params,
...params
});
return new RetrieverQueryEngine(retriever, params?.responseSynthesizer, params?.nodePostprocessors);
}
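// End-to-end sketch (hypothetical query; assumes the index exists):
//   const queryEngine = index.asQueryEngine({ similarityTopK: 5 });
//   const response = await queryEngine.query({ query: "What does the knowledge base say about X?" });
//   console.log(response.message.content);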
asQueryTool(params) {
if (params.options) {
params.retriever = this.asRetriever(params.options);
}
return new QueryEngineTool({
queryEngine: this.asQueryEngine(params),
metadata: params?.metadata,
includeSourceNodes: params?.includeSourceNodes ?? false
});
}
queryTool(params) {
return this.asQueryTool(params);
}
async insert(document) {
const pipelineId = await this.getPipelineId();
await createBatchPipelineDocumentsApiV1PipelinesPipelineIdDocumentsPost({
path: {
pipeline_id: pipelineId
},
body: [
{
metadata: document.metadata,
text: document.text,
excluded_embed_metadata_keys: document.excludedEmbedMetadataKeys,
excluded_llm_metadata_keys: document.excludedLlmMetadataKeys,
id: document.id_
}
]
});
await this.waitForDocumentIngestion([
document.id_
]);
}
async delete(document) {
const pipelineId = await this.getPipelineId();
await deletePipelineDocumentApiV1PipelinesPipelineIdDocumentsDocumentIdDelete({
path: {
pipeline_id: pipelineId,
document_id: document.id_
}
});
await this.waitForPipelineIngestion();
}
async refreshDoc(document) {
const pipelineId = await this.getPipelineId();
await upsertBatchPipelineDocumentsApiV1PipelinesPipelineIdDocumentsPut({
path: {
pipeline_id: pipelineId
},
body: [
{
metadata: document.metadata,
text: document.text,
excluded_embed_metadata_keys: document.excludedEmbedMetadataKeys,
excluded_llm_metadata_keys: document.excludedLlmMetadataKeys,
id: document.id_
}
]
});
await this.waitForDocumentIngestion([
document.id_
]);
}
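// Single-document lifecycle sketch (hypothetical `doc`):
//   await index.insert(doc);      // create, then wait for ingestion
//   await index.refreshDoc(doc);  // upsert changed content
//   await index.delete(doc);      // remove, then wait for the pipeline to settle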
async ensureIndex(config) {
const projectId = await this.getProjectId();
const { data: pipelines } = await searchPipelinesApiV1PipelinesGet({
query: {
project_id: projectId,
pipeline_name: this.params.name
},
throwOnError: true
});
if (pipelines.length === 0) {
// no pipeline found, create a new one
let embeddingConfig = config?.embedding;
if (!embeddingConfig) {
// no embedding config provided, use OpenAI as default
const openAIApiKey = getEnv("OPENAI_API_KEY");
const embeddingModel = getEnv("EMBEDDING_MODEL");
if (!openAIApiKey || !embeddingModel) {
throw new Error("No embedding configuration provided. Fallback to OpenAI embedding model. OPENAI_API_KEY and EMBEDDING_MODEL environment variables must be set.");
}
embeddingConfig = {
type: "OPENAI_EMBEDDING",
component: {
api_key: openAIApiKey,
model_name: embeddingModel
}
};
}
let transformConfig = config?.transform;
if (!transformConfig) {
transformConfig = {
mode: "auto",
chunk_size: 1024,
chunk_overlap: 200
};
}
const { data: pipeline } = await upsertPipelineApiV1PipelinesPut({
query: {
project_id: projectId
},
body: {
name: this.params.name,
embedding_config: embeddingConfig,
transform_config: transformConfig
},
throwOnError: true
});
if (config?.verbose) {
console.log(`Created pipeline ${pipeline.id} with name ${pipeline.name}`);
}
}
}
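// Shape of the optional `config` accepted by ensureIndex (values illustrative):
//   {
//     embedding: { type: "OPENAI_EMBEDDING", component: { api_key: "...", model_name: "..." } },
//     transform: { mode: "auto", chunk_size: 1024, chunk_overlap: 200 },
//     verbose: true,
//   }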
}
console.warn(`
The classes LlamaCloudFileService, LlamaCloudIndex and LlamaCloudRetriever have been moved to the package llama-cloud-services.
* Please migrate your imports to llama-cloud-services, e.g. import { LlamaCloudIndex } from "llama-cloud-services";
* See the documentation: https://docs.cloud.llamaindex.ai
`);
export { LLamaCloudFileService, LlamaCloudIndex, LlamaCloudRetriever };