llamaindex

<p align="center"> <img height="100" width="100" alt="LlamaIndex logo" src="https://ts.llamaindex.ai/square.svg" /> </p> <h1 align="center">LlamaIndex.TS</h1> <h3 align="center"> Data framework for your LLM application. </h3>

import {
  client,
  searchPipelinesApiV1PipelinesGet,
  listProjectsApiV1ProjectsGet,
  uploadFileApiV1FilesPost,
  addFilesToPipelineApiApiV1PipelinesPipelineIdFilesPut,
  getPipelineFileStatusApiV1PipelinesPipelineIdFilesFileIdStatusGet,
  listPipelineFilesApiV1PipelinesPipelineIdFilesGet,
  readFileContentApiV1FilesIdContentGet,
  runSearchApiV1PipelinesPipelineIdRetrievePost,
  getPipelineStatusApiV1PipelinesPipelineIdStatusGet,
  getPipelineDocumentStatusApiV1PipelinesPipelineIdDocumentsDocumentIdStatusGet,
  upsertBatchPipelineDocumentsApiV1PipelinesPipelineIdDocumentsPut,
  createBatchPipelineDocumentsApiV1PipelinesPipelineIdDocumentsPost,
  deletePipelineDocumentApiV1PipelinesPipelineIdDocumentsDocumentIdDelete,
  upsertPipelineApiV1PipelinesPut,
} from "@llamaindex/cloud/api";
import {
  DEFAULT_BASE_URL,
  DEFAULT_PROJECT_NAME,
  Settings as Settings$1,
} from "@llamaindex/core/global";
import { getEnv, AsyncLocalStorage } from "@llamaindex/env";
import { RetrieverQueryEngine } from "@llamaindex/core/query-engine";
import { BaseRetriever } from "@llamaindex/core/retriever";
import { jsonToNode, ObjectType } from "@llamaindex/core/schema";
import { extractText } from "@llamaindex/core/utils";
import { PromptHelper } from "@llamaindex/core/indices";
import { SentenceSplitter } from "@llamaindex/core/node-parser";

function getBaseUrl(baseUrl) {
  return baseUrl ?? getEnv("LLAMA_CLOUD_BASE_URL") ?? DEFAULT_BASE_URL;
}

function getAppBaseUrl() {
  return client.getConfig().baseUrl?.replace(/api\./, "") ?? "";
}

// fixme: refactor this to init at the top level or module level
let initOnce = false;
function initService({ apiKey, baseUrl } = {}) {
  if (initOnce) {
    return;
  }
  initOnce = true;
  client.setConfig({
    baseUrl: getBaseUrl(baseUrl),
    throwOnError: true,
  });
  const token = apiKey ?? getEnv("LLAMA_CLOUD_API_KEY");
  client.interceptors.request.use((request) => {
    request.headers.set("Authorization", `Bearer ${token}`);
    return request;
  });
  client.interceptors.error.use((error) => {
    throw new Error(
      `LlamaCloud API request failed. Error details: ${JSON.stringify(error)}`,
    );
  });
  if (!token) {
    throw new Error(
      "API Key is required for LlamaCloudIndex. Please pass the apiKey parameter",
    );
  }
}

async function getProjectId(projectName, organizationId) {
  const { data: projects } = await listProjectsApiV1ProjectsGet({
    query: {
      project_name: projectName,
      organization_id: organizationId ?? null,
    },
    throwOnError: true,
  });
  if (projects.length === 0) {
    throw new Error(
      `Unknown project name ${projectName}. Please confirm a managed project with this name exists.`,
    );
  } else if (projects.length > 1) {
    throw new Error(
      `Multiple projects found with name ${projectName}. Please specify organization_id.`,
    );
  }
  const project = projects[0];
  if (!project.id) {
    throw new Error(`No project found with name ${projectName}`);
  }
  return project.id;
}

async function getPipelineId(name, projectName, organizationId) {
  const { data: pipelines } = await searchPipelinesApiV1PipelinesGet({
    query: {
      project_id: await getProjectId(projectName, organizationId),
      pipeline_name: name,
    },
    throwOnError: true,
  });
  if (pipelines.length === 0 || !pipelines[0].id) {
    throw new Error(
      `No pipeline found with name ${name} in project ${projectName}`,
    );
  }
  return pipelines[0].id;
}
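// Usage sketch (not part of the module): how the helpers above resolve names to
// ids. The project and pipeline names are hypothetical; requires
// LLAMA_CLOUD_API_KEY (or an apiKey passed to initService).
//
//   initService();
//   const projectId = await getProjectId("Default", undefined);
//   const pipelineId = await getPipelineId("my-pipeline", "Default", undefined);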
class LLamaCloudFileService {
  /**
   * Get the list of projects; each project contains a list of pipelines.
   */
  static async getAllProjectsWithPipelines() {
    initService();
    try {
      const { data: projects } = await listProjectsApiV1ProjectsGet({
        throwOnError: true,
      });
      const { data: pipelines } = await searchPipelinesApiV1PipelinesGet({
        throwOnError: true,
      });
      return projects.map((project) => ({
        ...project,
        pipelines: pipelines.filter((p) => p.project_id === project.id),
      }));
    } catch (error) {
      console.error("Error listing projects and pipelines:", error);
      return [];
    }
  }
  /**
   * Upload a file to a pipeline in LlamaCloud.
   */
  static async addFileToPipeline(
    projectId,
    pipelineId,
    uploadFile,
    // eslint-disable-next-line @typescript-eslint/no-explicit-any
    customMetadata = {},
  ) {
    initService();
    const { data: file } = await uploadFileApiV1FilesPost({
      query: { project_id: projectId },
      body: { upload_file: uploadFile },
      throwOnError: true,
    });
    const files = [
      {
        file_id: file.id,
        custom_metadata: { file_id: file.id, ...customMetadata },
      },
    ];
    await addFilesToPipelineApiApiV1PipelinesPipelineIdFilesPut({
      path: { pipeline_id: pipelineId },
      body: files,
    });
    // Poll for up to 2 seconds (20 attempts x 100 ms) for the file to be processed
    const maxAttempts = 20;
    let attempt = 0;
    while (attempt < maxAttempts) {
      const { data: result } =
        await getPipelineFileStatusApiV1PipelinesPipelineIdFilesFileIdStatusGet({
          path: { pipeline_id: pipelineId, file_id: file.id },
          throwOnError: true,
        });
      if (result.status === "ERROR") {
        throw new Error(`File processing failed: ${JSON.stringify(result)}`);
      }
      if (result.status === "SUCCESS") {
        // File is ingested - return the file id
        return file.id;
      }
      attempt += 1;
      await new Promise((resolve) => setTimeout(resolve, 100)); // Sleep for 100ms
    }
    throw new Error(
      `File processing did not complete after ${maxAttempts} attempts. Check your LlamaCloud index at https://cloud.llamaindex.ai/project/${projectId}/deploy/${pipelineId} for more details.`,
    );
  }
  /**
   * Get the download URL for a file in LlamaCloud.
   */
  static async getFileUrl(pipelineId, filename) {
    initService();
    const { data: allPipelineFiles } =
      await listPipelineFilesApiV1PipelinesPipelineIdFilesGet({
        path: { pipeline_id: pipelineId },
        throwOnError: true,
      });
    const file = allPipelineFiles.find((file) => file.name === filename);
    if (!file?.file_id) return null;
    const { data: fileContent } = await readFileContentApiV1FilesIdContentGet({
      path: { id: file.file_id },
      query: { project_id: file.project_id },
      throwOnError: true,
    });
    return fileContent.url;
  }
}
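// Illustrative sketch (not part of the module): uploading an in-memory file to
// an existing pipeline, then fetching its download URL. The ids, file name, and
// metadata are hypothetical.
//
//   const fileId = await LLamaCloudFileService.addFileToPipeline(
//     projectId,
//     pipelineId,
//     new File(["hello world"], "hello.txt"),
//     { source: "example" },
//   );
//   const url = await LLamaCloudFileService.getFileUrl(pipelineId, "hello.txt");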
class LlamaCloudRetriever extends BaseRetriever {
  resultNodesToNodeWithScore(nodes) {
    return nodes.map((node) => {
      const textNode = jsonToNode(node.node, ObjectType.TEXT);
      textNode.metadata = {
        ...textNode.metadata,
        ...node.node.extra_info,
      };
      return {
        // Currently LlamaCloud only supports text nodes
        node: textNode,
        score: node.score ?? undefined,
      };
    });
  }
  // LlamaCloud expects null values for filters, but LlamaIndexTS uses undefined
  // for empty values. This function converts the undefined values to null.
  convertFilter(filters) {
    if (!filters) return null;
    const processFilter = (filter) => {
      if ("filters" in filter) {
        // type MetadataFilters
        return { ...filter, filters: filter.filters.map(processFilter) };
      }
      return { ...filter, value: filter.value ?? null };
    };
    return { ...filters, filters: filters.filters.map(processFilter) };
  }
  constructor(params) {
    super();
    this.projectName = DEFAULT_PROJECT_NAME;
    this.clientParams = { apiKey: params.apiKey, baseUrl: params.baseUrl };
    initService(this.clientParams);
    this.retrieveParams = params;
    this.pipelineName = params.name;
    if (params.projectName) {
      this.projectName = params.projectName;
    }
    if (params.organizationId) {
      this.organizationId = params.organizationId;
    }
  }
  async _retrieve(query) {
    const pipelineId = await getPipelineId(
      this.pipelineName,
      this.projectName,
      this.organizationId,
    );
    const filters = this.convertFilter(this.retrieveParams.filters);
    const { data: results } =
      await runSearchApiV1PipelinesPipelineIdRetrievePost({
        throwOnError: true,
        path: { pipeline_id: pipelineId },
        body: {
          ...this.retrieveParams,
          query: extractText(query),
          search_filters: filters,
          dense_similarity_top_k: this.retrieveParams.similarityTopK,
        },
      });
    return this.resultNodesToNodeWithScore(results.retrieval_nodes);
  }
}
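// Hypothetical usage (not part of the module): standalone retrieval against a
// named pipeline via the public retrieve method inherited from BaseRetriever.
// The pipeline name, project name, and query are made up for illustration.
//
//   const retriever = new LlamaCloudRetriever({
//     name: "my-pipeline",
//     projectName: "Default",
//     similarityTopK: 5,
//   });
//   const nodesWithScore = await retriever.retrieve("What is LlamaIndex?");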
/**
 * @internal
 */
class GlobalSettings {
  #prompt;
  #promptHelper;
  #nodeParser;
  #chunkOverlap;
  #promptHelperAsyncLocalStorage;
  #nodeParserAsyncLocalStorage;
  #chunkOverlapAsyncLocalStorage;
  #promptAsyncLocalStorage;
  get debug() {
    return Settings$1.debug;
  }
  get llm() {
    return Settings$1.llm;
  }
  set llm(llm) {
    Settings$1.llm = llm;
  }
  withLLM(llm, fn) {
    return Settings$1.withLLM(llm, fn);
  }
  get promptHelper() {
    if (this.#promptHelper === null) {
      this.#promptHelper = new PromptHelper();
    }
    return this.#promptHelperAsyncLocalStorage.getStore() ?? this.#promptHelper;
  }
  set promptHelper(promptHelper) {
    this.#promptHelper = promptHelper;
  }
  withPromptHelper(promptHelper, fn) {
    return this.#promptHelperAsyncLocalStorage.run(promptHelper, fn);
  }
  get embedModel() {
    return Settings$1.embedModel;
  }
  set embedModel(embedModel) {
    Settings$1.embedModel = embedModel;
  }
  withEmbedModel(embedModel, fn) {
    return Settings$1.withEmbedModel(embedModel, fn);
  }
  get nodeParser() {
    if (this.#nodeParser === null) {
      this.#nodeParser = new SentenceSplitter({
        chunkSize: this.chunkSize,
        chunkOverlap: this.chunkOverlap,
      });
    }
    return this.#nodeParserAsyncLocalStorage.getStore() ?? this.#nodeParser;
  }
  set nodeParser(nodeParser) {
    this.#nodeParser = nodeParser;
  }
  withNodeParser(nodeParser, fn) {
    return this.#nodeParserAsyncLocalStorage.run(nodeParser, fn);
  }
  get callbackManager() {
    return Settings$1.callbackManager;
  }
  set callbackManager(callbackManager) {
    Settings$1.callbackManager = callbackManager;
  }
  withCallbackManager(callbackManager, fn) {
    return Settings$1.withCallbackManager(callbackManager, fn);
  }
  set chunkSize(chunkSize) {
    Settings$1.chunkSize = chunkSize;
  }
  get chunkSize() {
    return Settings$1.chunkSize;
  }
  withChunkSize(chunkSize, fn) {
    return Settings$1.withChunkSize(chunkSize, fn);
  }
  get chunkOverlap() {
    return this.#chunkOverlapAsyncLocalStorage.getStore() ?? this.#chunkOverlap;
  }
  set chunkOverlap(chunkOverlap) {
    if (typeof chunkOverlap === "number") {
      this.#chunkOverlap = chunkOverlap;
    }
  }
  withChunkOverlap(chunkOverlap, fn) {
    return this.#chunkOverlapAsyncLocalStorage.run(chunkOverlap, fn);
  }
  get prompt() {
    return this.#promptAsyncLocalStorage.getStore() ?? this.#prompt;
  }
  set prompt(prompt) {
    this.#prompt = prompt;
  }
  withPrompt(prompt, fn) {
    return this.#promptAsyncLocalStorage.run(prompt, fn);
  }
  constructor() {
    this.#prompt = {};
    this.#promptHelper = null;
    this.#nodeParser = null;
    this.#promptHelperAsyncLocalStorage = new AsyncLocalStorage();
    this.#nodeParserAsyncLocalStorage = new AsyncLocalStorage();
    this.#chunkOverlapAsyncLocalStorage = new AsyncLocalStorage();
    this.#promptAsyncLocalStorage = new AsyncLocalStorage();
  }
}
const Settings = new GlobalSettings();
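// Illustrative sketch (not part of the module): the with* helpers scope a value
// to a callback through AsyncLocalStorage instead of mutating the global value,
// as the chunkOverlap getter above shows by preferring the store over the field.
//
//   Settings.chunkOverlap = 200; // process-wide default
//   Settings.withChunkOverlap(20, () => {
//     // reads inside this callback (and its async continuations) see 20
//     console.log(Settings.chunkOverlap); // 20
//   });
//   console.log(Settings.chunkOverlap); // 200 again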
const DEFAULT_NAME = "query_engine_tool";
const DEFAULT_DESCRIPTION =
  "Useful for running a natural language query against a knowledge base and getting back a natural language response.";
const DEFAULT_PARAMETERS = {
  type: "object",
  properties: {
    query: {
      type: "string",
      description: "The query to search for",
    },
  },
  required: ["query"],
};
class QueryEngineTool {
  constructor({ queryEngine, metadata, includeSourceNodes }) {
    this.queryEngine = queryEngine;
    this.metadata = {
      name: metadata?.name ?? DEFAULT_NAME,
      description: metadata?.description ?? DEFAULT_DESCRIPTION,
      parameters: metadata?.parameters ?? DEFAULT_PARAMETERS,
    };
    this.includeSourceNodes = includeSourceNodes ?? false;
  }
  async call({ query }) {
    const response = await this.queryEngine.query({ query });
    if (!this.includeSourceNodes) {
      return { content: response.message.content };
    }
    return {
      content: response.message.content,
      sourceNodes: response.sourceNodes,
    };
  }
}
class LlamaCloudIndex {
  constructor(params) {
    this.params = params;
    initService(this.params);
  }
  async waitForPipelineIngestion(verbose = Settings.debug, raiseOnError = false) {
    const pipelineId = await this.getPipelineId();
    if (verbose) {
      console.log("Waiting for pipeline ingestion: ");
    }
    while (true) {
      const { data: pipelineStatus } =
        await getPipelineStatusApiV1PipelinesPipelineIdStatusGet({
          path: { pipeline_id: pipelineId },
          throwOnError: true,
        });
      if (pipelineStatus.status === "SUCCESS") {
        if (verbose) {
          console.log("Pipeline ingestion completed successfully");
        }
        break;
      }
      if (pipelineStatus.status === "ERROR") {
        if (verbose) {
          console.error("Pipeline ingestion failed");
        }
        if (raiseOnError) {
          throw new Error("Pipeline ingestion failed");
        }
      }
      if (verbose) {
        process.stdout.write(".");
      }
      await new Promise((resolve) => setTimeout(resolve, 1000));
    }
  }
  async waitForDocumentIngestion(docIds, verbose = Settings.debug, raiseOnError = false) {
    const pipelineId = await this.getPipelineId();
    if (verbose) {
      console.log("Loading data: ");
    }
    const pendingDocs = new Set(docIds);
    while (pendingDocs.size) {
      const docsToRemove = new Set();
      for (const doc of pendingDocs) {
        const {
          data: { status },
        } = await getPipelineDocumentStatusApiV1PipelinesPipelineIdDocumentsDocumentIdStatusGet({
          path: { pipeline_id: pipelineId, document_id: doc },
          throwOnError: true,
        });
        if (status === "NOT_STARTED" || status === "IN_PROGRESS") {
          continue;
        }
        if (status === "ERROR") {
          if (verbose) {
            console.error(`Document ingestion failed for ${doc}`);
          }
          if (raiseOnError) {
            throw new Error(`Document ingestion failed for ${doc}`);
          }
        }
        docsToRemove.add(doc);
      }
      for (const doc of docsToRemove) {
        pendingDocs.delete(doc);
      }
      if (pendingDocs.size) {
        if (verbose) {
          process.stdout.write(".");
        }
        await new Promise((resolve) => setTimeout(resolve, 500));
      }
    }
    if (verbose) {
      console.log("Done!");
    }
    await this.waitForPipelineIngestion(verbose, raiseOnError);
  }
  async getPipelineId(name, projectName, organizationId) {
    return await getPipelineId(
      name ?? this.params.name,
      projectName ?? this.params.projectName,
      organizationId ?? this.params.organizationId,
    );
  }
  async getProjectId(projectName, organizationId) {
    return await getProjectId(
      projectName ?? this.params.projectName,
      organizationId ?? this.params.organizationId,
    );
  }
  /**
   * Adds documents to the given index parameters. If the index does not exist, it will be created.
   *
   * @param params - An object containing the following properties:
   * - documents: An array of Document objects to be added to the index.
   * - verbose: Optional boolean to enable verbose logging.
   * - Additional properties from CloudConstructorParams.
   * @returns A Promise that resolves to a new LlamaCloudIndex instance.
   */
  static async fromDocuments(params, config) {
    const index = new LlamaCloudIndex({ ...params });
    await index.ensureIndex({
      ...config,
      verbose: params.verbose ?? false,
    });
    await index.addDocuments(params.documents, params.verbose);
    return index;
  }
  async addDocuments(documents, verbose) {
    const apiUrl = getAppBaseUrl();
    const projectId = await this.getProjectId();
    const pipelineId = await this.getPipelineId();
    await upsertBatchPipelineDocumentsApiV1PipelinesPipelineIdDocumentsPut({
      path: { pipeline_id: pipelineId },
      body: documents.map((doc) => ({
        metadata: doc.metadata,
        text: doc.text,
        // map each metadata exclusion list onto its matching API field
        excluded_embed_metadata_keys: doc.excludedEmbedMetadataKeys,
        excluded_llm_metadata_keys: doc.excludedLlmMetadataKeys,
        id: doc.id_,
      })),
    });
    while (true) {
      const { data: pipelineStatus } =
        await getPipelineStatusApiV1PipelinesPipelineIdStatusGet({
          path: { pipeline_id: pipelineId },
          throwOnError: true,
        });
      if (pipelineStatus.status === "SUCCESS") {
        console.info("Documents ingested successfully, pipeline is ready to use");
        break;
      }
      if (pipelineStatus.status === "ERROR") {
        console.error(
          `Some documents failed to ingest, check your pipeline logs at ${apiUrl}/project/${projectId}/deploy/${pipelineId}`,
        );
        throw new Error("Some documents failed to ingest");
      }
      if (pipelineStatus.status === "PARTIAL_SUCCESS") {
        console.info(
          `Document ingestion partially succeeded; for a more complete status, check your pipeline at ${apiUrl}/project/${projectId}/deploy/${pipelineId}`,
        );
        break;
      }
      if (verbose) {
        process.stdout.write(".");
      }
      await new Promise((resolve) => setTimeout(resolve, 1000));
    }
    if (verbose) {
      console.info(
        `Ingestion completed, find your index at ${apiUrl}/project/${projectId}/deploy/${pipelineId}`,
      );
    }
  }
  asRetriever(params = {}) {
    return new LlamaCloudRetriever({ ...this.params, ...params });
  }
  asQueryEngine(params) {
    const retriever = new LlamaCloudRetriever({ ...this.params, ...params });
    return new RetrieverQueryEngine(
      retriever,
      params?.responseSynthesizer,
      params?.nodePostprocessors,
    );
  }
  asQueryTool(params) {
    if (params.options) {
      params.retriever = this.asRetriever(params.options);
    }
    return new QueryEngineTool({
      queryEngine: this.asQueryEngine(params),
      metadata: params?.metadata,
      includeSourceNodes: params?.includeSourceNodes ?? false,
    });
  }
  queryTool(params) {
    return this.asQueryTool(params);
  }
  async insert(document) {
    const pipelineId = await this.getPipelineId();
    await createBatchPipelineDocumentsApiV1PipelinesPipelineIdDocumentsPost({
      path: { pipeline_id: pipelineId },
      body: [
        {
          metadata: document.metadata,
          text: document.text,
          // map each metadata exclusion list onto its matching API field
          excluded_embed_metadata_keys: document.excludedEmbedMetadataKeys,
          excluded_llm_metadata_keys: document.excludedLlmMetadataKeys,
          id: document.id_,
        },
      ],
    });
    await this.waitForDocumentIngestion([document.id_]);
  }
  async delete(document) {
    const pipelineId = await this.getPipelineId();
    await deletePipelineDocumentApiV1PipelinesPipelineIdDocumentsDocumentIdDelete({
      path: { pipeline_id: pipelineId, document_id: document.id_ },
    });
    await this.waitForPipelineIngestion();
  }
  async refreshDoc(document) {
    const pipelineId = await this.getPipelineId();
    await upsertBatchPipelineDocumentsApiV1PipelinesPipelineIdDocumentsPut({
      path: { pipeline_id: pipelineId },
      body: [
        {
          metadata: document.metadata,
          text: document.text,
          // map each metadata exclusion list onto its matching API field
          excluded_embed_metadata_keys: document.excludedEmbedMetadataKeys,
          excluded_llm_metadata_keys: document.excludedLlmMetadataKeys,
          id: document.id_,
        },
      ],
    });
    await this.waitForDocumentIngestion([document.id_]);
  }
  async ensureIndex(config) {
    const projectId = await this.getProjectId();
    const { data: pipelines } = await searchPipelinesApiV1PipelinesGet({
      query: { project_id: projectId, pipeline_name: this.params.name },
      throwOnError: true,
    });
    if (pipelines.length === 0) {
      // no pipeline found, create a new one
      let embeddingConfig = config?.embedding;
      if (!embeddingConfig) {
        // no embedding config provided, use OpenAI as default
        const openAIApiKey = getEnv("OPENAI_API_KEY");
        const embeddingModel = getEnv("EMBEDDING_MODEL");
        if (!openAIApiKey || !embeddingModel) {
          throw new Error(
            "No embedding configuration provided. Falling back to the OpenAI embedding model requires the OPENAI_API_KEY and EMBEDDING_MODEL environment variables to be set.",
          );
        }
        embeddingConfig = {
          type: "OPENAI_EMBEDDING",
          component: {
            api_key: openAIApiKey,
            model_name: embeddingModel,
          },
        };
      }
      let transformConfig = config?.transform;
      if (!transformConfig) {
        transformConfig = {
          mode: "auto",
          chunk_size: 1024,
          chunk_overlap: 200,
        };
      }
      const { data: pipeline } = await upsertPipelineApiV1PipelinesPut({
        query: { project_id: projectId },
        body: {
          name: this.params.name,
          embedding_config: embeddingConfig,
          transform_config: transformConfig,
        },
        throwOnError: true,
      });
      if (config?.verbose) {
        console.log(`Created pipeline ${pipeline.id} with name ${pipeline.name}`);
      }
    }
  }
}
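// Illustrative sketch (not part of the module): building a managed index from
// in-memory documents with fromDocuments, then querying it. Document comes from
// the llamaindex package; the pipeline name, project name, and text are
// hypothetical. Requires LLAMA_CLOUD_API_KEY, and (absent an embedding config)
// OPENAI_API_KEY and EMBEDDING_MODEL, to be set.
//
//   import { Document } from "llamaindex";
//
//   const index = await LlamaCloudIndex.fromDocuments({
//     documents: [new Document({ text: "LlamaIndex is a data framework." })],
//     name: "my-pipeline",
//     projectName: "Default",
//     verbose: true,
//   });
//   const response = await index.asQueryEngine({ similarityTopK: 3 }).query({
//     query: "What is LlamaIndex?",
//   });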
console.warn(`
The classes LLamaCloudFileService, LlamaCloudIndex and LlamaCloudRetriever have been moved to the package llama-cloud-services.
* Please migrate your imports to llama-cloud-services, e.g. import { LlamaCloudIndex } from "llama-cloud-services";
* See the documentation: https://docs.cloud.llamaindex.ai
`);

export { LLamaCloudFileService, LlamaCloudIndex, LlamaCloudRetriever };
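// Migration sketch (per the deprecation warning above): the same classes are
// re-exported by the llama-cloud-services package, so existing code only needs
// its import path changed. The pipeline name here is hypothetical.
//
//   import { LlamaCloudIndex } from "llama-cloud-services";
//
//   const index = new LlamaCloudIndex({
//     name: "my-pipeline",
//     projectName: "Default",
//   });
//   const queryEngine = index.asQueryEngine({ similarityTopK: 5 });
//   const response = await queryEngine.query({ query: "What is LlamaIndex?" });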