UNPKG

llamaindex

Version:

<p align="center"> <img height="100" width="100" alt="LlamaIndex logo" src="https://ts.llamaindex.ai/square.svg" /> </p> <h1 align="center">LlamaIndex.TS</h1> <h3 align="center"> Data framework for your LLM application. </h3>

287 lines (286 loc) 11.7 kB
import { RetrieverQueryEngine } from "../engines/query/RetrieverQueryEngine.js";
import { LlamaCloudRetriever } from "./LlamaCloudRetriever.js";
import { getAppBaseUrl, getPipelineId, getProjectId, initService } from "./utils.js";
import { createBatchPipelineDocumentsApiV1PipelinesPipelineIdDocumentsPost, deletePipelineDocumentApiV1PipelinesPipelineIdDocumentsDocumentIdDelete, getPipelineDocumentStatusApiV1PipelinesPipelineIdDocumentsDocumentIdStatusGet, getPipelineStatusApiV1PipelinesPipelineIdStatusGet, searchPipelinesApiV1PipelinesGet, upsertBatchPipelineDocumentsApiV1PipelinesPipelineIdDocumentsPut, upsertPipelineApiV1PipelinesPut } from "@llamaindex/cloud/api";
import { getEnv } from "@llamaindex/env";
import { Settings } from "../Settings.js";
import { QueryEngineTool } from "../tools/QueryEngineTool.js";

/**
 * Builds the request-body entry for one document.
 *
 * Kept in a single place so that every call site maps
 * `excludedEmbedMetadataKeys` -> `excluded_embed_metadata_keys` and
 * `excludedLlmMetadataKeys` -> `excluded_llm_metadata_keys` the same way.
 *
 * @param document - Object exposing `metadata`, `text`, `id_` and the two
 *   excluded-metadata-key lists.
 * @returns A plain object in the shape sent to the pipeline documents endpoints.
 */
function toCloudDocumentPayload(document) {
    return {
        metadata: document.metadata,
        text: document.text,
        excluded_embed_metadata_keys: document.excludedEmbedMetadataKeys,
        excluded_llm_metadata_keys: document.excludedLlmMetadataKeys,
        id: document.id_
    };
}

/**
 * Handle on a LlamaCloud pipeline ("index") identified by the name / project /
 * organization given in the constructor params.
 */
export class LlamaCloudIndex {
    params;

    /**
     * @param params - Stored on the instance and passed to `initService`.
     *   This class reads `name`, `projectName` and `organizationId` from it.
     */
    constructor(params){
        this.params = params;
        initService(this.params);
    }

    /**
     * Polls the pipeline status once per second until it reaches a stopping
     * state ("SUCCESS", "ERROR" or "PARTIAL_SUCCESS").
     *
     * @param verbose - When true, progress and outcome are written to the console.
     *   Defaults to `Settings.debug`.
     * @param raiseOnError - When true, an "ERROR" status throws. When false the
     *   method returns after (optionally) logging the failure.
     * @throws {Error} If the status is "ERROR" and `raiseOnError` is true.
     */
    async waitForPipelineIngestion(verbose = Settings.debug, raiseOnError = false) {
        const pipelineId = await this.getPipelineId();
        if (verbose) {
            console.log("Waiting for pipeline ingestion: ");
        }
        while(true){
            const { data: pipelineStatus } = await getPipelineStatusApiV1PipelinesPipelineIdStatusGet({
                path: {
                    pipeline_id: pipelineId
                },
                throwOnError: true
            });
            if (pipelineStatus.status === "SUCCESS") {
                if (verbose) {
                    console.log("Pipeline ingestion completed successfully");
                }
                break;
            }
            if (pipelineStatus.status === "ERROR") {
                if (verbose) {
                    console.error("Pipeline ingestion failed");
                }
                if (raiseOnError) {
                    throw new Error("Pipeline ingestion failed");
                }
                // Stop polling: without this the loop would re-poll (and
                // re-log) the same failed status forever.
                break;
            }
            if (pipelineStatus.status === "PARTIAL_SUCCESS") {
                // Treated as a stopping state, consistent with addDocuments().
                if (verbose) {
                    console.log("Pipeline ingestion partially succeeded");
                }
                break;
            }
            if (verbose) {
                process.stdout.write(".");
            }
            await new Promise((resolve)=>setTimeout(resolve, 1000));
        }
    }

    /**
     * Polls the status of each given document every 500 ms until none of them
     * is "NOT_STARTED" or "IN_PROGRESS", then waits for the pipeline itself via
     * `waitForPipelineIngestion`.
     *
     * @param docIds - Iterable of document ids to wait for.
     * @param verbose - When true, progress is written to the console.
     *   Defaults to `Settings.debug`.
     * @param raiseOnError - When true, a document with status "ERROR" throws.
     *   Otherwise the document is simply dropped from the pending set.
     * @throws {Error} If a document reports "ERROR" and `raiseOnError` is true.
     */
    async waitForDocumentIngestion(docIds, verbose = Settings.debug, raiseOnError = false) {
        const pipelineId = await this.getPipelineId();
        if (verbose) {
            console.log("Loading data: ");
        }
        const pendingDocs = new Set(docIds);
        while(pendingDocs.size){
            // Collect finished documents first, remove them after the pass.
            const docsToRemove = new Set();
            for (const doc of pendingDocs){
                const { data: { status } } = await getPipelineDocumentStatusApiV1PipelinesPipelineIdDocumentsDocumentIdStatusGet({
                    path: {
                        pipeline_id: pipelineId,
                        document_id: doc
                    },
                    throwOnError: true
                });
                if (status === "NOT_STARTED" || status === "IN_PROGRESS") {
                    continue;
                }
                if (status === "ERROR") {
                    if (verbose) {
                        console.error(`Document ingestion failed for ${doc}`);
                    }
                    if (raiseOnError) {
                        throw new Error(`Document ingestion failed for ${doc}`);
                    }
                }
                docsToRemove.add(doc);
            }
            for (const doc of docsToRemove){
                pendingDocs.delete(doc);
            }
            if (pendingDocs.size) {
                if (verbose) {
                    process.stdout.write(".");
                }
                await new Promise((resolve)=>setTimeout(resolve, 500));
            }
        }
        if (verbose) {
            console.log("Done!");
        }
        await this.waitForPipelineIngestion(verbose, raiseOnError);
    }

    /**
     * Resolves the pipeline id through the imported `getPipelineId` helper.
     * Any argument that is null/undefined falls back to the matching
     * constructor param.
     */
    async getPipelineId(name, projectName, organizationId) {
        return await getPipelineId(name ?? this.params.name, projectName ?? this.params.projectName, organizationId ?? this.params.organizationId);
    }

    /**
     * Resolves the project id through the imported `getProjectId` helper.
     * Any argument that is null/undefined falls back to the matching
     * constructor param.
     */
    async getProjectId(projectName, organizationId) {
        return await getProjectId(projectName ?? this.params.projectName, organizationId ?? this.params.organizationId);
    }

    /**
     * Adds documents to the given index parameters. If the index does not exist, it will be created.
     *
     * @param params - An object containing the following properties:
     *   - documents: An array of Document objects to be added to the index.
     *   - verbose: Optional boolean to enable verbose logging.
     *   - Additional properties from CloudConstructorParams.
     * @param config - Optional settings forwarded to `ensureIndex`
     *   (`embedding`, `transform`); `verbose` is taken from `params.verbose`.
     * @returns A Promise that resolves to a new LlamaCloudIndex instance.
     */
    static async fromDocuments(params, config) {
        const index = new LlamaCloudIndex({
            ...params
        });
        await index.ensureIndex({
            ...config,
            verbose: params.verbose ?? false
        });
        await index.addDocuments(params.documents, params.verbose);
        return index;
    }

    /**
     * Upserts a batch of documents into the pipeline and polls the pipeline
     * status once per second until it is "SUCCESS", "PARTIAL_SUCCESS" or "ERROR".
     *
     * @param documents - Array of documents to upsert.
     * @param verbose - When truthy, prints progress dots and a final summary.
     * @throws {Error} If the pipeline status becomes "ERROR".
     */
    async addDocuments(documents, verbose) {
        const apiUrl = getAppBaseUrl();
        const projectId = await this.getProjectId();
        const pipelineId = await this.getPipelineId();
        const pipelineUrl = `${apiUrl}/project/${projectId}/deploy/${pipelineId}`;
        await upsertBatchPipelineDocumentsApiV1PipelinesPipelineIdDocumentsPut({
            path: {
                pipeline_id: pipelineId
            },
            body: documents.map((doc)=>toCloudDocumentPayload(doc))
        });
        while(true){
            const { data: pipelineStatus } = await getPipelineStatusApiV1PipelinesPipelineIdStatusGet({
                path: {
                    pipeline_id: pipelineId
                },
                throwOnError: true
            });
            if (pipelineStatus.status === "SUCCESS") {
                console.info("Documents ingested successfully, pipeline is ready to use");
                break;
            }
            if (pipelineStatus.status === "ERROR") {
                console.error(`Some documents failed to ingest, check your pipeline logs at ${pipelineUrl}`);
                throw new Error("Some documents failed to ingest");
            }
            if (pipelineStatus.status === "PARTIAL_SUCCESS") {
                console.info(`Documents ingestion partially succeeded, to check a more complete status check your pipeline at ${pipelineUrl}`);
                break;
            }
            if (verbose) {
                process.stdout.write(".");
            }
            await new Promise((resolve)=>setTimeout(resolve, 1000));
        }
        if (verbose) {
            console.info(`Ingestion completed, find your index at ${pipelineUrl}`);
        }
    }

    /**
     * @param params - Overrides merged on top of the constructor params.
     * @returns A new `LlamaCloudRetriever`.
     */
    asRetriever(params = {}) {
        return new LlamaCloudRetriever({
            ...this.params,
            ...params
        });
    }

    /**
     * @param params - Optional. Merged on top of the constructor params to
     *   build the retriever; `responseSynthesizer` and `nodePostprocessors`
     *   are forwarded to the query engine.
     * @returns A new `RetrieverQueryEngine` backed by a `LlamaCloudRetriever`.
     */
    asQueryEngine(params) {
        const retriever = new LlamaCloudRetriever({
            ...this.params,
            ...params
        });
        return new RetrieverQueryEngine(retriever, params?.responseSynthesizer, params?.nodePostprocessors);
    }

    /**
     * Wraps a query engine for this index in a `QueryEngineTool`.
     *
     * Note: when `params.options` is set, `params.retriever` is assigned on the
     * caller's object (existing behavior, preserved).
     *
     * @param params - `options`, `metadata`, `includeSourceNodes`, plus
     *   anything `asQueryEngine` accepts.
     */
    asQueryTool(params) {
        // Guarded like the other accesses below, so a missing `params` is not
        // dereferenced here.
        if (params?.options) {
            params.retriever = this.asRetriever(params.options);
        }
        return new QueryEngineTool({
            queryEngine: this.asQueryEngine(params),
            metadata: params?.metadata,
            includeSourceNodes: params?.includeSourceNodes ?? false
        });
    }

    /** Alias for `asQueryTool`. */
    queryTool(params) {
        return this.asQueryTool(params);
    }

    /**
     * Creates a single document in the pipeline, then waits for its ingestion.
     *
     * @param document - Document to create.
     */
    async insert(document) {
        const pipelineId = await this.getPipelineId();
        await createBatchPipelineDocumentsApiV1PipelinesPipelineIdDocumentsPost({
            path: {
                pipeline_id: pipelineId
            },
            body: [
                toCloudDocumentPayload(document)
            ]
        });
        await this.waitForDocumentIngestion([
            document.id_
        ]);
    }

    /**
     * Deletes the document with id `document.id_` from the pipeline, then waits
     * for the pipeline to reach a stopping state.
     *
     * @param document - Document to delete; only `id_` is read.
     */
    async delete(document) {
        const pipelineId = await this.getPipelineId();
        await deletePipelineDocumentApiV1PipelinesPipelineIdDocumentsDocumentIdDelete({
            path: {
                pipeline_id: pipelineId,
                document_id: document.id_
            }
        });
        await this.waitForPipelineIngestion();
    }

    /**
     * Upserts a single document into the pipeline, then waits for its ingestion.
     *
     * @param document - Document to upsert.
     */
    async refreshDoc(document) {
        const pipelineId = await this.getPipelineId();
        await upsertBatchPipelineDocumentsApiV1PipelinesPipelineIdDocumentsPut({
            path: {
                pipeline_id: pipelineId
            },
            body: [
                toCloudDocumentPayload(document)
            ]
        });
        await this.waitForDocumentIngestion([
            document.id_
        ]);
    }

    /**
     * Makes sure a pipeline named `this.params.name` exists in the project,
     * creating it when the search returns no match.
     *
     * @param config - Optional `{ embedding, transform, verbose }`.
     *   - embedding: embedding config for a new pipeline. When absent, an
     *     OpenAI config is built from the OPENAI_API_KEY and EMBEDDING_MODEL
     *     environment variables.
     *   - transform: transform config for a new pipeline. When absent,
     *     `{ mode: "auto", chunk_size: 1024, chunk_overlap: 200 }` is used.
     *   - verbose: when truthy, logs the created pipeline's id and name.
     * @throws {Error} If no embedding config is given and either environment
     *   variable is missing.
     */
    async ensureIndex(config) {
        const projectId = await this.getProjectId();
        const { data: pipelines } = await searchPipelinesApiV1PipelinesGet({
            query: {
                project_id: projectId,
                pipeline_name: this.params.name
            },
            throwOnError: true
        });
        if (pipelines.length === 0) {
            // no pipeline found, create a new one
            let embeddingConfig = config?.embedding;
            if (!embeddingConfig) {
                // no embedding config provided, use OpenAI as default
                const openAIApiKey = getEnv("OPENAI_API_KEY");
                const embeddingModel = getEnv("EMBEDDING_MODEL");
                if (!openAIApiKey || !embeddingModel) {
                    throw new Error("No embedding configuration provided. Fallback to OpenAI embedding model. OPENAI_API_KEY and EMBEDDING_MODEL environment variables must be set.");
                }
                embeddingConfig = {
                    type: "OPENAI_EMBEDDING",
                    component: {
                        api_key: openAIApiKey,
                        model_name: embeddingModel
                    }
                };
            }
            let transformConfig = config?.transform;
            if (!transformConfig) {
                transformConfig = {
                    mode: "auto",
                    chunk_size: 1024,
                    chunk_overlap: 200
                };
            }
            const { data: pipeline } = await upsertPipelineApiV1PipelinesPut({
                query: {
                    project_id: projectId
                },
                body: {
                    name: this.params.name,
                    embedding_config: embeddingConfig,
                    transform_config: transformConfig
                },
                throwOnError: true
            });
            if (config?.verbose) {
                console.log(`Created pipeline ${pipeline.id} with name ${pipeline.name}`);
            }
        }
    }
}