UNPKG

@langchain/community

Version:
203 lines (202 loc) 8.36 kB
Object.defineProperty(exports, Symbol.toStringTag, { value: "Module" }); const require_runtime = require("../_virtual/_rolldown/runtime.cjs"); const require_utils_cassandra = require("../utils/cassandra.cjs"); let _langchain_core_stores = require("@langchain/core/stores"); //#region src/storage/cassandra.ts var cassandra_exports = /* @__PURE__ */ require_runtime.__exportAll({ CassandraKVStore: () => CassandraKVStore }); /** * A concrete implementation of BaseStore for interacting with a Cassandra database. * It provides methods to get, set, delete, and yield keys based on specified criteria. */ var CassandraKVStore = class extends _langchain_core_stores.BaseStore { lc_namespace = ["langchain", "storage"]; cassandraTable; options; colKey; colKeyMap; colVal; keyDelimiter; inClauseSize = 1e3; yieldKeysFetchSize = 5e3; constructor(options) { super(options); this.options = options; this.colKey = { name: "key", type: "text", partition: true }; this.colKeyMap = { name: "key_map", type: "map<tinyint,text>" }; this.colVal = { name: "val", type: "blob" }; this.keyDelimiter = options.keyDelimiter || "/"; } /** * Retrieves the values associated with an array of keys from the Cassandra database. * It chunks requests for large numbers of keys to manage performance and Cassandra limitations. * @param keys An array of keys for which to retrieve values. * @returns A promise that resolves with an array of Uint8Array or undefined, corresponding to each key. */ async mget(keys) { await this.ensureTable(); const processFunction = async (chunkKeys) => { const chunkResults = await this.cassandraTable.select([this.colKey, this.colVal], [{ name: this.colKey.name, operator: "IN", value: chunkKeys }]); const useMap = chunkKeys.length > 25; const rowsMap = useMap ? new Map(chunkResults.rows.map((row) => [row[this.colKey.name], row])) : null; return chunkKeys.map((key) => { const row = useMap && rowsMap ? rowsMap.get(key) : chunkResults.rows.find((row) => row[this.colKey.name] === key); if (row && row[this.colVal.name]) { const buffer = row[this.colVal.name]; return new Uint8Array(buffer.buffer, buffer.byteOffset, buffer.byteLength); } }); }; return await this.processInChunks(keys, processFunction) || []; } /** * Sets multiple key-value pairs in the Cassandra database. * Each key-value pair is processed to ensure compatibility with Cassandra's storage requirements. * @param keyValuePairs An array of key-value pairs to set in the database. * @returns A promise that resolves when all key-value pairs have been set. */ async mset(keyValuePairs) { await this.ensureTable(); const values = keyValuePairs.map(([key, value]) => { return [ key, key.split(this.keyDelimiter).reduce((acc, segment, index) => { acc[index] = segment; return acc; }, {}), Buffer.from(value.buffer, value.byteOffset, value.byteLength) ]; }); await this.cassandraTable.upsert(values, [ this.colKey, this.colKeyMap, this.colVal ]); } /** * Deletes multiple keys and their associated values from the Cassandra database. * @param keys An array of keys to delete from the database. * @returns A promise that resolves when all specified keys have been deleted. */ async mdelete(keys) { if (keys.length > 0) { await this.ensureTable(); const processFunction = async (chunkKeys) => { const filter = { name: this.colKey.name, operator: "IN", value: chunkKeys }; await this.cassandraTable.delete(filter); }; await this.processInChunks(keys, processFunction); } } /** * Yields keys from the Cassandra database optionally based on a prefix, based * on the store's keyDelimiter. This method pages through results efficiently * for large datasets. * @param prefix An optional prefix to filter the keys to be yielded. * @returns An async generator that yields keys from the database. */ async *yieldKeys(prefix) { await this.ensureTable(); const filter = []; if (prefix) { let segments = prefix.split(this.keyDelimiter); if (segments[segments.length - 1] === "") segments = segments.slice(0, -1); segments.forEach((segment, index) => { filter.push({ name: `${this.colKeyMap.name}[${index}]`, operator: "=", value: segment }); }); } let currentPageState; do { const results = await this.cassandraTable.select([this.colKey], filter, void 0, void 0, false, this.yieldKeysFetchSize, currentPageState); for (const row of results.rows) yield row[this.colKey.name]; currentPageState = results.pageState; } while (currentPageState); } /** * Ensures the Cassandra table is initialized and ready for operations. * This method is called internally before database operations. * @returns A promise that resolves when the table is ensured to exist and be accessible. */ async ensureTable() { if (this.cassandraTable) return; this.cassandraTable = await new require_utils_cassandra.CassandraTable({ ...this.options, primaryKey: [this.colKey], nonKeyColumns: [this.colKeyMap, this.colVal], indices: [{ name: this.colKeyMap.name, value: `( ENTRIES (${this.colKeyMap.name}))` }] }); } /** * Processes an array of keys in chunks, applying a given processing function to each chunk. * This method is designed to handle large sets of keys by breaking them down into smaller * manageable chunks, applying the processing function to each chunk sequentially. This approach * helps in managing resource utilization and adhering to database query limitations. * * The method is generic, allowing for flexible processing functions that can either perform actions * without returning a result (e.g., deletion operations) or return a result (e.g., data retrieval). * This design enables the method to be used across a variety of batch processing scenarios. * * @template T The type of elements in the result array when the processFunction returns data. This * is used to type the resolution of the promise returned by processFunction. For void * operations, T can be omitted or set to any empty interface or null type. * @param keys The complete array of keys to be processed. The method chunks this array * based on the specified CHUNK_SIZE. * @param processFunction A function that will be applied to each chunk of keys. This function * should accept an array of strings (chunkKeys) and return a Promise * that resolves to either void (for operations that don't produce a result, * like deletion) or an array of type T (for operations that fetch data, * like retrieval). The array of type T should match the template parameter. * @param CHUNK_SIZE (optional) The maximum size of each chunk. If not specified, the class's * `inClauseSize` property is used as the default chunk size. This value determines * how many keys are included in each chunk and should be set based on the * operation's performance characteristics and any limitations of the underlying * storage system. * * @returns A Promise that resolves to void if the processing function returns void, or an array * of type T if the processing function returns data. If the processing function returns * data for each chunk, the results from all chunks are concatenated and returned as a * single array. If the processing function does not return data, the method resolves to undefined, * aligning with the void return expectation for non-data-returning operations. */ async processInChunks(keys, processFunction, CHUNK_SIZE = this.inClauseSize) { let results = []; for (let i = 0; i < keys.length; i += CHUNK_SIZE) { const chunkResult = await processFunction(keys.slice(i, i + CHUNK_SIZE)); if (Array.isArray(chunkResult)) results = results.concat(chunkResult); } return results.length > 0 ? results : void 0; } }; //#endregion exports.CassandraKVStore = CassandraKVStore; Object.defineProperty(exports, "cassandra_exports", { enumerable: true, get: function() { return cassandra_exports; } }); //# sourceMappingURL=cassandra.cjs.map