UNPKG

@gensx/storage

Version:

Cloud storage, blobs, sqlite, and vector database providers/hooks for GenSX.

674 lines (670 loc) 25.4 kB
/**
 * Check out the docs at https://www.gensx.com/docs
 * Find us on Github https://github.com/gensx-inc/gensx
 * Find us on Discord https://discord.gg/F5BSU8Kc
 */
import * as crypto from 'node:crypto';
import { createReadStream, createWriteStream } from 'node:fs';
import * as fs from 'node:fs/promises';
import * as path from 'node:path';
import { pipeline } from 'node:stream/promises';
import { fromBase64UrlSafe, toBase64UrlSafe } from '../utils/base64.js';
import { BlobError, BlobNotFoundError, BlobPermissionDeniedError, BlobConflictError, BlobInternalError } from './types.js';

/* eslint-disable @typescript-eslint/only-throw-error */

/** Suffix appended to a blob's file path to locate its metadata sidecar file. */
const METADATA_SUFFIX = ".metadata.json";

/**
 * Helper to convert between filesystem errors and BlobErrors.
 *
 * This function never returns; call sites write `throw handleFsError(...)`
 * purely so that control flow is obvious to readers and tooling.
 *
 * @param err The error to convert
 * @param operation The operation that failed
 * @param path The path where the error occurred
 * @throws BlobError with appropriate error code and message
 */
function handleFsError(err, operation, path) {
    // An error that has already been translated (for example an ETag conflict
    // raised inside a nested try block) is rethrown untouched, so it is never
    // re-wrapped and reported as an internal error.
    if (err instanceof BlobError) {
        throw err;
    }
    if (err instanceof Error) {
        switch (err.code) {
            case "ENOENT":
                throw new BlobNotFoundError(`Blob not found at path: ${path}`, err);
            case "EACCES":
                throw new BlobPermissionDeniedError(`Permission denied for operation ${operation} on path: ${path}`, err);
            case "EEXIST":
                throw new BlobConflictError(`File already exists at path: ${path}`, err);
            case "ENOTEMPTY":
                throw new BlobConflictError(`Directory not empty at path: ${path}`, err);
            case "ENOTDIR":
                throw new BlobInternalError(`Path is not a directory: ${path}`, err);
            default:
                break;
        }
    }
    // Default error case
    throw new BlobInternalError(`Error during ${operation}: ${String(err)}`, err);
}

/**
 * Tell whether a caught value is a "no such file or directory" error.
 * Uses optional chaining because anything (including null) can be thrown.
 * @param err The caught value
 * @returns true when the value carries the code "ENOENT"
 */
function isNotFound(err) {
    return err?.code === "ENOENT";
}

/**
 * Calculate an MD5 hash of the content for use as an ETag
 * @param content The content to hash
 * @returns The MD5 hash as a hex string
 */
function calculateEtag(content) {
    return crypto.createHash("md5").update(content).digest("hex");
}

/**
 * Generate a unique filename for metadata
 * @param filePath The path to the blob file
 * @returns The path to the metadata file
 */
function getMetadataPath(filePath) {
    return `${filePath}${METADATA_SUFFIX}`;
}

/**
 * Verify that the blob currently on disk still has the ETag the caller expects.
 * A missing blob is not a conflict: there is nothing to overwrite.
 * The hash is taken over the raw bytes, the same way getMetadata and getRaw
 * compute the ETag they hand out.
 * @param filePath The path to the blob file
 * @param expectedEtag The ETag supplied by the caller
 * @param operation Operation label used if reading the blob fails
 * @throws BlobConflictError if the stored content hashes to a different ETag
 * @throws BlobError if the blob exists but cannot be read
 */
async function assertEtagMatches(filePath, expectedEtag, operation) {
    let existingContent;
    try {
        existingContent = await fs.readFile(filePath);
    } catch (err) {
        if (isNotFound(err)) {
            return;
        }
        throw handleFsError(err, operation, filePath);
    }
    const existingEtag = calculateEtag(existingContent);
    if (existingEtag !== expectedEtag) {
        throw new BlobConflictError(`ETag mismatch: expected ${expectedEtag} but found ${existingEtag}`);
    }
}

/**
 * Remove a file, treating "already absent" as success.
 * @param target The path to remove
 * @param operation Operation label used if the removal fails
 * @throws BlobError if the file exists but cannot be removed
 */
async function unlinkIfExists(target, operation) {
    try {
        await fs.unlink(target);
    } catch (err) {
        if (!isNotFound(err)) {
            throw handleFsError(err, operation, target);
        }
    }
}

/**
 * Walk a directory tree and collect every blob file (metadata sidecars are skipped).
 * Results are appended to `found` rather than spread into it, so very large
 * directories cannot exceed the engine's argument-count limit.
 * @param dir The directory to scan
 * @param baseDir The directory that keys are made relative to
 * @param found Accumulator that receives `{ key, lastModified, size }` entries
 * @returns The accumulator, unsorted
 */
async function collectBlobFiles(dir, baseDir, found = []) {
    const entries = await fs.readdir(dir, { withFileTypes: true });
    for (const entry of entries) {
        const fullPath = path.join(dir, entry.name);
        if (entry.isDirectory()) {
            await collectBlobFiles(fullPath, baseDir, found);
        } else if (!entry.name.endsWith(METADATA_SUFFIX)) {
            const stats = await fs.stat(fullPath);
            found.push({
                // Keys always use "/" so prefix filtering behaves the same on
                // platforms whose native separator is "\".
                key: path.relative(baseDir, fullPath).split(path.sep).join("/"),
                lastModified: stats.mtime.toISOString(),
                size: stats.size,
            });
        }
    }
    return found;
}

/**
 * Implementation of Blob interface for filesystem storage
 * @template T The type of data stored in the blob
 */
class FileSystemBlob {
    filePath;
    metadataPath;
    /**
     * Create a new FileSystemBlob
     * @param filePath The path to the blob file
     */
    constructor(filePath) {
        this.filePath = filePath;
        this.metadataPath = getMetadataPath(filePath);
    }
    /**
     * Get JSON data from the blob
     * @returns The JSON data or null if the blob doesn't exist
     * @throws BlobError if there's an error reading or parsing the blob
     */
    async getJSON() {
        try {
            const content = await fs.readFile(this.filePath, "utf8");
            return JSON.parse(content);
        } catch (err) {
            if (isNotFound(err)) {
                return null;
            }
            throw handleFsError(err, "getJSON", this.filePath);
        }
    }
    /**
     * Get string data from the blob
     * @returns The string data or null if the blob doesn't exist
     * @throws BlobError if there's an error reading the blob
     */
    async getString() {
        try {
            return await fs.readFile(this.filePath, "utf8");
        } catch (err) {
            if (isNotFound(err)) {
                return null;
            }
            throw handleFsError(err, "getString", this.filePath);
        }
    }
    /**
     * Get a readable stream from the blob
     *
     * Errors raised while the stream is being consumed are delivered through
     * the stream itself (its "error" event, or a rejection when it is iterated
     * or piped), so the consumer is the one who handles them.
     * @returns A readable stream of the blob content
     * @throws BlobNotFoundError if the blob doesn't exist
     * @throws BlobError if there's an error creating the stream
     */
    async getStream() {
        try {
            // Fail early and with a typed error if the file is missing;
            // handleFsError maps ENOENT to BlobNotFoundError.
            await fs.access(this.filePath);
            // No "error" listener is attached here on purpose: throwing from a
            // listener cannot reach the caller and would surface as an
            // uncaught exception instead.
            return createReadStream(this.filePath);
        } catch (err) {
            throw handleFsError(err, "getStream", this.filePath);
        }
    }
    /**
     * Get raw binary data from the blob
     * @returns The raw data and metadata or null if the blob doesn't exist
     * @throws BlobError if there's an error reading the blob
     */
    async getRaw() {
        try {
            // Read the raw buffer
            const content = await fs.readFile(this.filePath);
            const stats = await fs.stat(this.filePath);
            const metadataResult = await this.getMetadata();
            const etag = calculateEtag(content);
            // Content flagged as base64 in the metadata is decoded before being returned
            const isBase64 = metadataResult?.isBase64 === "true";
            const decodedContent = isBase64 ? Buffer.from(content.toString(), "base64") : content;
            // contentType and etag are reported as top-level fields, so drop them from the metadata bag
            const { contentType: _contentType, etag: _etag, ...metadata } = metadataResult ?? {};
            return {
                content: decodedContent,
                etag,
                lastModified: stats.mtime,
                size: stats.size,
                contentType: metadataResult?.contentType ?? "application/octet-stream",
                metadata: Object.keys(metadata).length > 0 ? metadata : undefined,
            };
        } catch (err) {
            if (isNotFound(err)) {
                return null;
            }
            throw handleFsError(err, "getRaw", this.filePath);
        }
    }
    /**
     * Put JSON data into the blob
     * @param value The JSON data to store
     * @param options Optional metadata, content type and ETag
     * @returns The ETag of the stored data
     * @throws BlobConflictError if options.etag is given and doesn't match the stored blob
     * @throws BlobError if there's an error storing the data
     */
    async putJSON(value, options) {
        try {
            await fs.mkdir(path.dirname(this.filePath), { recursive: true });
            const content = JSON.stringify(value);
            const newEtag = calculateEtag(content);
            if (options?.etag) {
                await assertEtagMatches(this.filePath, options.etag, "putJSON:etag-check");
            }
            await fs.writeFile(this.filePath, content, "utf8");
            // Always create metadata file with content type
            await this.updateMetadata({
                ...(options?.metadata ?? {}),
                contentType: options?.contentType ?? "application/json",
            });
            return { etag: newEtag };
        } catch (err) {
            throw handleFsError(err, "putJSON", this.filePath);
        }
    }
    /**
     * Put string data into the blob
     * @param value The string data to store
     * @param options Optional metadata, content type and ETag
     * @returns The ETag of the stored data
     * @throws BlobConflictError if options.etag is given and doesn't match the stored blob
     * @throws BlobError if there's an error storing the data
     */
    async putString(value, options) {
        try {
            await fs.mkdir(path.dirname(this.filePath), { recursive: true });
            const newEtag = calculateEtag(value);
            if (options?.etag) {
                await assertEtagMatches(this.filePath, options.etag, "putString:etag-check");
            }
            await fs.writeFile(this.filePath, value, "utf8");
            // Always create metadata file with content type
            await this.updateMetadata({
                ...(options?.metadata ?? {}),
                contentType: options?.contentType ?? "text/plain",
            });
            return { etag: newEtag };
        } catch (err) {
            throw handleFsError(err, "putString", this.filePath);
        }
    }
    /**
     * Put raw binary data into the blob
     * @param value The binary data to store
     * @param options Optional metadata, content type and ETag
     * @returns The ETag of the stored data
     * @throws BlobConflictError if options.etag is given and doesn't match the stored blob
     * @throws BlobError if there's an error storing the data
     */
    async putRaw(value, options) {
        try {
            await fs.mkdir(path.dirname(this.filePath), { recursive: true });
            const newEtag = calculateEtag(value);
            if (options?.etag) {
                await assertEtagMatches(this.filePath, options.etag, "putRaw:etag-check");
            }
            // Write the raw buffer
            await fs.writeFile(this.filePath, value);
            // Always create metadata file with content type
            await this.updateMetadata({
                ...(options?.metadata ?? {}),
                contentType: options?.contentType ?? "application/octet-stream",
            });
            return { etag: newEtag };
        } catch (err) {
            throw handleFsError(err, "putRaw", this.filePath);
        }
    }
    /**
     * Get the blob content (parsed as JSON) and metadata
     * @returns The blob content and metadata or null if the blob doesn't exist
     * @throws BlobError if there's an error reading or parsing the blob
     */
    async get() {
        try {
            const content = await fs.readFile(this.filePath, "utf8");
            const stats = await fs.stat(this.filePath);
            const metadata = await this.getMetadata();
            const etag = calculateEtag(content);
            return {
                content: JSON.parse(content),
                etag,
                lastModified: stats.mtime,
                size: stats.size,
                contentType: metadata?.contentType ?? "application/json",
                metadata: metadata ?? undefined,
            };
        } catch (err) {
            if (isNotFound(err)) {
                return null; // File doesn't exist
            }
            throw handleFsError(err, "get", this.filePath);
        }
    }
    /**
     * Put data from a stream into the blob
     *
     * The data is piped to disk with backpressure and hashed chunk by chunk,
     * so the whole payload is never held in memory.
     * @param stream The stream to read from
     * @param options Optional metadata, content type and ETag
     * @returns The ETag of the stored data
     * @throws BlobConflictError if options.etag is given and doesn't match the stored blob
     * @throws BlobError if there's an error storing the data
     */
    async putStream(stream, options) {
        try {
            await fs.mkdir(path.dirname(this.filePath), { recursive: true });
            // Check the precondition before the write stream is opened,
            // because opening it truncates the existing file.
            if (options?.etag) {
                await assertEtagMatches(this.filePath, options.etag, "putStream:etag-check");
            }
            const hash = crypto.createHash("md5");
            // pipeline() propagates errors from either side as a rejection and
            // destroys both streams on failure, so no "error" listeners are needed.
            await pipeline(stream, async function* (source) {
                for await (const chunk of source) {
                    const buffer = Buffer.from(chunk);
                    hash.update(buffer);
                    yield buffer;
                }
            }, createWriteStream(this.filePath));
            const etag = hash.digest("hex");
            // Always create metadata file with content type
            await this.updateMetadata({
                ...(options?.metadata ?? {}),
                contentType: options?.contentType ?? "application/octet-stream",
            });
            return { etag };
        } catch (err) {
            throw handleFsError(err, "putStream", this.filePath);
        }
    }
    /**
     * Delete the blob and its metadata file. Deleting a blob that doesn't exist is not an error.
     * @throws BlobError if there's an error deleting the blob
     */
    async delete() {
        await unlinkIfExists(this.filePath, "delete:file");
        // Also delete metadata file if it exists
        await unlinkIfExists(this.metadataPath, "delete:metadata");
    }
    /**
     * Check if the blob exists
     * @returns true if the blob file is accessible, false otherwise
     */
    async exists() {
        try {
            await fs.access(this.filePath);
            return true;
        } catch {
            return false;
        }
    }
    /**
     * Get the blob metadata
     * @returns The stored metadata plus the blob's current ETag, or null if no metadata file exists
     * @throws BlobError if there's an error reading the metadata
     */
    async getMetadata() {
        let metadata;
        try {
            const content = await fs.readFile(this.metadataPath, "utf8");
            metadata = JSON.parse(content);
        } catch (err) {
            if (isNotFound(err)) {
                return null; // Metadata file doesn't exist
            }
            throw handleFsError(err, "getMetadata", this.metadataPath);
        }
        // Calculate etag from the blob content instead of the metadata content
        try {
            const blobContent = await fs.readFile(this.filePath);
            return { ...metadata, etag: calculateEtag(blobContent) };
        } catch (err) {
            if (isNotFound(err)) {
                // If blob doesn't exist but metadata does, return metadata without etag
                return metadata;
            }
            throw handleFsError(err, "getMetadata:blob", this.filePath);
        }
    }
    /**
     * Update metadata associated with the blob
     * @param metadata The new metadata to store
     * @param options Optional ETag for conditional update
     * @throws BlobConflictError if options.etag is given and doesn't match the stored blob
     * @throws BlobError if there's an error updating the metadata
     */
    async updateMetadata(metadata, options) {
        try {
            // If etag is provided, verify it matches before updating
            if (options?.etag) {
                await assertEtagMatches(this.filePath, options.etag, "updateMetadata:etag-check");
            }
            await fs.mkdir(path.dirname(this.metadataPath), { recursive: true });
            // Get existing metadata to preserve contentType
            let existingMetadata = {};
            try {
                const content = await fs.readFile(this.metadataPath, "utf8");
                existingMetadata = JSON.parse(content);
            } catch (err) {
                if (!isNotFound(err)) {
                    throw handleFsError(err, "updateMetadata:read", this.metadataPath);
                }
            }
            // A contentType that is already stored wins over the one passed in.
            // NOTE(review): this also means a later put*() cannot change the
            // content type of an existing blob - confirm that is intended.
            const newMetadata = { ...metadata };
            const contentType = existingMetadata?.contentType;
            if (contentType) {
                newMetadata.contentType = contentType;
            }
            await fs.writeFile(this.metadataPath, JSON.stringify(newMetadata), "utf8");
        } catch (err) {
            throw handleFsError(err, "updateMetadata", this.metadataPath);
        }
    }
}

/**
 * FileSystem implementation of blob storage
 */
class FileSystemBlobStorage {
    rootDir;
    defaultPrefix;
    /**
     * Create a new FileSystemBlobStorage
     * @param rootDir The root directory for blob storage
     * @param defaultPrefix Optional default prefix for all blob keys
     */
    constructor(rootDir, defaultPrefix) {
        this.rootDir = rootDir;
        this.defaultPrefix = defaultPrefix;
        // Ensure rootDir exists on instantiation. This is best-effort: a
        // constructor cannot await, and every write path creates its own
        // parent directories, so a failure here shows up again on the first
        // real operation instead of as an unhandled promise rejection.
        this.ensureRootDir().catch(() => { });
    }
    /**
     * Ensure the root directory exists
     * @throws BlobError if there's an error creating the directory
     */
    async ensureRootDir() {
        try {
            await fs.mkdir(this.rootDir, { recursive: true });
        } catch (err) {
            throw handleFsError(err, "ensureRootDir", this.rootDir);
        }
    }
    /**
     * Get the full path for a blob key
     * @param key The blob key
     * @returns The full path to the blob
     */
    getFullPath(key) {
        // Normalize key by removing leading/trailing slashes
        const normalizedKey = key.replace(/^\/+|\/+$/g, "");
        // Apply default prefix if specified
        const prefixedKey = this.defaultPrefix
            ? `${this.defaultPrefix}/${normalizedKey}`
            : normalizedKey;
        return path.join(this.rootDir, prefixedKey);
    }
    /**
     * Get a blob by key
     * @param key The blob key
     * @returns A Blob instance for the given key
     */
    getBlob(key) {
        return new FileSystemBlob(this.getFullPath(key));
    }
    /**
     * List all blobs with optional pagination
     * @param options Options for listing blobs: `prefix` (default ""), `limit` (default 100) and `cursor`
     * @returns A page of `{ key, lastModified, size }` entries and, when more remain, a `nextCursor`
     * @throws BlobError if there's an error listing blobs
     */
    async listBlobs(options) {
        try {
            const { prefix = "", limit = 100, cursor = undefined } = (options ?? {});
            // Normalize prefixes by removing trailing slashes
            const normalizedDefaultPrefix = this.defaultPrefix?.replace(/\/$/, "");
            const normalizedPrefix = prefix.replace(/\/$/, "");
            // Build the search prefix
            const searchPrefix = [normalizedDefaultPrefix, normalizedPrefix]
                .filter(Boolean)
                .join("/");
            const searchPath = path.join(this.rootDir, searchPrefix);
            try {
                // Check if directory exists
                await fs.access(searchPath);
            } catch (err) {
                if (isNotFound(err)) {
                    return { blobs: [], nextCursor: undefined }; // Directory doesn't exist
                }
                throw handleFsError(err, "listBlobs:access", searchPath);
            }
            let allFiles = await collectBlobFiles(searchPath, this.rootDir);
            // Remove default prefix from results if it exists
            if (normalizedDefaultPrefix) {
                allFiles = allFiles
                    .filter((file) => file.key === normalizedDefaultPrefix ||
                    file.key.startsWith(`${normalizedDefaultPrefix}/`))
                    .map((file) => ({
                    ...file,
                    key: file.key === normalizedDefaultPrefix
                        ? ""
                        : file.key.slice(normalizedDefaultPrefix.length + 1),
                }));
            }
            // Sort once, on the keys that are actually returned, for consistent pagination
            allFiles.sort((a, b) => a.key.localeCompare(b.key));
            // Handle cursor-based pagination
            let startIndex = 0;
            if (cursor) {
                const decodedCursor = fromBase64UrlSafe(cursor);
                const exactIndex = allFiles.findIndex((file) => file.key === decodedCursor);
                if (exactIndex !== -1) {
                    startIndex = exactIndex + 1;
                } else {
                    // The cursor's file is gone: resume at the first key that
                    // sorts after it, using the same comparison as the sort
                    // above so no key is skipped or repeated.
                    const nextIndex = allFiles.findIndex((file) => file.key.localeCompare(decodedCursor) > 0);
                    startIndex = nextIndex === -1 ? allFiles.length : nextIndex;
                }
            }
            // Handle limit
            const endIndex = Math.min(startIndex + limit, allFiles.length);
            const items = allFiles.slice(startIndex, endIndex);
            // Generate next cursor. An empty page (e.g. limit 0) has no last
            // key to resume from, so it carries no cursor.
            const nextCursor = items.length > 0 && endIndex < allFiles.length
                ? toBase64UrlSafe(items[items.length - 1].key)
                : undefined;
            return {
                blobs: items,
                nextCursor,
            };
        } catch (err) {
            throw handleFsError(err, "listBlobs", this.rootDir);
        }
    }
    /**
     * Check if a blob exists
     * @param key The blob key
     * @returns True if the blob exists, false otherwise
     */
    async blobExists(key) {
        const blob = this.getBlob(key);
        return blob.exists();
    }
    /**
     * Delete a blob
     * @param key The blob key
     * @returns `{ deleted }`, where `deleted` tells whether the blob existed before the call
     * @throws BlobError if there's an error deleting the blob
     */
    async deleteBlob(key) {
        try {
            const blob = this.getBlob(key);
            const existed = await blob.exists();
            await blob.delete();
            return { deleted: existed };
        } catch (err) {
            throw handleFsError(err, "deleteBlob", this.getFullPath(key));
        }
    }
}

export { FileSystemBlobStorage };
//# sourceMappingURL=filesystem.js.map