
bigquery-client

Version: 1.0.6

A feature-rich Node.js client for Google BigQuery with support for CRUD operations, transactions, query building, and advanced features like aggregate functions, pagination, and logging.

715 lines (714 loc) 25.7 kB
/**
 * @fileoverview BigQuery Client ORM - A comprehensive TypeScript ORM for Google BigQuery
 * @version 1.0.6
 * @author Pravin Jadhav
 * @description This module provides a high-level interface for interacting with Google BigQuery,
 * including CRUD operations, query caching, metrics collection, and advanced features like
 * materialized views and partitioned tables.
 */
import { QueryResult, MaterializedViewConfig, PartitionedTableConfig } from '../types';
/**
 * Configuration interface for BigQuery client initialization
 * @interface BigQueryClientConfig
 */
interface BigQueryClientConfig {
    /** Google Cloud Project ID */
    projectId: string;
    /** BigQuery Dataset ID */
    datasetId: string;
    /** Enable query and error logging (default: false) */
    enableLogging?: boolean;
    /** Enable query result caching (default: false) */
    enableCache?: boolean;
    /** Cache time-to-live in milliseconds (default: 300000 = 5 minutes) */
    cacheTtl?: number;
    /** Maximum number of cached queries (default: 1000) */
    cacheMaxSize?: number;
}
/**
 * Configuration interface for SELECT query operations
 * @interface SelectOptions
 */
interface SelectOptions {
    /** Target table name */
    table: string;
    /** Columns to select - can be array of strings or object with table mappings */
    columns?: string[] | Record<string, string[]>;
    /** JOIN configurations for multi-table queries */
    joins?: {
        /** Table to join with */
        table: string;
        /** Join conditions mapping */
        on: Record<string, string>;
        /** Type of join operation */
        type?: 'INNER' | 'LEFT' | 'RIGHT' | 'FULL';
    }[];
    /** WHERE clause conditions */
    where?: Record<string, any>;
    /** GROUP BY columns */
    groupBy?: string[];
    /** ORDER BY configurations */
    orderBy?: {
        column: string;
        direction?: 'ASC' | 'DESC';
    }[];
    /** Maximum number of rows to return */
    limit?: number;
    /** Number of rows to skip */
    offset?: number;
}
/**
 * Configuration interface for INSERT operations
 * @interface InsertOptions
 */
interface InsertOptions {
    /** Target table name */
    table: string;
    /** Array of row objects to insert */
    rows: Record<string, any>[];
}
/**
 * Configuration interface for UPDATE operations
 * @interface UpdateOptions
 */
interface UpdateOptions {
    /** Target table name */
    table: string;
    /** Fields to update with their new values */
    set: Record<string, any>;
    /** WHERE clause conditions for row selection */
    where: Record<string, any>;
}
/**
 * Configuration interface for DELETE operations
 * @interface DeleteOptions
 */
interface DeleteOptions {
    /** Target table name */
    table: string;
    /** WHERE clause conditions for row selection */
    where: Record<string, any>;
}
/**
 * Configuration interface for MERGE operations
 * @interface MergeOptions
 */
interface MergeOptions {
    /** Target table for merge operation */
    targetTable: string;
    /** Source table for merge operation */
    sourceTable: string;
    /** Join conditions between target and source */
    on: Record<string, string>;
    /** Action to take when records match */
    whenMatched?: string;
    /** Action to take when records don't match */
    whenNotMatched?: string;
}
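/*
 * Illustrative sketch (an assumption about the implementation, not taken from
 * the package): the option objects above are plain data that the client
 * presumably compiles into parameterized SQL. For example, a SelectOptions
 * value such as
 *
 *   { table: 'users', columns: ['id', 'name'], where: { active: true }, limit: 10 }
 *
 * would be expected to compile to roughly
 *
 *   SELECT id, name FROM `project.dataset.users` WHERE active = ? LIMIT 10
 *
 * with the WHERE value passed as a bound parameter rather than interpolated
 * into the SQL string (see the SQL injection protection notes below).
 */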
/**
 * BigQuery Client ORM - Main class for BigQuery operations
 *
 * This class provides a comprehensive interface for interacting with Google BigQuery,
 * including CRUD operations, query caching, metrics collection, SQL injection protection,
 * and advanced features like materialized views and partitioned tables.
 *
 * @class BigQueryClient
 * @example
 * ```typescript
 * const client = new BigQueryClient({
 *   projectId: 'my-project',
 *   datasetId: 'my-dataset',
 *   enableLogging: true,
 *   enableCache: true
 * });
 *
 * // Execute a SELECT query
 * const result = await client.select({
 *   table: 'users',
 *   columns: ['id', 'name', 'email'],
 *   where: { active: true },
 *   limit: 10
 * });
 * ```
 */
export declare class BigQueryClient {
    /** Google BigQuery client instance */
    private bigQuery;
    /** Google Cloud Project ID */
    private projectId;
    /** BigQuery Dataset ID */
    private datasetId;
    /** Logger instance for query and error logging */
    private logger;
    /** Query cache instance for result caching */
    private queryCache;
    /** Metrics collector for performance monitoring */
    private metricsCollector;
    /**
     * Creates a new BigQueryClient instance
     *
     * @param {BigQueryClientConfig} config - Configuration object for the client
     * @example
     * ```typescript
     * const client = new BigQueryClient({
     *   projectId: 'my-gcp-project',
     *   datasetId: 'analytics_data',
     *   enableLogging: true,
     *   enableCache: true,
     *   cacheTtl: 600000, // 10 minutes
     *   cacheMaxSize: 500
     * });
     * ```
     */
    constructor(config: BigQueryClientConfig);
    /**
     * Formats a table name to include the full BigQuery path
     *
     * @private
     * @param {string} table - The table name to format
     * @returns {string} Fully qualified table name in format `project.dataset.table`
     * @example
     * ```typescript
     * // Input: 'users'
     * // Output: '`my-project.my-dataset.users`'
     * ```
     */
    private formatTableName;
    /**
     * Executes a raw SQL query on BigQuery with caching and validation
     *
     * This method provides the core query execution functionality with built-in:
     * - SQL injection protection
     * - Parameter validation
     * - Query result caching
     * - Performance metrics collection
     * - Error handling and logging
     *
     * @template T - The expected type of the query result data
     * @param {string} sql - The SQL query string to execute
     * @param {any[]} [params] - Optional parameters for parameterized queries
     * @returns {Promise<QueryResult<T>>} Promise resolving to query results with metadata
     * @throws {BigQueryError} When query validation fails or execution errors occur
     *
     * @example
     * ```typescript
     * // Simple query
     * const result = await client.query('SELECT * FROM users WHERE active = ?', [true]);
     *
     * // With type safety
     * interface User { id: number; name: string; email: string; }
     * const users = await client.query<User>('SELECT id, name, email FROM users');
     *
     * // Access results
     * console.log(result.data); // Query results
     * console.log(result.metadata.executionTime); // Performance metrics
     * console.log(result.metadata.cacheHit); // Whether result was cached
     * ```
     */
    query<T>(sql: string, params?: any[]): Promise<QueryResult<T>>;
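    /*
     * Caching sketch (illustrative; assumes the cache key is derived from the
     * SQL text plus its parameters, as the private generateCacheKey member
     * further down suggests):
     *
     *   const first = await client.query('SELECT * FROM users WHERE active = ?', [true]);
     *   console.log(first.metadata.cacheHit);  // false - fetched from BigQuery
     *
     *   const second = await client.query('SELECT * FROM users WHERE active = ?', [true]);
     *   console.log(second.metadata.cacheHit); // true - served from cache until cacheTtl expires
     */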
    /**
     * Executes a dry run of a SQL query to analyze its execution plan
     *
     * This method performs query validation and cost estimation without actually
     * executing the query or processing any data. Useful for:
     * - Query optimization
     * - Cost estimation
     * - Syntax validation
     * - Performance planning
     *
     * @param {string} sql - The SQL query string to analyze
     * @param {any[]} [params] - Optional parameters for parameterized queries
     * @returns {Promise<any>} Promise resolving to query job metadata and execution plan
     * @throws {Error} When dry run execution fails
     *
     * @example
     * ```typescript
     * const plan = await client.explain('SELECT * FROM large_table WHERE date > ?', ['2023-01-01']);
     * console.log(plan.statistics.totalBytesProcessed); // Estimated bytes
     * console.log(plan.statistics.creationTime); // Query creation time
     * ```
     */
    explain(sql: string, params?: any[]): Promise<any>;
    /**
     * Executes a SELECT query with advanced features like JOINs, aggregations, and filtering
     *
     * This method provides a high-level interface for SELECT operations with support for:
     * - Multi-table JOINs with automatic aliasing
     * - Aggregate functions (SUM, COUNT, AVG, etc.)
     * - Complex WHERE conditions
     * - GROUP BY and ORDER BY clauses
     * - LIMIT and OFFSET for pagination
     * - Automatic SQL injection protection
     *
     * @param {SelectOptions} options - Configuration object for the SELECT query
     * @returns {Promise<{success: boolean, message: string, data: any[]}>} Promise resolving to query results
     * @throws {BigQueryError} When query construction or execution fails
     *
     * @example
     * ```typescript
     * // Simple SELECT
     * const users = await client.select({
     *   table: 'users',
     *   columns: ['id', 'name', 'email'],
     *   where: { active: true },
     *   limit: 10
     * });
     *
     * // Complex SELECT with JOINs and aggregation
     * const report = await client.select({
     *   table: 'orders',
     *   columns: {
     *     orders: ['id', 'total'],
     *     users: ['name', 'email']
     *   },
     *   joins: [{
     *     table: 'users',
     *     on: { 'orders.user_id': 'users.id' },
     *     type: 'INNER'
     *   }],
     *   where: { 'orders.status': 'completed' },
     *   orderBy: [{ column: 'total', direction: 'DESC' }]
     * });
     * ```
     */
    select(options: SelectOptions): Promise<{
        success: boolean;
        message: string;
        data: any[];
    }>;
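    /*
     * Pagination sketch (illustrative): `limit` and `offset` can be combined to
     * page through a large table. Table and column names here are placeholders.
     *
     *   const pageSize = 50;
     *   for (let page = 0; ; page++) {
     *     const { data } = await client.select({
     *       table: 'users',
     *       orderBy: [{ column: 'id', direction: 'ASC' }],
     *       limit: pageSize,
     *       offset: page * pageSize,
     *     });
     *     if (data.length === 0) break;
     *     // ...process the page...
     *   }
     */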
    /**
     * Inserts multiple rows into a BigQuery table with automatic validation
     *
     * This method provides a safe and efficient way to insert data with:
     * - Automatic SQL injection protection
     * - Batch processing for multiple rows
     * - Schema validation
     * - Error handling and logging
     * - Transaction-like behavior
     *
     * @param {InsertOptions} options - Configuration object containing table and row data
     * @returns {Promise<{success: boolean, message: string, affectedRows: number}>} Promise resolving to insert results
     * @throws {BigQueryError} When validation fails or insert operation encounters errors
     *
     * @example
     * ```typescript
     * // Insert single user
     * const result = await client.insert({
     *   table: 'users',
     *   rows: [{
     *     name: 'John Doe',
     *     email: 'john@example.com',
     *     active: true,
     *     created_at: new Date().toISOString()
     *   }]
     * });
     *
     * // Insert multiple users
     * const bulkResult = await client.insert({
     *   table: 'users',
     *   rows: [
     *     { name: 'Alice', email: 'alice@example.com' },
     *     { name: 'Bob', email: 'bob@example.com' },
     *     { name: 'Charlie', email: 'charlie@example.com' }
     *   ]
     * });
     *
     * console.log(`Inserted ${bulkResult.affectedRows} rows`);
     * ```
     */
    insert(options: InsertOptions): Promise<{
        success: boolean;
        message: string;
        affectedRows: number;
    }>;
    /**
     * Updates existing rows in a BigQuery table with conditional filtering
     *
     * This method provides secure and efficient row updates with:
     * - Mandatory WHERE clause to prevent accidental full table updates
     * - Automatic SQL injection protection
     * - Field validation and type checking
     * - Atomic update operations
     * - Comprehensive error handling
     *
     * @param {UpdateOptions} options - Configuration object containing table, update fields, and conditions
     * @returns {Promise<{success: boolean, message: string, affectedRows: number}>} Promise resolving to update results
     * @throws {BigQueryError} When validation fails, WHERE clause is empty, or update operation fails
     *
     * @example
     * ```typescript
     * // Update user status
     * const result = await client.update({
     *   table: 'users',
     *   set: {
     *     active: false,
     *     updated_at: new Date().toISOString(),
     *     status: 'inactive'
     *   },
     *   where: {
     *     user_id: 123,
     *     email: 'user@example.com'
     *   }
     * });
     *
     * // Update with complex conditions
     * const bulkUpdate = await client.update({
     *   table: 'orders',
     *   set: { status: 'shipped', shipped_date: '2023-12-01' },
     *   where: { status: 'processing', priority: 'high' }
     * });
     *
     * console.log(`Updated ${result.affectedRows} rows`);
     * ```
     */
    update(options: UpdateOptions): Promise<{
        success: boolean;
        message: string;
        affectedRows: number;
    }>;
    /**
     * Deletes rows from a BigQuery table with mandatory filtering conditions
     *
     * This method provides secure row deletion with built-in safety measures:
     * - Mandatory WHERE clause to prevent accidental full table deletion
     * - Automatic SQL injection protection
     * - Condition validation and type checking
     * - Atomic delete operations
     * - Comprehensive error handling and logging
     *
     * @param {DeleteOptions} options - Configuration object containing table and deletion conditions
     * @returns {Promise<{success: boolean, message: string, affectedRows: number}>} Promise resolving to deletion results
     * @throws {BigQueryError} When validation fails, WHERE clause is empty, or delete operation fails
     *
     * @example
     * ```typescript
     * // Delete specific user
     * const result = await client.delete({
     *   table: 'users',
     *   where: {
     *     user_id: 123,
     *     active: false
     *   }
     * });
     *
     * // Delete old records
     * const cleanup = await client.delete({
     *   table: 'logs',
     *   where: {
     *     created_at: '< 2023-01-01',
     *     level: 'debug'
     *   }
     * });
     *
     * // Delete with multiple conditions
     * const purge = await client.delete({
     *   table: 'temp_data',
     *   where: {
     *     status: 'processed',
     *     expires_at: '< NOW()'
     *   }
     * });
     *
     * console.log(`Deleted ${result.affectedRows} rows`);
     * ```
     */
    delete(options: DeleteOptions): Promise<{
        success: boolean;
        message: string;
        affectedRows: number;
    }>;
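    /*
     * Condition sketch (an inference from the examples above, not a documented
     * contract): non-equality filters appear to be expressed by embedding the
     * comparison operator in the value string, e.g. `created_at: '< 2023-01-01'`
     * or `expires_at: '< NOW()'`, while plain values compare with equality.
     * Assuming that convention, a time-based cleanup would look like:
     *
     *   await client.delete({
     *     table: 'sessions',                    // hypothetical table
     *     where: { last_seen: '< 2024-01-01', active: false },
     *   });
     */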
    /**
     * Performs a MERGE operation (UPSERT) between two BigQuery tables
     *
     * This method provides advanced data synchronization capabilities with:
     * - Conditional INSERT and UPDATE operations in a single statement
     * - Automatic handling of matching and non-matching records
     * - Customizable actions for different scenarios
     * - Atomic transaction behavior
     * - Comprehensive error handling
     *
     * @param {MergeOptions} options - Configuration object for the MERGE operation
     * @returns {Promise<any>} Promise resolving to merge operation results
     * @throws {BigQueryError} When merge operation fails or validation errors occur
     *
     * @example
     * ```typescript
     * // Basic MERGE operation
     * const result = await client.merge({
     *   targetTable: 'users',
     *   sourceTable: 'user_updates',
     *   on: { 'users.id': 'user_updates.user_id' },
     *   whenMatched: 'UPDATE SET name = source.name, email = source.email',
     *   whenNotMatched: 'INSERT (id, name, email) VALUES (source.user_id, source.name, source.email)'
     * });
     *
     * // Complex MERGE with multiple conditions
     * const syncResult = await client.merge({
     *   targetTable: 'inventory',
     *   sourceTable: 'inventory_updates',
     *   on: {
     *     'inventory.product_id': 'inventory_updates.product_id',
     *     'inventory.location': 'inventory_updates.location'
     *   },
     *   whenMatched: 'UPDATE SET quantity = source.quantity, updated_at = CURRENT_TIMESTAMP()',
     *   whenNotMatched: 'INSERT (product_id, location, quantity, created_at) VALUES (source.product_id, source.location, source.quantity, CURRENT_TIMESTAMP())'
     * });
     * ```
     */
    merge(options: MergeOptions): Promise<any>;
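    /*
     * Generated-SQL sketch (an assumption about the implementation, inferred
     * from the `source.` prefixes in the example strings above): the basic
     * MERGE example would presumably compile to a statement shaped like
     *
     *   MERGE `project.dataset.users`
     *   USING `project.dataset.user_updates` AS source
     *   ON users.id = source.user_id
     *   WHEN MATCHED THEN UPDATE SET name = source.name, email = source.email
     *   WHEN NOT MATCHED THEN INSERT (id, name, email)
     *     VALUES (source.user_id, source.name, source.email)
     *
     * The whenMatched / whenNotMatched strings appear to be passed through
     * verbatim after the WHEN ... THEN keywords; the exact aliasing of the
     * target and source tables is not visible from the typings.
     */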
    /**
     * Performs high-performance batch insert operations using BigQuery's native batch API
     *
     * This method is optimized for large-scale data insertion with:
     * - Direct BigQuery API integration for maximum performance
     * - Automatic batching and chunking of large datasets
     * - Schema auto-detection and validation
     * - Efficient memory usage for large datasets
     * - Comprehensive error handling with partial failure support
     *
     * @param {string} table - Target table name for batch insertion
     * @param {Record<string, any>[]} rows - Array of row objects to insert in batch
     * @returns {Promise<any>} Promise resolving to batch insert results with success/failure details
     * @throws {BigQueryError} When batch operation fails or validation errors occur
     *
     * @example
     * ```typescript
     * // Batch insert large dataset
     * const largeDataset = [
     *   { id: 1, name: 'User 1', email: 'user1@example.com' },
     *   { id: 2, name: 'User 2', email: 'user2@example.com' },
     *   // ... thousands more records
     * ];
     *
     * const result = await client.batchInsert('users', largeDataset);
     * console.log('Batch insert completed:', result);
     *
     * // Handle partial failures
     * if (result.insertErrors && result.insertErrors.length > 0) {
     *   console.log('Some rows failed:', result.insertErrors);
     * }
     * ```
     */
    batchInsert(table: string, rows: Record<string, any>[]): Promise<any>;
    /**
     * Performs real-time streaming insert operations for continuous data ingestion
     *
     * This method is designed for real-time data streaming scenarios with:
     * - Low-latency data insertion for streaming applications
     * - Automatic deduplication using insertId
     * - Schema evolution support
     * - Real-time data availability (no batch processing delays)
     * - Optimized for high-frequency, small-batch insertions
     *
     * @param {string} table - Target table name for streaming insertion
     * @param {any[]} rows - Array of row objects to stream insert
     * @returns {Promise<any>} Promise resolving to streaming insert results
     * @throws {BigQueryError} When streaming operation fails or validation errors occur
     *
     * @example
     * ```typescript
     * // Stream real-time events
     * const events = [
     *   {
     *     timestamp: new Date().toISOString(),
     *     event_type: 'user_login',
     *     user_id: 123,
     *     metadata: { ip: '192.168.1.1', browser: 'Chrome' }
     *   },
     *   {
     *     timestamp: new Date().toISOString(),
     *     event_type: 'page_view',
     *     user_id: 123,
     *     metadata: { page: '/dashboard', duration: 5000 }
     *   }
     * ];
     *
     * const result = await client.streamInsert('events', events);
     * console.log('Events streamed successfully:', result);
     *
     * // For continuous streaming
     * setInterval(async () => {
     *   const realtimeData = await fetchRealtimeData();
     *   await client.streamInsert('metrics', realtimeData);
     * }, 1000);
     * ```
     */
    streamInsert(table: string, rows: any[]): Promise<any>;
    /**
     * Flattens and normalizes nested BigQuery result objects for easier consumption
     *
     * This utility method processes complex BigQuery results with:
     * - Deep cloning to prevent reference issues
     * - Nested object flattening for complex data structures
     * - JSON serialization/deserialization for data normalization
     * - Type preservation where possible
     * - Memory-efficient processing for large result sets
     *
     * @template T - The expected type of the result objects
     * @param {T[]} results - Array of nested result objects from BigQuery
     * @returns {Promise<T[]>} Promise resolving to flattened and normalized results
     * @throws {BigQueryError} When flattening operation fails
     *
     * @example
     * ```typescript
     * // Flatten complex nested results
     * const complexResults = await client.query('SELECT * FROM nested_table');
     * const flattened = await client.flattenResults(complexResults.data);
     *
     * // Use with type safety
     * interface NestedUser {
     *   id: number;
     *   profile: { name: string; settings: { theme: string } };
     * }
     *
     * const users = await client.flattenResults<NestedUser>(rawResults);
     * console.log(users[0].profile.settings.theme); // Safely access nested data
     * ```
     */
    flattenResults<T>(results: T[]): Promise<T[]>;
    /**
     * Generates a unique cache key for query result caching
     *
     * @private
     * @param {string} sql - The SQL query string
     * @param {any[]} [params] - Optional query parameters
     * @returns {string} Unique cache key combining SQL and parameters
     */
    private generateCacheKey;
    /**
     * Converts BigQuery schema object to human-readable string format
     *
     * @private
     * @param {any} schema - BigQuery schema object
     * @returns {string} Formatted schema string for logging and debugging
     */
    private generateSchemaString;
    /**
     * Executes the actual BigQuery operation with parameter binding
     *
     * @private
     * @param {string} sql - The SQL query to execute
     * @param {any[]} [params] - Optional parameters for the query
     * @returns {Promise<any>} Promise resolving to raw BigQuery results
     */
    private executeQuery;
    /**
     * Creates a materialized view for improved query performance and data freshness
     *
     * Materialized views provide significant performance benefits by:
     * - Pre-computing and storing query results for faster access
     * - Automatic refresh scheduling to maintain data freshness
     * - Reduced query costs by avoiding repeated computation
     * - Support for partitioning for large datasets
     * - Transparent query optimization by BigQuery
     *
     * @param {MaterializedViewConfig} config - Configuration object for materialized view creation
     * @returns {Promise<{success: boolean, message: string}>} Promise resolving to creation status
     * @throws {BigQueryError} When materialized view creation fails
     *
     * @example
     * ```typescript
     * // Create a materialized view for daily sales summary
     * const result = await client.createMaterializedView({
     *   name: 'daily_sales_summary',
     *   query: `
     *     SELECT
     *       DATE(created_at) as sale_date,
     *       COUNT(*) as total_orders,
     *       SUM(amount) as total_revenue,
     *       AVG(amount) as avg_order_value
     *     FROM orders
     *     WHERE status = 'completed'
     *     GROUP BY DATE(created_at)
     *   `,
     *   refreshInterval: '1 HOUR',
     *   partitionField: 'sale_date'
     * });
     *
     * // Create a real-time analytics view
     * const analyticsView = await client.createMaterializedView({
     *   name: 'user_activity_summary',
     *   query: `
     *     SELECT
     *       user_id,
     *       COUNT(*) as total_actions,
     *       MAX(timestamp) as last_activity
     *     FROM user_events
     *     GROUP BY user_id
     *   `,
     *   refreshInterval: '15 MINUTES'
     * });
     *
     * console.log('Materialized view created:', result.success);
     * ```
     */
    createMaterializedView(config: MaterializedViewConfig): Promise<{
        success: boolean;
        message: string;
    }>;
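    /*
     * DDL sketch (an assumption about the implementation): the first example
     * above would presumably translate to BigQuery DDL along these lines,
     * with the '1 HOUR' refreshInterval mapped to a refresh_interval_minutes
     * value of 60:
     *
     *   CREATE MATERIALIZED VIEW `project.dataset.daily_sales_summary`
     *   PARTITION BY sale_date
     *   OPTIONS (enable_refresh = true, refresh_interval_minutes = 60)
     *   AS SELECT DATE(created_at) AS sale_date, ... FROM orders ...
     */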
    /**
     * Creates a partitioned table for optimized performance and cost management
     *
     * Partitioned tables provide significant benefits for large datasets:
     * - Improved query performance by scanning only relevant partitions
     * - Reduced query costs by limiting data processed
     * - Better data organization and management
     * - Automatic partition pruning for date/time-based queries
     * - Support for various partition types (DATE, DATETIME, TIMESTAMP, INTEGER)
     *
     * @param {PartitionedTableConfig} config - Configuration object for partitioned table creation
     * @returns {Promise<{success: boolean, message: string}>} Promise resolving to creation status
     * @throws {BigQueryError} When partitioned table creation fails
     *
     * @example
     * ```typescript
     * // Create a date-partitioned events table
     * const result = await client.createPartitionedTable({
     *   name: 'user_events_partitioned',
     *   schema: {
     *     event_id: 'STRING',
     *     user_id: 'INTEGER',
     *     event_type: 'STRING',
     *     timestamp: 'TIMESTAMP',
     *     metadata: 'JSON'
     *   },
     *   partitionType: 'DATE' as const,
     *   partitionField: 'timestamp'
     * });
     *
     * // Create an integer-partitioned table for sharding
     * const shardedTable = await client.createPartitionedTable({
     *   name: 'user_data_sharded',
     *   schema: {
     *     user_id: 'INTEGER',
     *     name: 'STRING',
     *     email: 'STRING',
     *     created_at: 'TIMESTAMP',
     *     shard_key: 'INTEGER'
     *   },
     *   partitionType: 'RANGE' as const,
     *   partitionField: 'shard_key'
     * });
     *
     * // Create a time-partitioned table for real-time data
     * const timePartitioned = await client.createPartitionedTable({
     *   name: 'realtime_metrics',
     *   schema: {
     *     metric_name: 'STRING',
     *     value: 'FLOAT',
     *     timestamp: 'TIMESTAMP',
     *     tags: 'JSON'
     *   },
     *   partitionType: 'TIME' as const,
     *   partitionField: 'timestamp'
     * });
     *
     * console.log('Partitioned table created:', result.success);
     * ```
     */
    createPartitionedTable(config: PartitionedTableConfig): Promise<{
        success: boolean;
        message: string;
    }>;
}
export {};