bigquery-client
Version: 1.0.6
A feature-rich Node.js client for Google BigQuery with support for CRUD operations, transactions, query building, and advanced features like aggregate functions, pagination, and logging.
/**
* @fileoverview BigQuery Client ORM - A comprehensive TypeScript ORM for Google BigQuery
* @version 1.0.6
* @author Pravin Jadhav
* @description This module provides a high-level interface for interacting with Google BigQuery,
* including CRUD operations, query caching, metrics collection, and advanced features like
* materialized views and partitioned tables.
*/
import { QueryResult, MaterializedViewConfig, PartitionedTableConfig } from '../types';
/**
* Configuration interface for BigQuery client initialization
* @interface BigQueryClientConfig
*/
interface BigQueryClientConfig {
/** Google Cloud Project ID */
projectId: string;
/** BigQuery Dataset ID */
datasetId: string;
/** Enable query and error logging (default: false) */
enableLogging?: boolean;
/** Enable query result caching (default: false) */
enableCache?: boolean;
/** Cache time-to-live in milliseconds (default: 300000 = 5 minutes) */
cacheTtl?: number;
/** Maximum number of cached queries (default: 1000) */
cacheMaxSize?: number;
}
/**
* Configuration interface for SELECT query operations
* @interface SelectOptions
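*
* The `limit` and `offset` fields enable pagination; a minimal sketch, assuming they
* translate directly to SQL LIMIT/OFFSET clauses:
* @example
* ```typescript
* // Fetch the second page of 25 users, ordered for stable pagination
* const page2: SelectOptions = {
*   table: 'users',
*   columns: ['id', 'name'],
*   orderBy: [{ column: 'id', direction: 'ASC' }],
*   limit: 25,   // page size
*   offset: 25   // skip page 1
* };
* ```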
*/
interface SelectOptions {
/** Target table name */
table: string;
/** Columns to select - can be array of strings or object with table mappings */
columns?: string[] | Record<string, string[]>;
/** JOIN configurations for multi-table queries */
joins?: {
/** Table to join with */
table: string;
/** Join conditions mapping */
on: Record<string, string>;
/** Type of join operation */
type?: 'INNER' | 'LEFT' | 'RIGHT' | 'FULL';
}[];
/** WHERE clause conditions */
where?: Record<string, any>;
/** GROUP BY columns */
groupBy?: string[];
/** ORDER BY configurations */
orderBy?: {
column: string;
direction?: 'ASC' | 'DESC';
}[];
/** Maximum number of rows to return */
limit?: number;
/** Number of rows to skip */
offset?: number;
}
/**
* Configuration interface for INSERT operations
* @interface InsertOptions
*/
interface InsertOptions {
/** Target table name */
table: string;
/** Array of row objects to insert */
rows: Record<string, any>[];
}
/**
* Configuration interface for UPDATE operations
* @interface UpdateOptions
*/
interface UpdateOptions {
/** Target table name */
table: string;
/** Fields to update with their new values */
set: Record<string, any>;
/** WHERE clause conditions for row selection */
where: Record<string, any>;
}
/**
* Configuration interface for DELETE operations
* @interface DeleteOptions
*/
interface DeleteOptions {
/** Target table name */
table: string;
/** WHERE clause conditions for row selection */
where: Record<string, any>;
}
/**
* Configuration interface for MERGE operations
* @interface MergeOptions
*/
interface MergeOptions {
/** Target table for merge operation */
targetTable: string;
/** Source table for merge operation */
sourceTable: string;
/** Join conditions between target and source */
on: Record<string, string>;
/** Action to take when records match */
whenMatched?: string;
/** Action to take when records don't match */
whenNotMatched?: string;
}
/**
* BigQuery Client ORM - Main class for BigQuery operations
*
* This class provides a comprehensive interface for interacting with Google BigQuery,
* including CRUD operations, query caching, metrics collection, SQL injection protection,
* and advanced features like materialized views and partitioned tables.
*
* @class BigQueryClient
* @example
* ```typescript
* const client = new BigQueryClient({
* projectId: 'my-project',
* datasetId: 'my-dataset',
* enableLogging: true,
* enableCache: true
* });
*
* // Execute a SELECT query
* const result = await client.select({
* table: 'users',
* columns: ['id', 'name', 'email'],
* where: { active: true },
* limit: 10
* });
* ```
*/
export declare class BigQueryClient {
/** Google BigQuery client instance */
private bigQuery;
/** Google Cloud Project ID */
private projectId;
/** BigQuery Dataset ID */
private datasetId;
/** Logger instance for query and error logging */
private logger;
/** Query cache instance for result caching */
private queryCache;
/** Metrics collector for performance monitoring */
private metricsCollector;
/**
* Creates a new BigQueryClient instance
*
* @param {BigQueryClientConfig} config - Configuration object for the client
* @example
* ```typescript
* const client = new BigQueryClient({
* projectId: 'my-gcp-project',
* datasetId: 'analytics_data',
* enableLogging: true,
* enableCache: true,
* cacheTtl: 600000, // 10 minutes
* cacheMaxSize: 500
* });
* ```
*/
constructor(config: BigQueryClientConfig);
/**
* Formats a table name to include the full BigQuery path
*
* @private
* @param {string} table - The table name to format
* @returns {string} Fully qualified table name in format `project.dataset.table`
* @example
* ```typescript
* // Input: 'users'
* // Output: '`my-project.my-dataset.users`'
* ```
*/
private formatTableName;
/**
* Executes a raw SQL query on BigQuery with caching and validation
*
* This method provides the core query execution functionality with built-in:
* - SQL injection protection
* - Parameter validation
* - Query result caching
* - Performance metrics collection
* - Error handling and logging
*
* @template T - The expected type of the query result data
* @param {string} sql - The SQL query string to execute
* @param {any[]} [params] - Optional parameters for parameterized queries
* @returns {Promise<QueryResult<T>>} Promise resolving to query results with metadata
* @throws {BigQueryError} When query validation fails or execution errors occur
*
* @example
* ```typescript
* // Simple query
* const result = await client.query('SELECT * FROM users WHERE active = ?', [true]);
*
* // With type safety
* interface User { id: number; name: string; email: string; }
* const users = await client.query<User>('SELECT id, name, email FROM users');
*
* // Access results
* console.log(result.data); // Query results
* console.log(result.metadata.executionTime); // Performance metrics
* console.log(result.metadata.cacheHit); // Whether result was cached
* ```
*/
query<T>(sql: string, params?: any[]): Promise<QueryResult<T>>;
/**
* Executes a dry run of a SQL query to analyze its execution plan
*
* This method performs query validation and cost estimation without actually
* executing the query or processing any data. Useful for:
* - Query optimization
* - Cost estimation
* - Syntax validation
* - Performance planning
*
* @param {string} sql - The SQL query string to analyze
* @param {any[]} [params] - Optional parameters for parameterized queries
* @returns {Promise<any>} Promise resolving to query job metadata and execution plan
* @throws {Error} When dry run execution fails
*
* @example
* ```typescript
* const plan = await client.explain('SELECT * FROM large_table WHERE date > ?', ['2023-01-01']);
* console.log(plan.statistics.totalBytesProcessed); // Estimated bytes
* console.log(plan.statistics.creationTime); // Query creation time
* ```
*/
explain(sql: string, params?: any[]): Promise<any>;
/**
* Executes a SELECT query with advanced features like JOINs, aggregations, and filtering
*
* This method provides a high-level interface for SELECT operations with support for:
* - Multi-table JOINs with automatic aliasing
* - Aggregate functions (SUM, COUNT, AVG, etc.)
* - Complex WHERE conditions
* - GROUP BY and ORDER BY clauses
* - LIMIT and OFFSET for pagination
* - Automatic SQL injection protection
*
* @param {SelectOptions} options - Configuration object for the SELECT query
* @returns {Promise<{success: boolean, message: string, data: any[]}>} Promise resolving to query results
* @throws {BigQueryError} When query construction or execution fails
*
* @example
* ```typescript
* // Simple SELECT
* const users = await client.select({
* table: 'users',
* columns: ['id', 'name', 'email'],
* where: { active: true },
* limit: 10
* });
*
* // Complex SELECT with JOINs and aggregation
* const report = await client.select({
* table: 'orders',
* columns: {
* orders: ['id', 'total'],
* users: ['name', 'email']
* },
* joins: [{
* table: 'users',
* on: { 'orders.user_id': 'users.id' },
* type: 'INNER'
* }],
* where: { 'orders.status': 'completed' },
* orderBy: [{ column: 'total', direction: 'DESC' }]
* });
* ```
*/
select(options: SelectOptions): Promise<{
success: boolean;
message: string;
data: any[];
}>;
/**
* Inserts multiple rows into a BigQuery table with automatic validation
*
* This method provides a safe and efficient way to insert data with:
* - Automatic SQL injection protection
* - Batch processing for multiple rows
* - Schema validation
* - Error handling and logging
* - Transaction-like behavior
*
* @param {InsertOptions} options - Configuration object containing table and row data
* @returns {Promise<{success: boolean, message: string, affectedRows: number}>} Promise resolving to insert results
* @throws {BigQueryError} When validation fails or insert operation encounters errors
*
* @example
* ```typescript
* // Insert single user
* const result = await client.insert({
* table: 'users',
* rows: [{
* name: 'John Doe',
* email: 'john@example.com',
* active: true,
* created_at: new Date().toISOString()
* }]
* });
*
* // Insert multiple users
* const bulkResult = await client.insert({
* table: 'users',
* rows: [
* { name: 'Alice', email: 'alice@example.com' },
* { name: 'Bob', email: 'bob@example.com' },
* { name: 'Charlie', email: 'charlie@example.com' }
* ]
* });
*
* console.log(`Inserted ${bulkResult.affectedRows} rows`);
* ```
*/
insert(options: InsertOptions): Promise<{
success: boolean;
message: string;
affectedRows: number;
}>;
/**
* Updates existing rows in a BigQuery table with conditional filtering
*
* This method provides secure and efficient row updates with:
* - Mandatory WHERE clause to prevent accidental full table updates
* - Automatic SQL injection protection
* - Field validation and type checking
* - Atomic update operations
* - Comprehensive error handling
*
* @param {UpdateOptions} options - Configuration object containing table, update fields, and conditions
* @returns {Promise<{success: boolean, message: string, affectedRows: number}>} Promise resolving to update results
* @throws {BigQueryError} When validation fails, WHERE clause is empty, or update operation fails
*
* @example
* ```typescript
* // Update user status
* const result = await client.update({
* table: 'users',
* set: {
* active: false,
* updated_at: new Date().toISOString(),
* status: 'inactive'
* },
* where: {
* user_id: 123,
* email: 'user@example.com'
* }
* });
*
* // Update with complex conditions
* const bulkUpdate = await client.update({
* table: 'orders',
* set: { status: 'shipped', shipped_date: '2023-12-01' },
* where: { status: 'processing', priority: 'high' }
* });
*
* console.log(`Updated ${result.affectedRows} rows`);
* ```
*/
update(options: UpdateOptions): Promise<{
success: boolean;
message: string;
affectedRows: number;
}>;
/**
* Deletes rows from a BigQuery table with mandatory filtering conditions
*
* This method provides secure row deletion with built-in safety measures:
* - Mandatory WHERE clause to prevent accidental full table deletion
* - Automatic SQL injection protection
* - Condition validation and type checking
* - Atomic delete operations
* - Comprehensive error handling and logging
*
* @param {DeleteOptions} options - Configuration object containing table and deletion conditions
* @returns {Promise<{success: boolean, message: string, affectedRows: number}>} Promise resolving to deletion results
* @throws {BigQueryError} When validation fails, WHERE clause is empty, or delete operation fails
*
* @example
* ```typescript
* // Delete specific user
* const result = await client.delete({
* table: 'users',
* where: {
* user_id: 123,
* active: false
* }
* });
*
* // Delete old records
* const cleanup = await client.delete({
* table: 'logs',
* where: {
* created_at: '< 2023-01-01',
* level: 'debug'
* }
* });
*
* // Delete with multiple conditions
* const purge = await client.delete({
* table: 'temp_data',
* where: {
* status: 'processed',
* expires_at: '< NOW()'
* }
* });
*
* console.log(`Deleted ${result.affectedRows} rows`);
* ```
*/
delete(options: DeleteOptions): Promise<{
success: boolean;
message: string;
affectedRows: number;
}>;
/**
* Performs a MERGE operation (UPSERT) between two BigQuery tables
*
* This method provides advanced data synchronization capabilities with:
* - Conditional INSERT and UPDATE operations in a single statement
* - Automatic handling of matching and non-matching records
* - Customizable actions for different scenarios
* - Atomic transaction behavior
* - Comprehensive error handling
*
* @param {MergeOptions} options - Configuration object for the MERGE operation
* @returns {Promise<any>} Promise resolving to merge operation results
* @throws {BigQueryError} When merge operation fails or validation errors occur
*
* @example
* ```typescript
* // Basic MERGE operation
* const result = await client.merge({
* targetTable: 'users',
* sourceTable: 'user_updates',
* on: { 'users.id': 'user_updates.user_id' },
* whenMatched: 'UPDATE SET name = source.name, email = source.email',
* whenNotMatched: 'INSERT (id, name, email) VALUES (source.user_id, source.name, source.email)'
* });
*
* // Complex MERGE with multiple conditions
* const syncResult = await client.merge({
* targetTable: 'inventory',
* sourceTable: 'inventory_updates',
* on: {
* 'inventory.product_id': 'inventory_updates.product_id',
* 'inventory.location': 'inventory_updates.location'
* },
* whenMatched: 'UPDATE SET quantity = source.quantity, updated_at = CURRENT_TIMESTAMP()',
* whenNotMatched: 'INSERT (product_id, location, quantity, created_at) VALUES (source.product_id, source.location, source.quantity, CURRENT_TIMESTAMP())'
* });
* ```
*/
merge(options: MergeOptions): Promise<any>;
/**
* Performs high-performance batch insert operations using BigQuery's native batch API
*
* This method is optimized for large-scale data insertion with:
* - Direct BigQuery API integration for maximum performance
* - Automatic batching and chunking of large datasets
* - Schema auto-detection and validation
* - Efficient memory usage for large datasets
* - Comprehensive error handling with partial failure support
*
* @param {string} table - Target table name for batch insertion
* @param {Record<string, any>[]} rows - Array of row objects to insert in batch
* @returns {Promise<any>} Promise resolving to batch insert results with success/failure details
* @throws {BigQueryError} When batch operation fails or validation errors occur
*
* @example
* ```typescript
* // Batch insert large dataset
* const largeDataset = [
* { id: 1, name: 'User 1', email: 'user1@example.com' },
* { id: 2, name: 'User 2', email: 'user2@example.com' },
* // ... thousands more records
* ];
*
* const result = await client.batchInsert('users', largeDataset);
* console.log('Batch insert completed:', result);
*
* // Handle partial failures
* if (result.insertErrors && result.insertErrors.length > 0) {
* console.log('Some rows failed:', result.insertErrors);
* }
* ```
*/
batchInsert(table: string, rows: Record<string, any>[]): Promise<any>;
/**
* Performs real-time streaming insert operations for continuous data ingestion
*
* This method is designed for real-time data streaming scenarios with:
* - Low-latency data insertion for streaming applications
* - Automatic deduplication using insertId
* - Schema evolution support
* - Real-time data availability (no batch processing delays)
* - Optimized for high-frequency, small-batch insertions
*
* @param {string} table - Target table name for streaming insertion
* @param {any[]} rows - Array of row objects to stream insert
* @returns {Promise<any>} Promise resolving to streaming insert results
* @throws {BigQueryError} When streaming operation fails or validation errors occur
*
* @example
* ```typescript
* // Stream real-time events
* const events = [
* {
* timestamp: new Date().toISOString(),
* event_type: 'user_login',
* user_id: 123,
* metadata: { ip: '192.168.1.1', browser: 'Chrome' }
* },
* {
* timestamp: new Date().toISOString(),
* event_type: 'page_view',
* user_id: 123,
* metadata: { page: '/dashboard', duration: 5000 }
* }
* ];
*
* const result = await client.streamInsert('events', events);
* console.log('Events streamed successfully:', result);
*
* // For continuous streaming
* setInterval(async () => {
* const realtimeData = await fetchRealtimeData();
* await client.streamInsert('metrics', realtimeData);
* }, 1000);
* ```
*/
streamInsert(table: string, rows: any[]): Promise<any>;
/**
* Flattens and normalizes nested BigQuery result objects for easier consumption
*
* This utility method processes complex BigQuery results with:
* - Deep cloning to prevent reference issues
* - Nested object flattening for complex data structures
* - JSON serialization/deserialization for data normalization
* - Type preservation where possible
* - Memory-efficient processing for large result sets
*
* @template T - The expected type of the result objects
* @param {T[]} results - Array of nested result objects from BigQuery
* @returns {Promise<T[]>} Promise resolving to flattened and normalized results
* @throws {BigQueryError} When flattening operation fails
*
* @example
* ```typescript
* // Flatten complex nested results
* const complexResults = await client.query('SELECT * FROM nested_table');
* const flattened = await client.flattenResults(complexResults.data);
*
* // Use with type safety
* interface NestedUser {
* id: number;
* profile: { name: string; settings: { theme: string } };
* }
*
* const users = await client.flattenResults<NestedUser>(rawResults);
* console.log(users[0].profile.settings.theme); // Safely access nested data
* ```
*/
flattenResults<T>(results: T[]): Promise<T[]>;
/**
* Generates a unique cache key for query result caching
*
* @private
* @param {string} sql - The SQL query string
* @param {any[]} [params] - Optional query parameters
* @returns {string} Unique cache key combining SQL and parameters
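*
* One plausible scheme (an illustrative assumption, not necessarily the actual
* implementation) appends the JSON-serialized parameters to the SQL text:
* @example
* ```typescript
* const key = `${sql}::${JSON.stringify(params ?? [])}`;
* // generateCacheKey('SELECT * FROM users WHERE id = ?', [42])
* // => 'SELECT * FROM users WHERE id = ?::[42]'
* ```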
*/
private generateCacheKey;
/**
* Converts BigQuery schema object to human-readable string format
*
* @private
* @param {any} schema - BigQuery schema object
* @returns {string} Formatted schema string for logging and debugging
*/
private generateSchemaString;
/**
* Executes the actual BigQuery operation with parameter binding
*
* @private
* @param {string} sql - The SQL query to execute
* @param {any[]} [params] - Optional parameters for the query
* @returns {Promise<any>} Promise resolving to raw BigQuery results
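*
* A sketch of the likely call shape (assumed), using the positional-parameter API of
* the official `@google-cloud/bigquery` package, where each `?` in the SQL is bound
* to the corresponding element of the params array:
* @example
* ```typescript
* // const [rows] = await this.bigQuery.query({ query: sql, params });
* // return rows;
* ```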
*/
private executeQuery;
/**
* Creates a materialized view for improved query performance and data freshness
*
* Materialized views provide significant performance benefits by:
* - Pre-computing and storing query results for faster access
* - Automatic refresh scheduling to maintain data freshness
* - Reduced query costs by avoiding repeated computation
* - Support for partitioning for large datasets
* - Transparent query optimization by BigQuery
*
* @param {MaterializedViewConfig} config - Configuration object for materialized view creation
* @returns {Promise<{success: boolean, message: string}>} Promise resolving to creation status
* @throws {BigQueryError} When materialized view creation fails
*
* @example
* ```typescript
* // Create a materialized view for daily sales summary
* const result = await client.createMaterializedView({
* name: 'daily_sales_summary',
* query: `
* SELECT
* DATE(created_at) as sale_date,
* COUNT(*) as total_orders,
* SUM(amount) as total_revenue,
* AVG(amount) as avg_order_value
* FROM orders
* WHERE status = 'completed'
* GROUP BY DATE(created_at)
* `,
* refreshInterval: '1 HOUR',
* partitionField: 'sale_date'
* });
*
* // Create a real-time analytics view
* const analyticsView = await client.createMaterializedView({
* name: 'user_activity_summary',
* query: `
* SELECT
* user_id,
* COUNT(*) as total_actions,
* MAX(timestamp) as last_activity
* FROM user_events
* GROUP BY user_id
* `,
* refreshInterval: '15 MINUTES'
* });
*
* console.log('Materialized view created:', result.success);
* ```
*/
createMaterializedView(config: MaterializedViewConfig): Promise<{
success: boolean;
message: string;
}>;
/**
* Creates a partitioned table for optimized performance and cost management
*
* Partitioned tables provide significant benefits for large datasets:
* - Improved query performance by scanning only relevant partitions
* - Reduced query costs by limiting data processed
* - Better data organization and management
* - Automatic partition pruning for date/time-based queries
* - Support for various partition types (DATE, DATETIME, TIMESTAMP, INTEGER)
*
* @param {PartitionedTableConfig} config - Configuration object for partitioned table creation
* @returns {Promise<{success: boolean, message: string}>} Promise resolving to creation status
* @throws {BigQueryError} When partitioned table creation fails
*
* @example
* ```typescript
* // Create a date-partitioned events table
* const result = await client.createPartitionedTable({
* name: 'user_events_partitioned',
* schema: {
* event_id: 'STRING',
* user_id: 'INTEGER',
* event_type: 'STRING',
* timestamp: 'TIMESTAMP',
* metadata: 'JSON'
* },
* partitionType: 'DATE' as const,
* partitionField: 'timestamp'
* });
*
* // Create an integer-partitioned table for sharding
* const shardedTable = await client.createPartitionedTable({
* name: 'user_data_sharded',
* schema: {
* user_id: 'INTEGER',
* name: 'STRING',
* email: 'STRING',
* created_at: 'TIMESTAMP',
* shard_key: 'INTEGER'
* },
* partitionType: 'RANGE' as const,
* partitionField: 'shard_key'
* });
*
* // Create a time-partitioned table for real-time data
* const timePartitioned = await client.createPartitionedTable({
* name: 'realtime_metrics',
* schema: {
* metric_name: 'STRING',
* value: 'FLOAT',
* timestamp: 'TIMESTAMP',
* tags: 'JSON'
* },
* partitionType: 'TIME' as const,
* partitionField: 'timestamp'
* });
*
* console.log('Partitioned table created:', result.success);
* ```
*/
createPartitionedTable(config: PartitionedTableConfig): Promise<{
success: boolean;
message: string;
}>;
}
export {};