import tryFn from "#src/concerns/try-fn.js";
import BaseReplicator from './base-replicator.class.js';
/**
* BigQuery Replicator - Replicate data to Google BigQuery tables
*
* ⚠️ REQUIRED DEPENDENCY: You must install the Google Cloud BigQuery SDK:
* ```bash
* pnpm add @google-cloud/bigquery
* ```
*
* Configuration:
* @param {string} projectId - Google Cloud project ID (required)
* @param {string} datasetId - BigQuery dataset ID (required)
* @param {Object} credentials - Service account credentials object (optional)
* @param {string} location - BigQuery dataset location/region (default: 'US')
* @param {string} logTable - Table name for operation logging (optional)
*
* @example
* new BigqueryReplicator({
* projectId: 'my-gcp-project',
* datasetId: 'analytics',
 *   credentials: JSON.parse(Buffer.from(process.env.GOOGLE_CREDENTIALS, 'base64').toString())
* }, {
* users: {
* table: 'users_table',
* transform: (data) => ({ ...data, ip: data.ip || 'unknown' })
* },
* orders: 'orders_table'
* })
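 *
 * @example
 * // Array form: fan one resource out to multiple tables, each with its own
 * // actions and optional transform (table names here are illustrative):
 * new BigqueryReplicator({
 *   projectId: 'my-gcp-project',
 *   datasetId: 'analytics'
 * }, {
 *   users: [
 *     'users_table',
 *     { table: 'users_audit', actions: ['insert', 'update', 'delete'] }
 *   ]
 * })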
*
* See PLUGINS.md for comprehensive configuration documentation.
*/
class BigqueryReplicator extends BaseReplicator {
constructor(config = {}, resources = {}) {
super(config);
this.projectId = config.projectId;
this.datasetId = config.datasetId;
this.bigqueryClient = null;
this.credentials = config.credentials;
this.location = config.location || 'US';
this.logTable = config.logTable;
// Parse resources configuration
this.resources = this.parseResourcesConfig(resources);
}
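  /**
   * Normalize the resources config into a canonical shape: a map of
   * resource name -> array of { table, actions, transform } mappings.
   * Accepts a bare table name (string), a single mapping object, or an
   * array mixing both; actions default to ['insert'].
   */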
parseResourcesConfig(resources) {
const parsed = {};
for (const [resourceName, config] of Object.entries(resources)) {
if (typeof config === 'string') {
// Short form: just table name
parsed[resourceName] = [{
table: config,
actions: ['insert'],
transform: null
}];
} else if (Array.isArray(config)) {
// Array form: multiple table mappings
parsed[resourceName] = config.map(item => {
if (typeof item === 'string') {
return { table: item, actions: ['insert'], transform: null };
}
return {
table: item.table,
actions: item.actions || ['insert'],
transform: item.transform || null
};
});
} else if (typeof config === 'object') {
// Single object form
parsed[resourceName] = [{
table: config.table,
actions: config.actions || ['insert'],
transform: config.transform || null
}];
}
}
return parsed;
}
validateConfig() {
const errors = [];
if (!this.projectId) errors.push('projectId is required');
if (!this.datasetId) errors.push('datasetId is required');
if (Object.keys(this.resources).length === 0) errors.push('At least one resource must be configured');
// Validate resource configurations
for (const [resourceName, tables] of Object.entries(this.resources)) {
for (const tableConfig of tables) {
if (!tableConfig.table) {
errors.push(`Table name is required for resource '${resourceName}'`);
}
if (!Array.isArray(tableConfig.actions) || tableConfig.actions.length === 0) {
errors.push(`Actions array is required for resource '${resourceName}'`);
}
const validActions = ['insert', 'update', 'delete'];
const invalidActions = tableConfig.actions.filter(action => !validActions.includes(action));
if (invalidActions.length > 0) {
errors.push(`Invalid actions for resource '${resourceName}': ${invalidActions.join(', ')}. Valid actions: ${validActions.join(', ')}`);
}
if (tableConfig.transform && typeof tableConfig.transform !== 'function') {
errors.push(`Transform must be a function for resource '${resourceName}'`);
}
}
}
return { isValid: errors.length === 0, errors };
}
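  /**
   * Lazily import the BigQuery SDK and construct the client. Emits
   * 'initialization_error' and rethrows if @google-cloud/bigquery is not installed.
   */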
async initialize(database) {
await super.initialize(database);
const [ok, err, sdk] = await tryFn(() => import('@google-cloud/bigquery'));
if (!ok) {
if (this.config.verbose) {
console.warn(`[BigqueryReplicator] Failed to import BigQuery SDK: ${err.message}`);
}
this.emit('initialization_error', { replicator: this.name, error: err.message });
throw err;
}
const { BigQuery } = sdk;
this.bigqueryClient = new BigQuery({
projectId: this.projectId,
credentials: this.credentials,
location: this.location
});
this.emit('initialized', {
replicator: this.name,
projectId: this.projectId,
datasetId: this.datasetId,
resources: Object.keys(this.resources)
});
}
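  /** True when at least one table mapping is configured for the resource. */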
shouldReplicateResource(resourceName) {
return this.resources.hasOwnProperty(resourceName);
}
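  /** True when any mapping for the resource lists the given operation. */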
shouldReplicateAction(resourceName, operation) {
if (!this.resources[resourceName]) return false;
return this.resources[resourceName].some(tableConfig =>
tableConfig.actions.includes(operation)
);
}
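  /** Table mappings (table name + optional transform) for this resource/operation pair. */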
getTablesForResource(resourceName, operation) {
if (!this.resources[resourceName]) return [];
return this.resources[resourceName]
.filter(tableConfig => tableConfig.actions.includes(operation))
.map(tableConfig => ({
table: tableConfig.table,
transform: tableConfig.transform
}));
}
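  /**
   * Strip internal fields and apply the optional per-table transform.
   * The cleaned record is deep-cloned via a JSON round-trip so transforms
   * cannot mutate the caller's object.
   */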
applyTransform(data, transformFn) {
// First, clean internal fields that shouldn't go to BigQuery
let cleanData = this._cleanInternalFields(data);
if (!transformFn) return cleanData;
let transformedData = JSON.parse(JSON.stringify(cleanData));
return transformFn(transformedData);
}
_cleanInternalFields(data) {
if (!data || typeof data !== 'object') return data;
const cleanData = { ...data };
// Remove internal fields that start with $ or _
Object.keys(cleanData).forEach(key => {
if (key.startsWith('$') || key.startsWith('_')) {
delete cleanData[key];
}
});
return cleanData;
}
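  /**
   * Replicate one operation to every table configured for the resource/action
   * pair. Returns { skipped, reason } when nothing applies, otherwise
   * { success, results, errors, tables }.
   */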
async replicate(resourceName, operation, data, id, beforeData = null) {
if (!this.enabled || !this.shouldReplicateResource(resourceName)) {
return { skipped: true, reason: 'resource_not_included' };
}
if (!this.shouldReplicateAction(resourceName, operation)) {
return { skipped: true, reason: 'action_not_included' };
}
const tableConfigs = this.getTablesForResource(resourceName, operation);
if (tableConfigs.length === 0) {
return { skipped: true, reason: 'no_tables_for_action' };
}
const results = [];
const errors = [];
const [ok, err, result] = await tryFn(async () => {
const dataset = this.bigqueryClient.dataset(this.datasetId);
// Replicate to all applicable tables
for (const tableConfig of tableConfigs) {
const [okTable, errTable] = await tryFn(async () => {
const table = dataset.table(tableConfig.table);
let job;
if (operation === 'insert') {
const transformedData = this.applyTransform(data, tableConfig.transform);
try {
job = await table.insert([transformedData]);
} catch (error) {
// Extract detailed BigQuery error information
              const { errors: insertErrors, response } = error; // renamed to avoid shadowing the outer errors array
              if (this.config.verbose) {
                console.error('[BigqueryReplicator] BigQuery insert error details:');
                if (insertErrors) console.error(JSON.stringify(insertErrors, null, 2));
                if (response) console.error(JSON.stringify(response, null, 2));
}
throw error;
}
} else if (operation === 'update') {
const transformedData = this.applyTransform(data, tableConfig.transform);
const keys = Object.keys(transformedData).filter(k => k !== 'id');
const setClause = keys.map(k => `${k} = @${k}`).join(', ');
const params = { id, ...transformedData };
            const query = `UPDATE \`${this.projectId}.${this.datasetId}.${tableConfig.table}\` SET ${setClause} WHERE id = @id`;
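            // e.g. UPDATE `my-gcp-project.analytics.users_table` SET name = @name WHERE id = @id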
// Retry logic for streaming buffer issues
const maxRetries = 2;
let lastError = null;
for (let attempt = 1; attempt <= maxRetries; attempt++) {
              const [okQuery, errQuery, queryJob] = await tryFn(async () => {
                const [updateJob] = await this.bigqueryClient.createQueryJob({
                  query,
                  params,
                  location: this.location
                });
                await updateJob.getQueryResults();
                return [updateJob];
              });
              if (okQuery) {
                job = queryJob; // keep the job array so jobId can be read below
                break;
              } else {
                lastError = errQuery;
                // Enhanced error logging for BigQuery update operations
                if (this.config.verbose) {
                  console.warn(`[BigqueryReplicator] Update attempt ${attempt} failed: ${errQuery.message}`);
                  if (errQuery.errors) {
                    console.error('[BigqueryReplicator] BigQuery update error details:');
                    console.error('Errors:', JSON.stringify(errQuery.errors, null, 2));
                  }
                }
                // Rows still in the streaming buffer cannot be updated; wait and retry
                if (errQuery?.message?.includes('streaming buffer') && attempt < maxRetries) {
                  const delaySeconds = 30;
                  if (this.config.verbose) {
                    console.warn(`[BigqueryReplicator] Retrying in ${delaySeconds} seconds due to streaming buffer issue`);
                  }
                  await new Promise(resolve => setTimeout(resolve, delaySeconds * 1000));
                  continue;
                }
                throw errQuery;
}
}
if (!job) throw lastError;
} else if (operation === 'delete') {
            const query = `DELETE FROM \`${this.projectId}.${this.datasetId}.${tableConfig.table}\` WHERE id = @id`;
try {
const [deleteJob] = await this.bigqueryClient.createQueryJob({
query,
params: { id },
location: this.location
});
await deleteJob.getQueryResults();
job = [deleteJob];
} catch (error) {
// Enhanced error logging for BigQuery delete operations
if (this.config.verbose) {
console.error('[BigqueryReplicator] BigQuery delete error details:');
console.error('Query:', query);
if (error.errors) console.error('Errors:', JSON.stringify(error.errors, null, 2));
if (error.response) console.error('Response:', JSON.stringify(error.response, null, 2));
}
throw error;
}
} else {
throw new Error(`Unsupported operation: ${operation}`);
}
results.push({
table: tableConfig.table,
success: true,
jobId: job[0]?.id
});
});
if (!okTable) {
errors.push({
table: tableConfig.table,
error: errTable.message
});
}
}
// Log operation if logTable is configured
if (this.logTable) {
const [okLog, errLog] = await tryFn(async () => {
const logTable = dataset.table(this.logTable);
await logTable.insert([{
resource_name: resourceName,
operation,
record_id: id,
data: JSON.stringify(data),
timestamp: new Date().toISOString(),
source: 's3db-replicator'
}]);
});
        if (!okLog) {
          // Don't fail the main operation if logging fails; surface it in verbose mode
          if (this.config.verbose) {
            console.warn(`[BigqueryReplicator] Failed to write to log table '${this.logTable}': ${errLog.message}`);
          }
        }
}
const success = errors.length === 0;
// Log errors if any occurred
if (errors.length > 0) {
console.warn(`[BigqueryReplicator] Replication completed with errors for ${resourceName}:`, errors);
}
this.emit('replicated', {
replicator: this.name,
resourceName,
operation,
id,
tables: tableConfigs.map(t => t.table),
results,
errors,
success
});
return {
success,
results,
errors,
tables: tableConfigs.map(t => t.table)
};
});
if (ok) return result;
if (this.config.verbose) {
console.warn(`[BigqueryReplicator] Replication failed for ${resourceName}: ${err.message}`);
}
this.emit('replicator_error', {
replicator: this.name,
resourceName,
operation,
id,
error: err.message
});
return { success: false, error: err.message };
}
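  /**
   * Replicate a batch of records sequentially, collecting per-record errors
   * instead of failing fast. Each record is { operation, data, id, beforeData }.
   */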
async replicateBatch(resourceName, records) {
const results = [];
const errors = [];
for (const record of records) {
const [ok, err, res] = await tryFn(() => this.replicate(
resourceName,
record.operation,
record.data,
record.id,
record.beforeData
));
if (ok) {
results.push(res);
} else {
if (this.config.verbose) {
console.warn(`[BigqueryReplicator] Batch replication failed for record ${record.id}: ${err.message}`);
}
errors.push({ id: record.id, error: err.message });
}
}
// Log errors if any occurred during batch processing
if (errors.length > 0) {
console.warn(`[BigqueryReplicator] Batch replication completed with ${errors.length} error(s) for ${resourceName}:`, errors);
}
return {
success: errors.length === 0,
results,
errors
};
}
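  /**
   * Verify connectivity by fetching the target dataset's metadata.
   * Returns true on success, false (and emits 'connection_error') on failure.
   */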
async testConnection() {
const [ok, err] = await tryFn(async () => {
if (!this.bigqueryClient) await this.initialize();
const dataset = this.bigqueryClient.dataset(this.datasetId);
await dataset.getMetadata();
return true;
});
if (ok) return true;
if (this.config.verbose) {
console.warn(`[BigqueryReplicator] Connection test failed: ${err.message}`);
}
this.emit('connection_error', { replicator: this.name, error: err.message });
return false;
}
  async cleanup() {
    // The BigQuery client holds no persistent connections, so there is nothing to release
  }
getStatus() {
return {
...super.getStatus(),
projectId: this.projectId,
datasetId: this.datasetId,
resources: this.resources,
logTable: this.logTable
};
}
}
export default BigqueryReplicator;
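
/*
 * Usage sketch (illustrative, not part of the module). Assumes a configured
 * s3db `database` instance and valid GCP credentials in the environment;
 * the resource and table names below are hypothetical. See PLUGINS.md for
 * the supported plugin wiring.
 *
 *   const replicator = new BigqueryReplicator(
 *     { projectId: 'my-gcp-project', datasetId: 'analytics', verbose: true },
 *     { users: { table: 'users_table', actions: ['insert', 'update'] } }
 *   );
 *   await replicator.initialize(database);
 *   await replicator.replicate('users', 'insert', { id: 'u1', name: 'Ana' }, 'u1');
 */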