UNPKG

@hotglue/gluestick-ts

Version:

TypeScript version of the gluestick ETL library for hotglue IPaaS platform

113 lines 4.4 kB
import fs from 'fs-extra';
import * as path from 'path';
import { Reader } from './reader.js';
import { toSinger } from './singer.js';

/**
 * Write `data` into `outputDir` using the selected export format.
 *
 * The output file name starts as `name`, may be replaced by the
 * `HG_UNIFIED_OUTPUT_<NAME>` environment variable, and may then be prefixed
 * with a formatted `outputFilePrefix`.
 *
 * @param {object} data - Object exposing `writeParquet(path)`, `writeCSV(path)`
 *   and `toRecords()`; presumably a DataFrame — confirm against callers.
 * @param {string} name - Logical stream name; also used for the primary-key
 *   lookup in the `singer` format.
 * @param {string} outputDir - Directory to write into; created if missing.
 * @param {object} [options]
 * @param {string} [options.exportFormat] - `parquet`, `singer`, `json`,
 *   `jsonl`; any other value writes CSV. Defaults to
 *   `process.env.DEFAULT_EXPORT_FORMAT` or `'csv'`.
 * @param {string} [options.outputFilePrefix] - Prefix template with `{var}`
 *   placeholders. Defaults to `process.env.OUTPUT_FILE_PREFIX`.
 * @param {object} [options.reservedVariables] - Extra placeholder values.
 * @param {*} [options.keys] - Singer primary keys; falls back to
 *   `new Reader().getPk(name)` when falsy.
 * @param {boolean} [options.allowObjects] - Forwarded to `toSinger`
 *   (defaults to `true`).
 * @param {*} [options.schema] - Forwarded to `toSinger`.
 * @returns {void}
 */
export function toExport(data, name, outputDir, options = {}) {
  const {
    exportFormat = process.env.DEFAULT_EXPORT_FORMAT || 'csv',
    outputFilePrefix = process.env.OUTPUT_FILE_PREFIX,
    reservedVariables = {},
  } = options;

  // Handle unified output override
  const unifiedOutputKey = `HG_UNIFIED_OUTPUT_${name.toUpperCase()}`;
  let finalName = process.env[unifiedOutputKey] || name;

  if (outputFilePrefix) {
    const formatVariables = buildStringFormatVariables(reservedVariables);
    const formattedPrefix = formatStrSafely(outputFilePrefix, formatVariables);
    finalName = `${formattedPrefix}${finalName}`;
  }

  // Ensure output directory exists
  fs.ensureDirSync(outputDir);
  const outputPath = path.join(outputDir, finalName);

  // Each case that declares bindings gets its own block so the `const`s do
  // not leak into the shared switch scope.
  switch (exportFormat) {
    case 'parquet':
      data.writeParquet(`${outputPath}.parquet`);
      break;
    case 'singer': {
      // Get primary key from reader
      const reader = new Reader();
      const keys = options.keys || reader.getPk(name);
      // Export data as singer format
      toSinger(data, finalName, outputDir, {
        keys,
        allowObjects: options.allowObjects ?? true,
        schema: options.schema,
      });
      break;
    }
    case 'json': {
      // Convert to JSON array format
      const jsonData = data.toRecords();
      fs.writeFileSync(`${outputPath}.json`, JSON.stringify(jsonData, null, 2));
      break;
    }
    case 'jsonl': {
      // Convert to JSONL format (one JSON object per line)
      const jsonlData = data
        .toRecords()
        .map((record) => JSON.stringify(record))
        .join('\n');
      fs.writeFileSync(`${outputPath}.jsonl`, jsonlData);
      break;
    }
    default:
      // csv
      data.writeCSV(`${outputPath}.csv`);
      break;
  }
}

/**
 * Build the variable map used to fill `{placeholder}` templates.
 *
 * Precedence, lowest to highest: tenant metadata (never overwrites a key
 * present in `defaultKwargs`), `defaultKwargs`, then the environment-derived
 * values, which are assigned last and overwrite same-named keys.
 *
 * @param {object} [defaultKwargs] - Caller-supplied variables; not mutated.
 * @param {boolean} [useTenantMetadata] - When true, merge
 *   `hotglue_metadata.metadata` from
 *   `<ROOT or '.'>/snapshots/tenant-config.json` if that file exists.
 * @param {string} [subtenantDelimiter] - Separator used to split `TENANT`
 *   into root and sub tenant ids.
 * @returns {object} A new object with the merged variables.
 */
export function buildStringFormatVariables(defaultKwargs = {}, useTenantMetadata = true, subtenantDelimiter = '_') {
  const reservedKeys = new Set(Object.keys(defaultKwargs));
  const finalKwargs = { ...defaultKwargs };

  // Build tenant metadata variables
  if (useTenantMetadata) {
    const tenantMetadataPath = path.join(process.env.ROOT || '.', 'snapshots', 'tenant-config.json');
    if (fs.existsSync(tenantMetadataPath)) {
      try {
        const tenantConfig = fs.readJsonSync(tenantMetadataPath);
        const tenantMetadata = tenantConfig?.hotglue_metadata?.metadata || {};
        // Add tenant metadata that doesn't conflict with reserved keys
        for (const [k, v] of Object.entries(tenantMetadata)) {
          if (!reservedKeys.has(k)) {
            finalKwargs[k] = String(v);
          }
        }
      } catch (error) {
        // Best effort: an unreadable config must not abort the export.
        console.warn(`Failed to read tenant metadata: ${error}`);
      }
    }
  }

  // Add environment variables
  const flowId = process.env.FLOW || '';
  const jobId = process.env.JOB_ID || '';
  const tap = process.env.TAP || '';
  const connector = process.env.CONNECTOR_ID || '';
  const tenantId = process.env.TENANT || '';
  const envId = process.env.ENV_ID || '';

  // Only the first two delimiter-separated segments are used.
  const splittedTenantId = tenantId.split(subtenantDelimiter);
  const rootTenantId = splittedTenantId[0] || '';
  const subTenantId = splittedTenantId[1] || '';

  Object.assign(finalKwargs, {
    tenant: tenantId,
    tenant_id: tenantId,
    root_tenant_id: rootTenantId,
    sub_tenant_id: subTenantId,
    env_id: envId,
    flow_id: flowId,
    job_id: jobId,
    tap,
    connector,
  });

  return finalKwargs;
}

/**
 * Replace every `{key}` placeholder in `strToFormat` with its value from
 * `formatVariables`.
 *
 * Placeholders whose value is falsy (or that have no entry at all) are left
 * untouched. Keys and values are treated as literal text: regex
 * metacharacters in a key and `$`-sequences in a value have no special
 * meaning.
 *
 * @param {string} strToFormat - Template containing `{key}` placeholders.
 * @param {object} formatVariables - Map of placeholder name to value.
 * @returns {string} The formatted string.
 */
export function formatStrSafely(strToFormat, formatVariables) {
  let strOutput = strToFormat;
  for (const [k, v] of Object.entries(formatVariables)) {
    if (v) {
      // split/join performs a literal global replacement, avoiding both
      // regex injection through `k` and `$&`/`$1` expansion in `v`.
      strOutput = strOutput.split(`{${k}}`).join(String(v));
    }
  }
  return strOutput;
}

/**
 * Return `arr[index]`, or `null` when the element is `null`/`undefined` or
 * the access throws (for example when `arr` itself is `null`/`undefined`).
 *
 * @param {*} arr - Indexable value.
 * @param {number|string} index - Index or key to read.
 * @returns {*} The element, or `null`.
 */
export function getIndexSafely(arr, index) {
  try {
    return arr[index] ?? null;
  } catch {
    return null;
  }
}
//# sourceMappingURL=etl-utils.js.map