UNPKG

signalk-parquet

Version:

SignalK plugin to save marine data directly to Parquet files with regimen-based control

898 lines (780 loc) 27.2 kB
import * as fs from 'fs-extra';
import * as path from 'path';
import { glob } from 'glob';
import {
  PluginConfig,
  PathConfig,
  DataRecord,
  PluginState,
  NormalizedDelta,
} from './types';
import { extractCommandName } from './commands';
import {
  Context,
  Delta,
  hasValues,
  Path,
  PathValue,
  ServerAPI,
  Update,
} from '@signalk/server-api';

// AWS S3 for file upload. The SDK is loaded lazily by initializeS3, so these
// stay undefined until S3 upload is enabled and the import has succeeded.
// eslint-disable-next-line @typescript-eslint/no-explicit-any
let S3Client: any,
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  PutObjectCommand: any,
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  ListObjectsV2Command: any,
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  HeadObjectCommand: any;

let appInstance: ServerAPI;

/** Renders an unknown thrown value as a short message suitable for logging. */
function describeError(error: unknown): string {
  return error instanceof Error ? error.message : String(error);
}

/**
 * Extracts the SignalK path from a buffer key of the form `context:path`.
 *
 * The context part may itself contain colons (e.g. `vessels.urn:mrn:...`), so
 * the path is taken to be everything after the LAST colon. A key without any
 * colon is returned unchanged.
 */
function pathFromBufferKey(bufferKey: string): string {
  const separatorIndex = bufferKey.lastIndexOf(':');
  return separatorIndex >= 0 ? bufferKey.slice(separatorIndex + 1) : bufferKey;
}

/** Picks the Content-Type sent to S3 based on the file extension. */
function contentTypeFor(filePath: string): string {
  if (filePath.endsWith('.parquet')) {
    return 'application/octet-stream';
  }
  if (filePath.endsWith('.csv')) {
    return 'text/csv';
  }
  return 'application/json';
}

/**
 * Lazily loads the AWS S3 SDK when S3 upload is enabled.
 *
 * On import failure the module-level `S3Client` is reset to `undefined`, which
 * makes `createS3Client` return `undefined`; the failure is logged rather than
 * thrown.
 *
 * @param config Plugin configuration; only `s3Upload.enabled` is read here.
 * @param app SignalK server API, remembered at module level and used for logging.
 */
export async function initializeS3(
  config: PluginConfig,
  app: ServerAPI
): Promise<void> {
  appInstance = app;

  if (!config.s3Upload.enabled) {
    return;
  }

  try {
    // Import only once; later calls reuse the already-loaded constructors.
    if (!S3Client) {
      const awsS3 = await import('@aws-sdk/client-s3');
      S3Client = awsS3.S3Client;
      PutObjectCommand = awsS3.PutObjectCommand;
      ListObjectsV2Command = awsS3.ListObjectsV2Command;
      HeadObjectCommand = awsS3.HeadObjectCommand;
    }
  } catch (importError) {
    S3Client = undefined;
    app.error(
      `Failed to load @aws-sdk/client-s3, S3 upload disabled: ${describeError(importError)}`
    );
  }
}

/**
 * Builds an S3 client from the plugin configuration.
 *
 * @param config Plugin configuration (`s3Upload.region`, optional credentials).
 * @param app SignalK server API, used for logging.
 * @returns The client, or `undefined` when S3 upload is disabled, the SDK has
 *   not been loaded, or construction fails.
 */
export function createS3Client(
  config: PluginConfig,
  app: ServerAPI
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
): any {
  if (!config.s3Upload.enabled || !S3Client) {
    return undefined;
  }

  try {
    const s3Config: {
      region: string;
      credentials?: { accessKeyId: string; secretAccessKey: string };
    } = {
      // `||` on purpose: an empty region string also falls back to the default.
      region: config.s3Upload.region || 'us-east-1',
    };

    // Add credentials only if both halves are provided.
    if (config.s3Upload.accessKeyId && config.s3Upload.secretAccessKey) {
      s3Config.credentials = {
        accessKeyId: config.s3Upload.accessKeyId,
        secretAccessKey: config.s3Upload.secretAccessKey,
      };
    }

    return new S3Client(s3Config);
  } catch (error) {
    app.error(`Failed to create S3 client: ${describeError(error)}`);
    return undefined;
  }
}

/** Selects the enabled path configs whose path starts with `commands.`. */
function selectEnabledCommandPaths(currentPaths: PathConfig[]): PathConfig[] {
  return currentPaths.filter(
    (pathConfig: PathConfig) =>
      pathConfig &&
      pathConfig.path &&
      pathConfig.path.startsWith('commands.') &&
      pathConfig.enabled
  );
}

/**
 * Subscribes to the enabled command paths (those that control regimens) via
 * the server's subscription manager.
 *
 * Unsubscribe callbacks are collected in `state.unsubscribes` and every
 * subscribed path is added to `state.subscribedPaths`.
 */
export function subscribeToCommandPaths(
  currentPaths: PathConfig[],
  state: PluginState,
  config: PluginConfig,
  app: ServerAPI
): void {
  const commandPaths = selectEnabledCommandPaths(currentPaths);

  if (commandPaths.length === 0) return;

  const commandSubscription = {
    context: 'vessels.self' as Context,
    subscribe: commandPaths.map((pathConfig: PathConfig) => ({
      path: pathConfig.path,
      period: 1000, // Check commands every second
      policy: 'fixed' as const,
    })),
  };

  app.subscriptionmanager.subscribe(
    commandSubscription,
    state.unsubscribes,
    (subscriptionError: unknown) => {
      app.error(
        `Command subscription error: ${describeError(subscriptionError)}`
      );
    },
    (delta: Delta) => {
      if (!delta.updates) {
        return;
      }
      // Process each value of each update in the delta.
      delta.updates.forEach((update: Update) => {
        if (!hasValues(update)) {
          return;
        }
        update.values.forEach((valueUpdate: PathValue) => {
          const pathConfig = commandPaths.find(
            p => p.path === valueUpdate.path
          );
          if (pathConfig) {
            handleCommandMessage(
              valueUpdate,
              pathConfig,
              config,
              update,
              state,
              app
            );
          }
        });
      });
    }
  );

  commandPaths.forEach(pathConfig => {
    state.subscribedPaths.add(pathConfig.path);
  });
}

/**
 * Handles one command value (regimen control): updates `state.activeRegimens`
 * from the truthiness of the value and buffers the change as a data record.
 *
 * When the path config names a source, values from any other source are
 * ignored.
 */
function handleCommandMessage(
  valueUpdate: PathValue,
  pathConfig: PathConfig,
  config: PluginConfig,
  update: Update,
  state: PluginState,
  app: ServerAPI
): void {
  try {
    // Check source filter if specified for commands too
    if (pathConfig.source && pathConfig.source.trim() !== '') {
      const messageSource =
        update.$source || (update.source ? update.source.label : null);
      if (messageSource !== pathConfig.source.trim()) {
        return;
      }
    }

    if (valueUpdate.value === undefined) {
      return;
    }

    const commandName = extractCommandName(pathConfig.path);
    if (valueUpdate.value) {
      state.activeRegimens.add(commandName);
    } else {
      state.activeRegimens.delete(commandName);
    }

    // Buffer this command change with complete source metadata
    const bufferKey = `${pathConfig.context || 'vessels.self'}:${pathConfig.path}`;
    bufferData(
      bufferKey,
      {
        received_timestamp: new Date().toISOString(),
        signalk_timestamp: update.timestamp || new Date().toISOString(),
        context: 'vessels.self',
        path: valueUpdate.path,
        value: valueUpdate.value,
        source: update.source ? JSON.stringify(update.source) : undefined,
        source_label:
          update.$source || (update.source ? update.source.label : undefined),
        source_type: update.source ? update.source.type : undefined,
        source_pgn: update.source ? update.source.pgn : undefined,
        source_src: update.source ? update.source.src : undefined,
      },
      config,
      state,
      app
    );
  } catch (error) {
    app.error(
      `Error handling command ${pathConfig.path}: ${describeError(error)}`
    );
  }
}

/**
 * Normalises a path config whose context is the `vessels.*` wildcard.
 *
 * The wildcard is kept as the context (typed as `Context`); per-vessel
 * filtering happens later in the stream filter. Any other config is returned
 * unchanged.
 */
function handleWildcardContext(pathConfig: PathConfig): PathConfig {
  const context = pathConfig.context || 'vessels.self';

  if (context !== 'vessels.*') {
    // Not a wildcard, return as-is
    return pathConfig;
  }

  return {
    ...pathConfig,
    context: 'vessels.*' as Context, // Keep the wildcard for the subscription
  };
}

/**
 * Reports whether a vessel should be excluded based on the config's
 * `excludeMMSI` list.
 *
 * Only `vessels.self` is actually checked (via `app.getSelfPath('mmsi')`);
 * other contexts are never excluded here.
 * NOTE(review): this helper is not called anywhere in this file.
 */
function shouldExcludeVessel(
  vesselContext: string,
  pathConfig: PathConfig,
  app: ServerAPI
): boolean {
  if (!pathConfig.excludeMMSI || pathConfig.excludeMMSI.length === 0) {
    return false; // No exclusions specified
  }

  try {
    if (vesselContext === 'vessels.self') {
      const mmsiData = app.getSelfPath('mmsi');
      if (mmsiData && mmsiData.value) {
        return pathConfig.excludeMMSI.includes(String(mmsiData.value));
      }
    }
    // For other vessels no MMSI lookup is performed here.
  } catch (error) {
    app.debug(`MMSI exclusion check failed: ${describeError(error)}`);
  }

  return false; // Don't exclude if we can't determine MMSI
}

/**
 * Stream filter: decides whether a normalized delta should be recorded for the
 * given path config (source filter, context filter, then MMSI exclusion).
 */
function deltaMatchesPathConfig(
  normalizedDelta: NormalizedDelta,
  pathConfig: PathConfig,
  app: ServerAPI
): boolean {
  // Filter by source if specified
  if (pathConfig.source && pathConfig.source.trim() !== '') {
    if (normalizedDelta.$source !== pathConfig.source.trim()) {
      return false;
    }
  }

  // Filter by context
  const targetContext = pathConfig.context || 'vessels.self';
  if (targetContext === 'vessels.*') {
    // For wildcard, accept any vessel context
    if (!normalizedDelta.context.startsWith('vessels.')) {
      return false;
    }
  } else if (targetContext === 'vessels.self') {
    // Self data is looked up per delta rather than cached.
    const selfContext = app.selfContext;
    const selfVessel = app.getSelfPath('') || {};
    const selfMMSI = selfVessel.mmsi;
    const selfUuid = app.getSelfPath('uuid');

    const isSelfVessel =
      normalizedDelta.context === 'vessels.self' ||
      normalizedDelta.context === selfContext ||
      Boolean(selfMMSI && normalizedDelta.context.includes(selfMMSI)) ||
      Boolean(selfUuid && normalizedDelta.context.includes(selfUuid));

    if (!isSelfVessel) {
      return false;
    }
  } else if (normalizedDelta.context !== targetContext) {
    // For a specific context, match exactly
    return false;
  }

  // MMSI exclusion filtering: drop contexts that contain an excluded MMSI
  if (pathConfig.excludeMMSI && pathConfig.excludeMMSI.length > 0) {
    const contextHasExcludedMMSI = pathConfig.excludeMMSI.some(mmsi =>
      normalizedDelta.context.includes(mmsi)
    );
    if (contextHasExcludedMMSI) {
      return false;
    }
  }

  return true;
}

/**
 * Rebuilds all subscriptions from `currentPaths` and the active regimens.
 *
 * Existing subscription-manager subscriptions and previously created
 * streambundle subscriptions are released first, then command paths are
 * re-subscribed and one debounced stream is created per data path that
 * `shouldSubscribeToPath` accepts.
 */
export function updateDataSubscriptions(
  currentPaths: PathConfig[],
  state: PluginState,
  config: PluginConfig,
  app: ServerAPI
): void {
  // First, unsubscribe from all existing subscriptions
  state.unsubscribes.forEach(unsubscribe => {
    if (typeof unsubscribe === 'function') {
      unsubscribe();
    }
  });
  state.unsubscribes = [];

  // Also release earlier streambundle subscriptions; otherwise every call
  // stacks another listener per path and records are buffered multiple times.
  // Presumably each entry is the unsubscribe function returned by onValue —
  // anything that is not a function is left alone.
  (state.streamSubscriptions || []).forEach(entry => {
    const candidate: unknown = entry;
    if (typeof candidate === 'function') {
      (candidate as () => void)();
    }
  });
  state.streamSubscriptions = [];

  state.subscribedPaths.clear();

  // Re-subscribe to command paths
  subscribeToCommandPaths(currentPaths, state, config, app);

  // Now subscribe to data paths using currentPaths
  const dataPaths = currentPaths.filter(
    (pathConfig: PathConfig) =>
      pathConfig &&
      pathConfig.path &&
      !pathConfig.path.startsWith('commands.')
  );

  const shouldSubscribePaths = dataPaths.filter((pathConfig: PathConfig) =>
    shouldSubscribeToPath(pathConfig, state, app)
  );

  // Handle wildcard contexts (like vessels.*)
  const processedPaths: PathConfig[] = shouldSubscribePaths.map(pathConfig =>
    handleWildcardContext(pathConfig)
  );

  if (processedPaths.length === 0) {
    return;
  }

  // Group paths by context so streams are created context by context
  const contextGroups = new Map<Context, PathConfig[]>();
  processedPaths.forEach((pathConfig: PathConfig) => {
    const context = (pathConfig.context || 'vessels.self') as Context;
    const group = contextGroups.get(context);
    if (group) {
      group.push(pathConfig);
    } else {
      contextGroups.set(context, [pathConfig]);
    }
  });

  // Use app.streambundle: one filtered, debounced stream per path
  contextGroups.forEach(pathConfigs => {
    pathConfigs.forEach((pathConfig: PathConfig) => {
      const stream = app.streambundle
        .getBus(pathConfig.path as Path)
        .filter((normalizedDelta: NormalizedDelta) =>
          deltaMatchesPathConfig(normalizedDelta, pathConfig, app)
        )
        .debounceImmediate(1000) // Built-in debouncing
        .onValue((normalizedDelta: NormalizedDelta) => {
          handleStreamData(normalizedDelta, pathConfig, config, state, app);
        });

      // Store stream reference for cleanup (instead of unsubscribe functions)
      state.streamSubscriptions = state.streamSubscriptions || [];
      state.streamSubscriptions.push(stream);
      state.subscribedPaths.add(pathConfig.path);
    });
  });
}

/**
 * Determines whether a data path should be subscribed: always when the config
 * is enabled, otherwise when any regimen in its comma-separated `regimen` list
 * is currently active.
 */
function shouldSubscribeToPath(
  pathConfig: PathConfig,
  state: PluginState,
  app: ServerAPI
): boolean {
  // Always subscribe if explicitly enabled
  if (pathConfig.enabled) {
    return true;
  }

  if (!pathConfig.regimen) {
    return false;
  }

  // Check if any required regimen is active
  return pathConfig.regimen
    .split(',')
    .map(r => r.trim())
    .some(regimen => state.activeRegimens.has(regimen));
}

/**
 * Converts one normalized delta from the streambundle into a `DataRecord` and
 * buffers it under `context:path`.
 *
 * Object values are stored as JSON in `value_json`, with their primitive
 * (string/number/boolean) properties flattened into `value_<key>` columns;
 * all other values go to `value`.
 */
function handleStreamData(
  normalizedDelta: NormalizedDelta,
  pathConfig: PathConfig,
  config: PluginConfig,
  state: PluginState,
  app: ServerAPI
): void {
  try {
    // Retrieve metadata for this path
    let metadata: string | undefined;
    try {
      // eslint-disable-next-line @typescript-eslint/no-explicit-any
      const pathMetadata = (app as any).getMetadata?.(normalizedDelta.path);
      if (pathMetadata) {
        metadata = JSON.stringify(pathMetadata);
      }
    } catch (error) {
      // Metadata retrieval failed, continue without it
    }

    const record: DataRecord = {
      received_timestamp: new Date().toISOString(),
      signalk_timestamp: normalizedDelta.timestamp || new Date().toISOString(),
      context: normalizedDelta.context || pathConfig.context || 'vessels.self',
      path: normalizedDelta.path,
      value: null,
      value_json: undefined,
      source: normalizedDelta.source
        ? JSON.stringify(normalizedDelta.source)
        : undefined,
      source_label: normalizedDelta.$source || undefined,
      source_type: normalizedDelta.source
        ? normalizedDelta.source.type
        : undefined,
      source_pgn: normalizedDelta.source
        ? normalizedDelta.source.pgn
        : undefined,
      source_src: normalizedDelta.source
        ? normalizedDelta.source.src
        : undefined,
      meta: metadata,
    };

    // Handle complex values
    if (
      typeof normalizedDelta.value === 'object' &&
      normalizedDelta.value !== null
    ) {
      record.value_json = JSON.stringify(normalizedDelta.value);

      // Extract key properties as columns for easier querying
      Object.entries(normalizedDelta.value).forEach(([key, val]) => {
        if (
          typeof val === 'string' ||
          typeof val === 'number' ||
          typeof val === 'boolean'
        ) {
          // eslint-disable-next-line @typescript-eslint/no-explicit-any
          (record as any)[`value_${key}`] = val;
        }
      });
    } else {
      record.value = normalizedDelta.value;
    }

    // Use actual context + path as buffer key to separate data from different vessels
    const bufferKey = `${normalizedDelta.context}:${pathConfig.path}`;
    bufferData(bufferKey, record, config, state, app);
  } catch (error) {
    // Debug level: this runs per delta, so avoid flooding the error log.
    app.debug(
      `Error handling stream data for ${pathConfig.path}: ${describeError(error)}`
    );
  }
}

/**
 * Appends a record to the buffer for `signalkPath` (a `context:path` key) and
 * starts a save once the buffer holds `config.bufferSize` records.
 *
 * The save is fire-and-forget: the buffer is replaced with a fresh array
 * immediately, and `saveBufferToParquet` handles its own errors.
 */
function bufferData(
  signalkPath: string,
  record: DataRecord,
  config: PluginConfig,
  state: PluginState,
  app: ServerAPI
): void {
  let buffer = state.dataBuffers.get(signalkPath);
  if (!buffer) {
    buffer = [];
    state.dataBuffers.set(signalkPath, buffer);
  }
  buffer.push(record);

  if (buffer.length >= config.bufferSize) {
    const actualPath = pathFromBufferKey(signalkPath);
    void saveBufferToParquet(actualPath, buffer, config, state, app);
    state.dataBuffers.set(signalkPath, []); // Clear buffer
  }
}

/**
 * Saves every non-empty buffer (called periodically and on shutdown).
 *
 * Saves are started but not awaited; each buffer is replaced with a fresh
 * array right after its save has been kicked off.
 */
export function saveAllBuffers(
  config: PluginConfig,
  state: PluginState,
  app: ServerAPI
): void {
  state.dataBuffers.forEach((buffer, signalkPath) => {
    if (buffer.length === 0) {
      return;
    }
    const actualPath = pathFromBufferKey(signalkPath);
    void saveBufferToParquet(actualPath, buffer, config, state, app);
    state.dataBuffers.set(signalkPath, []); // Clear buffer
  });
}

/**
 * Writes one buffer to a file under
 * `<outputDirectory>/<context path>/<signalk path with dots as slashes>/`.
 *
 * The file is named `<filenamePrefix>_<UTC timestamp>.<ext>`. When S3 upload
 * is enabled with `realtime` timing, the written file is uploaded afterwards.
 * Errors are logged, never thrown.
 */
async function saveBufferToParquet(
  signalkPath: string,
  buffer: DataRecord[],
  config: PluginConfig,
  state: PluginState,
  app: ServerAPI
): Promise<void> {
  if (buffer.length === 0) {
    return;
  }

  const writer = state.parquetWriter;
  if (!writer) {
    app.error(
      `Cannot save ${buffer.length} record(s) for ${signalkPath}: parquet writer is not initialised`
    );
    return;
  }

  try {
    // Context comes from the first record (a buffer holds a single context/path)
    const context = buffer[0].context;

    // Create proper directory structure
    let contextPath: string;
    if (context === 'vessels.self') {
      // Clean the self context for filesystem usage (dots -> slashes, colons -> underscores)
      contextPath = app.selfContext.replace(/\./g, '/').replace(/:/g, '_');
    } else if (context.startsWith('vessels.')) {
      // Extract vessel identifier and clean it for filesystem
      const vesselId = context.replace('vessels.', '').replace(/:/g, '_');
      contextPath = `vessels/${vesselId}`;
    } else if (context.startsWith('meteo.')) {
      // Extract meteo station identifier and clean it for filesystem
      const meteoId = context.replace('meteo.', '').replace(/:/g, '_');
      contextPath = `meteo/${meteoId}`;
    } else {
      // Fallback: clean the entire context
      contextPath = context.replace(/:/g, '_').replace(/\./g, '/');
    }

    const dirPath = path.join(
      config.outputDirectory,
      contextPath,
      signalkPath.replace(/\./g, '/')
    );
    await fs.ensureDir(dirPath);

    // ISO timestamp without ':' and '.', cut to minutes: e.g. 2025-07-14T1847
    const timestamp = new Date()
      .toISOString()
      .replace(/[:.]/g, '')
      .slice(0, 15);

    let fileExt = 'json';
    if (config.fileFormat === 'csv') {
      fileExt = 'csv';
    } else if (config.fileFormat === 'parquet') {
      fileExt = 'parquet';
    }

    const filename = `${config.filenamePrefix}_${timestamp}.${fileExt}`;
    const filepath = path.join(dirPath, filename);

    // Use ParquetWriter to save in the configured format
    const savedPath = await writer.writeRecords(filepath, buffer);

    // Upload to S3 if enabled and timing is real-time
    if (config.s3Upload.enabled && config.s3Upload.timing === 'realtime') {
      await uploadToS3(savedPath, config, state, app);
    }
  } catch (error) {
    app.error(
      `Error saving buffer for ${signalkPath}: ${describeError(error)}`
    );
  }
}

/**
 * Seeds `state.activeRegimens` at startup from the current values of the
 * enabled command paths, read through `app.getSelfPath`.
 *
 * NOTE: a configured `source` filter is not applied here; any current value
 * is accepted.
 */
export function initializeRegimenStates(
  currentPaths: PathConfig[],
  state: PluginState,
  app: ServerAPI
): void {
  const commandPaths = selectEnabledCommandPaths(currentPaths);

  commandPaths.forEach((pathConfig: PathConfig) => {
    try {
      // Get current value from SignalK API
      const currentData = app.getSelfPath(pathConfig.path);
      if (
        currentData === undefined ||
        currentData === null ||
        currentData.value === undefined
      ) {
        return;
      }

      const commandName = extractCommandName(pathConfig.path);
      if (currentData.value) {
        state.activeRegimens.add(commandName);
      } else {
        state.activeRegimens.delete(commandName);
      }
    } catch (error) {
      app.error(
        `Error initialising regimen state for ${pathConfig.path}: ${describeError(error)}`
      );
    }
  });
}

/**
 * Startup consolidation for missed previous days (the current UTC day is
 * excluded).
 *
 * Scans the output directory for non-consolidated `.parquet` files, derives
 * the day from the filename timestamp and consolidates every day strictly
 * before today. Filenames are produced from `toISOString()`, so all date
 * arithmetic here is done in UTC.
 */
export async function consolidateMissedDays(
  config: PluginConfig,
  state: PluginState,
  app: ServerAPI
): Promise<void> {
  try {
    const outputDir = config.outputDirectory;
    if (!(await fs.pathExists(outputDir))) {
      return;
    }

    const writer = state.parquetWriter;
    if (!writer) {
      app.error('Cannot consolidate missed days: parquet writer is not initialised');
      return;
    }

    // Start of the current UTC day
    const today = new Date();
    today.setUTCHours(0, 0, 0, 0);

    const files = await glob('**/*.parquet', {
      cwd: outputDir,
      absolute: true,
      nodir: true,
    });

    // UTC-midnight timestamps of the days that need consolidation
    const daysNeedingConsolidation = new Set<number>();

    for (const file of files) {
      // Skip already consolidated files
      if (file.includes('_consolidated.parquet')) {
        continue;
      }

      // Extract date from filename (format: signalk_data_2025-07-14T1847.parquet)
      const dateMatch = path
        .basename(file)
        .match(/(\d{4})-(\d{2})-(\d{2})T\d{4}\.parquet$/);
      if (!dateMatch) {
        continue;
      }

      const fileDay = Date.UTC(
        parseInt(dateMatch[1], 10),
        parseInt(dateMatch[2], 10) - 1, // Month is 0-based
        parseInt(dateMatch[3], 10)
      );

      // Only consolidate if file is from before today
      if (fileDay < today.getTime()) {
        daysNeedingConsolidation.add(fileDay);
      }
    }

    // Consolidate each missed day
    for (const dayMs of daysNeedingConsolidation) {
      // UTC midnight, so toISOString() yields the same calendar day as the filename.
      // NOTE(review): assumes consolidateDaily also derives the day in UTC — confirm.
      const date = new Date(dayMs);
      const consolidatedCount = await writer.consolidateDaily(
        config.outputDirectory,
        date,
        config.filenamePrefix
      );

      // Upload consolidated files to S3 if enabled and timing is consolidation
      if (
        consolidatedCount > 0 &&
        config.s3Upload.enabled &&
        config.s3Upload.timing === 'consolidation'
      ) {
        await uploadConsolidatedFilesToS3(config, date, state, app);
      }
    }
  } catch (error) {
    app.error(`Error consolidating missed days: ${describeError(error)}`);
  }
}

/**
 * Daily consolidation: consolidates the files of the previous UTC day and,
 * when configured for `consolidation` timing, uploads the results to S3.
 */
export async function consolidateYesterday(
  config: PluginConfig,
  state: PluginState,
  app: ServerAPI
): Promise<void> {
  try {
    const writer = state.parquetWriter;
    if (!writer) {
      app.error('Cannot consolidate yesterday: parquet writer is not initialised');
      return;
    }

    const yesterday = new Date();
    yesterday.setUTCDate(yesterday.getUTCDate() - 1);

    const consolidatedCount = await writer.consolidateDaily(
      config.outputDirectory,
      yesterday,
      config.filenamePrefix
    );

    // Upload consolidated files to S3 if enabled and timing is consolidation
    if (
      consolidatedCount > 0 &&
      config.s3Upload.enabled &&
      config.s3Upload.timing === 'consolidation'
    ) {
      await uploadConsolidatedFilesToS3(config, yesterday, state, app);
    }
  } catch (error) {
    app.error(`Error consolidating yesterday's files: ${describeError(error)}`);
  }
}

/**
 * Uploads every existing `*_consolidated.parquet` file under the output
 * directory to S3 (catch-up upload). Files are uploaded one at a time.
 */
export async function uploadAllConsolidatedFilesToS3(
  config: PluginConfig,
  state: PluginState,
  app: ServerAPI
): Promise<void> {
  try {
    // Find all consolidated parquet files
    const consolidatedFiles = await glob(`**/*_consolidated.parquet`, {
      cwd: config.outputDirectory,
      absolute: true,
      nodir: true,
    });

    let uploadedCount = 0;
    for (const filePath of consolidatedFiles) {
      const success = await uploadToS3(filePath, config, state, app);
      if (success) uploadedCount++;
    }

    app.debug(
      `Uploaded ${uploadedCount} of ${consolidatedFiles.length} consolidated file(s) to S3`
    );
  } catch (error) {
    app.error(
      `Error uploading consolidated files to S3: ${describeError(error)}`
    );
  }
}

/**
 * Uploads the consolidated files of one day to S3. The day is the UTC date of
 * `date` (`toISOString()` up to the `T`).
 */
async function uploadConsolidatedFilesToS3(
  config: PluginConfig,
  date: Date,
  state: PluginState,
  app: ServerAPI
): Promise<void> {
  try {
    const dateStr = date.toISOString().split('T')[0];

    // Find all consolidated files for the date
    const consolidatedFiles = await glob(
      `**/*_${dateStr}_consolidated.parquet`,
      {
        cwd: config.outputDirectory,
        absolute: true,
        nodir: true,
      }
    );

    // Upload each consolidated file
    for (const filePath of consolidatedFiles) {
      await uploadToS3(filePath, config, state, app);
    }
  } catch (error) {
    app.error(
      `Error uploading consolidated files for ${date.toISOString()}: ${describeError(error)}`
    );
  }
}

/**
 * Uploads one file to S3 under `<keyPrefix>/<path relative to outputDirectory>`.
 *
 * If the object already exists and its `LastModified` is not older than the
 * local file's mtime, the upload is skipped and counts as success. When
 * `deleteAfterUpload` is set, the local file is removed after a completed
 * upload (not after a skipped one).
 *
 * @returns `true` when the file was uploaded or is already up to date;
 *   `false` when S3 is unavailable/disabled or the upload failed.
 */
async function uploadToS3(
  filePath: string,
  config: PluginConfig,
  state: PluginState,
  app: ServerAPI
): Promise<boolean> {
  if (!config.s3Upload.enabled || !state.s3Client || !PutObjectCommand) {
    return false;
  }

  try {
    // Generate S3 key first. S3 keys use '/', so normalise OS-specific
    // separators (backslashes on Windows).
    const relativePath = path
      .relative(config.outputDirectory, filePath)
      .split(path.sep)
      .join('/');
    let s3Key = relativePath;
    if (config.s3Upload.keyPrefix) {
      const prefix = config.s3Upload.keyPrefix.endsWith('/')
        ? config.s3Upload.keyPrefix
        : `${config.s3Upload.keyPrefix}/`;
      s3Key = `${prefix}${relativePath}`;
    }

    // Check if file exists in S3 and compare timestamps
    const localStats = await fs.stat(filePath);
    let shouldUpload = true;

    try {
      if (HeadObjectCommand) {
        const headCommand = new HeadObjectCommand({
          Bucket: config.s3Upload.bucket,
          Key: s3Key,
        });
        const s3Object = await state.s3Client.send(headCommand);

        if (s3Object.LastModified) {
          const s3LastModified = new Date(s3Object.LastModified);
          const localLastModified = new Date(localStats.mtime);
          if (localLastModified <= s3LastModified) {
            shouldUpload = false;
          }
        }
      }
      // eslint-disable-next-line @typescript-eslint/no-explicit-any
    } catch (headError: any) {
      // Any HEAD failure (missing object or otherwise) falls through to an
      // upload attempt; only unexpected failures are worth a debug line.
      const isNotFound =
        headError?.name === 'NotFound' ||
        headError?.$metadata?.httpStatusCode === 404;
      if (!isNotFound) {
        app.debug(
          `S3 HEAD failed for ${s3Key}, uploading anyway: ${describeError(headError)}`
        );
      }
    }

    if (!shouldUpload) {
      return true; // Consider it successful since file is already up to date
    }

    // Read the file
    const fileContent = await fs.readFile(filePath);

    // Upload to S3
    const command = new PutObjectCommand({
      Bucket: config.s3Upload.bucket,
      Key: s3Key,
      Body: fileContent,
      ContentType: contentTypeFor(filePath),
    });
    await state.s3Client.send(command);

    // Delete local file if configured
    if (config.s3Upload.deleteAfterUpload) {
      await fs.unlink(filePath);
    }

    return true;
  } catch (error) {
    app.error(`Error uploading ${filePath} to S3: ${describeError(error)}`);
    return false;
  }
}