UNPKG

signalk-parquet

Version:

SignalK plugin to save marine data directly to Parquet files with regimen-based control

539 lines (475 loc) 18.7 kB
import { Router, Request, Response } from 'express';
import {
  AggregateMethod,
  DataResult,
  FromToContextRequest,
  PathSpec,
} from './HistoryAPI-types';
import { ZonedDateTime, ZoneOffset, ZoneId, LocalDateTime } from '@js-joda/core';
import { Context, Path, Timestamp } from '@signalk/server-api';
import { ParamsDictionary } from 'express-serve-static-core';
import { ParsedQs } from 'qs';
import { DuckDBInstance } from '@duckdb/node-api';
import { toContextFilePath } from '.';
import path from 'path';
import { getAvailablePathsArray } from './utils/path-discovery';

/**
 * Registers the history routes on `router`, once under `/signalk/v1/history`
 * and once under `/api/history`.
 *
 * @param router  Anything exposing Express' `get` registration method.
 * @param selfId  Identifier of the own vessel; `vessels.${selfId}` is the
 *                default context.
 * @param dataDir Root directory passed to `HistoryAPI` and to path discovery.
 * @param debug   Logger used for request and query diagnostics.
 * @param app     Handed through unchanged to `getAvailablePathsArray`.
 */
export function registerHistoryApiRoute(
  router: Pick<Router, 'get'>,
  selfId: string,
  dataDir: string,
  debug: (k: string) => void,
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  app: any
) {
  const historyApi = new HistoryAPI(selfId, dataDir);

  // Shared by both `values` routes. Parameter errors are caused by the caller,
  // so they are answered with 400 instead of escaping the handler as a throw.
  const handleValues = (req: Request, res: Response): void => {
    let params: ReturnType<typeof getRequestParams>;
    try {
      params = getRequestParams(req as FromToContextRequest, selfId);
    } catch (error) {
      debug(`Invalid history request: ${error}`);
      res.status(400).json({
        error: 'Bad request',
        message: error instanceof Error ? error.message : String(error),
      });
      return;
    }
    const { from, to, context, shouldRefresh } = params;
    // getValues reports its own failures through `res`, so the promise is
    // intentionally not awaited here.
    void historyApi.getValues(context, from, to, shouldRefresh, debug, req, res);
  };

  // Shared by both `contexts` routes.
  const handleContexts = (_req: Request, res: Response): void => {
    //TODO implement retrieval of contexts for the given period
    res.json([`vessels.${selfId}`] as Context[]);
  };

  router.get('/signalk/v1/history/values', handleValues);
  router.get('/signalk/v1/history/contexts', handleContexts);
  router.get('/signalk/v1/history/paths', (_req: Request, res: Response) => {
    try {
      const paths = getAvailablePathsArray(dataDir, app);
      res.json(paths);
    } catch (error) {
      res.status(500).json({ error: (error as Error).message });
    }
  });

  // Also register as plugin-style routes for testing
  router.get('/api/history/values', handleValues);
  router.get('/api/history/contexts', handleContexts);
  router.get('/api/history/paths', (_req: Request, res: Response) => {
    // NOTE(review): unlike /signalk/v1/history/paths this returns a fixed
    // list rather than the discovered paths - confirm this is intended.
    res.json(['navigation.speedOverGround']);
  });
}

/**
 * Extracts the time range, context and refresh flag from the request query.
 *
 * Two forms are accepted: `start` + `duration` (the range ends at `start`
 * and reaches `duration` back in time; `start=now` uses the current UTC time
 * and is the only form that honours `refresh`), or `from` + `to`.
 *
 * @throws Error when neither form is complete or a value cannot be parsed.
 */
const getRequestParams = ({ query }: FromToContextRequest, selfId: string) => {
  try {
    let from: ZonedDateTime;
    let to: ZonedDateTime;
    let shouldRefresh = false;

    // Check if user wants to work in UTC (default: false, use local timezone)
    const useUTC = query.useUTC === 'true' || query.useUTC === '1';

    if (query.start && query.duration) {
      // parseDuration only yields whole seconds, so subtract seconds directly.
      // Going through nanoseconds (ms * 1e6) leaves the safe-integer range
      // for durations longer than roughly 104 days.
      const durationSeconds = parseDuration(query.duration) / 1000;
      if (query.start === 'now') {
        // Always use current UTC time for 'now' regardless of useUTC setting
        to = ZonedDateTime.now(ZoneOffset.UTC);
        shouldRefresh = query.refresh === 'true' || query.refresh === '1';
      } else {
        to = parseDateTime(query.start, useUTC);
      }
      from = to.minusSeconds(durationSeconds);
    } else if (query.from && query.to) {
      // Traditional from/to querying (forward in time)
      from = parseDateTime(query.from, useUTC);
      to = parseDateTime(query.to, useUTC);
    } else {
      throw new Error('Either (from + to) or (start + duration) parameters are required');
    }

    const context: Context = getContext(query.context, selfId);
    const bbox = query.bbox;
    return { from, to, context, bbox, shouldRefresh };
  } catch (e: unknown) {
    // The full error (including stack) stays in the server log; the rethrown
    // message can reach HTTP clients, so it carries only the error message.
    console.error('Full error details:', e);
    throw new Error(
      `Error extracting query parameters from ${JSON.stringify(query)}: ${
        e instanceof Error ? e.message : e
      }`
    );
  }
};

/** Milliseconds per duration unit accepted by `parseDuration`. */
const MILLIS_PER_DURATION_UNIT: { [unit: string]: number } = {
  s: 1000,
  m: 60 * 1000,
  h: 60 * 60 * 1000,
  d: 24 * 60 * 60 * 1000,
};

/**
 * Parses a duration string (e.g. "1h", "30m", "5s", "2d").
 *
 * @returns The duration in milliseconds.
 * @throws Error when the string is not `<digits><s|m|h|d>`.
 */
function parseDuration(duration: string): number {
  const match = duration.match(/^(\d+)([smhd])$/);
  if (!match) {
    throw new Error(`Invalid duration format: ${duration}. Use format like "1h", "30m", "5s", "2d"`);
  }
  const value = Number.parseInt(match[1], 10);
  const unit = match[2];
  const millisPerUnit = MILLIS_PER_DURATION_UNIT[unit];
  if (millisPerUnit === undefined) {
    // Unreachable while the regex and the table list the same units.
    throw new Error(`Unknown duration unit: ${unit}`);
  }
  return value * millisPerUnit;
}

/**
 * Reports whether a datetime string ends with a zone designator: a trailing
 * 'Z', or a signed offset written as +HH:MM / +HHMM.
 */
function hasTimezoneInfo(dateTimeStr: string): boolean {
  return dateTimeStr.endsWith('Z') || /[+-]\d{2}:?\d{2}$/.test(dateTimeStr);
}

/**
 * Parses a datetime string into a `ZonedDateTime`.
 *
 * - A string of the form `YYYY-MM-DDTHH:MM` gets `:00` appended first.
 * - With `useUTC`, a string without zone information is read as UTC; one
 *   with zone information is parsed as given.
 * - Without `useUTC`, a string with zone information is converted to UTC;
 *   one without is handed to the JavaScript `Date` constructor and the
 *   resulting instant is returned in UTC.
 *
 * @throws Error when a string without zone information cannot be parsed by
 *         `Date`; js-joda parse errors propagate as thrown by the library.
 */
function parseDateTime(dateTimeStr: string, useUTC: boolean): ZonedDateTime {
  // Normalize the datetime string to include seconds if missing
  const normalizedStr = /^\d{4}-\d{2}-\d{2}T\d{2}:\d{2}$/.test(dateTimeStr)
    ? `${dateTimeStr}:00`
    : dateTimeStr;
  const zoned = hasTimezoneInfo(normalizedStr);

  if (useUTC) {
    // No timezone info: assume UTC by adding 'Z'
    return ZonedDateTime.parse(zoned ? normalizedStr : `${normalizedStr}Z`);
  }

  if (zoned) {
    return ZonedDateTime.parse(normalizedStr).withZoneSameInstant(ZoneOffset.UTC);
  }

  try {
    // The Date constructor reads date-time strings without an offset as
    // local time.
    const localDate = new Date(normalizedStr);
    if (Number.isNaN(localDate.getTime())) {
      throw new Error('Invalid date');
    }
    // toISOString() is always UTC ('...Z'), which ZonedDateTime can parse.
    return ZonedDateTime.parse(localDate.toISOString());
  } catch (e) {
    throw new Error(
      `Unable to parse datetime '${dateTimeStr}': ${e}. Use format like '2025-08-13T08:00:00' or '2025-08-13T08:00:00Z'`
    );
  }
}

/**
 * Resolves the `context` query value. A missing value, 'vessels.self' and
 * 'self' map to `vessels.${selfId}`; anything else is returned with spaces
 * removed.
 */
function getContext(contextFromQuery: string, selfId: string): Context {
  if (
    !contextFromQuery ||
    contextFromQuery === 'vessels.self' ||
    contextFromQuery === 'self'
  ) {
    return `vessels.${selfId}` as Context;
  }
  return contextFromQuery.replace(/ /g, '') as Context;
}

/**
 * Answers history queries by running DuckDB SQL over the `*.parquet` glob
 * below `dataDir`.
 */
export class HistoryAPI {
  /** `toContextFilePath` applied to `vessels.${selfId}`. */
  readonly selfContextPath: string;

  constructor(
    private selfId: string,
    private dataDir: string
  ) {
    this.selfContextPath = toContextFilePath(`vessels.${selfId}` as Context);
  }

  /**
   * Handles a `values` request and writes the JSON response to `res`.
   *
   * Query parameters read here: `paths` (comma separated `path[:method]`
   * expressions) and `resolution` (bucket size in milliseconds; defaults to
   * 1/500 of the requested range). An invalid `resolution` is answered with
   * 400, any other failure with 500.
   *
   * When `shouldRefresh` is set, no-cache headers and a `Refresh` header are
   * added and the body gains a `refresh` object.
   */
  async getValues(
    context: Context,
    from: ZonedDateTime,
    to: ZonedDateTime,
    shouldRefresh: boolean,
    debug: (k: string) => void,
    // eslint-disable-next-line @typescript-eslint/no-explicit-any
    req: Request<ParamsDictionary, any, any, ParsedQs, Record<string, any>>,
    // eslint-disable-next-line @typescript-eslint/no-explicit-any
    res: Response<any, Record<string, any>>
  ) {
    try {
      const requestedResolution = req.query.resolution;
      let timeResolutionMillis: number;
      if (requestedResolution) {
        timeResolutionMillis = Number.parseFloat(requestedResolution as string);
        // The value is interpolated into SQL and used as a divisor, so NaN,
        // zero and negative numbers must not get through.
        if (!Number.isFinite(timeResolutionMillis) || timeResolutionMillis <= 0) {
          res.status(400).json({
            error: 'Bad request',
            message: `Invalid resolution '${String(requestedResolution)}': expected a positive number of milliseconds`,
          });
          return;
        }
      } else {
        // Default to 500 buckets over the range; clamp so an empty or
        // inverted range still produces a usable divisor.
        timeResolutionMillis = Math.max(
          ((to.toEpochSecond() - from.toEpochSecond()) / 500) * 1000,
          1
        );
      }

      // ''.split(',') yields [''], so blank expressions are dropped
      // explicitly; otherwise a request without `paths` would query an
      // empty path.
      const pathSpecs: PathSpec[] = ((req.query.paths as string) || '')
        .replace(/[^0-9a-z.,:_]/gi, '')
        .split(',')
        .filter(expression => expression.length > 0)
        .map(expression => splitPathExpression(expression))
        .filter(spec => spec.path.length > 0);

      // Handle position and numeric paths together
      const allResult: DataResult =
        pathSpecs.length > 0
          ? await this.getNumericValues(
              context,
              from,
              to,
              timeResolutionMillis,
              pathSpecs,
              debug
            )
          : ({
              context,
              range: {
                from: from.toString() as Timestamp,
                to: to.toString() as Timestamp,
              },
              values: [],
              data: [],
            } as DataResult);

      if (!shouldRefresh) {
        res.json(allResult);
        return;
      }

      // At least 1 second between refreshes
      const refreshIntervalSeconds = Math.max(Math.round(timeResolutionMillis / 1000), 1);
      res.setHeader('Cache-Control', 'no-cache, no-store, must-revalidate');
      res.setHeader('Pragma', 'no-cache');
      res.setHeader('Expires', '0');
      res.setHeader('Refresh', refreshIntervalSeconds.toString());
      res.json({
        ...allResult,
        refresh: {
          enabled: true,
          intervalSeconds: refreshIntervalSeconds,
          nextRefresh: new Date(Date.now() + refreshIntervalSeconds * 1000).toISOString(),
        },
      });
    } catch (error) {
      debug(`Error in getValues: ${error}`);
      res.status(500).json({
        error: 'Internal server error',
        message: error instanceof Error ? error.message : String(error),
      });
    }
  }

  /**
   * Queries every path spec, buckets the rows by `timeResolutionMillis` and
   * merges them into one table with an EMA and an SMA column after each
   * value column.
   *
   * A path whose query fails is logged through `debug` and contributes no
   * rows; the other paths are still returned.
   *
   * NOTE(review): files are always looked up below `selfContextPath`;
   * `context` is only echoed in the result - confirm this is intended.
   *
   * @throws RangeError when `timeResolutionMillis` is not a positive finite
   *         number (it is interpolated into the SQL text).
   */
  async getNumericValues(
    context: Context,
    from: ZonedDateTime,
    to: ZonedDateTime,
    timeResolutionMillis: number,
    pathSpecs: PathSpec[],
    debug: (k: string) => void
  ): Promise<DataResult> {
    if (!Number.isFinite(timeResolutionMillis) || timeResolutionMillis <= 0) {
      throw new RangeError(
        `timeResolutionMillis must be a positive finite number, got ${timeResolutionMillis}`
      );
    }

    // Convert ZonedDateTime to ISO instant strings for the WHERE clause
    const fromIso = from.toInstant().toString();
    const toIso = to.toInstant().toString();

    // One DuckDB instance is shared by all paths of this call. It is created
    // lazily inside the per-path try block so a creation failure is handled
    // like any other per-path query failure.
    let sharedInstance: Promise<DuckDBInstance> | undefined;
    const getInstance = (): Promise<DuckDBInstance> => {
      if (sharedInstance === undefined) {
        sharedInstance = DuckDBInstance.create();
      }
      return sharedInstance;
    };

    // Results are kept per column index, not per path name, so the same path
    // requested with two aggregate methods yields two independent columns.
    const columns = await Promise.all(
      pathSpecs.map(async (pathSpec): Promise<Array<[Timestamp, unknown]>> => {
        try {
          // Only alphanumerics, dots and underscores survive; dots then
          // become directory separators.
          const sanitizedPath = pathSpec.path
            .replace(/[^a-zA-Z0-9._]/g, '')
            .replace(/\./g, '/');
          const filePath = path.join(
            this.dataDir,
            this.selfContextPath,
            sanitizedPath,
            '*.parquet'
          );
          // The path sits inside a single-quoted SQL literal; double any
          // quote character it contains.
          const sqlFilePath = filePath.replace(/'/g, "''");

          // Build query with time bucketing
          const query = `
            SELECT
              strftime(DATE_TRUNC('seconds',
                EPOCH_MS(CAST(FLOOR(EPOCH_MS(signalk_timestamp::TIMESTAMP) / ${timeResolutionMillis}) * ${timeResolutionMillis} AS BIGINT))
              ), '%Y-%m-%dT%H:%M:%SZ') as timestamp,
              ${getAggregateExpression(pathSpec.aggregateMethod, pathSpec.path)} as value,
              FIRST(value_json) as value_json
            FROM '${sqlFilePath}'
            WHERE signalk_timestamp >= '${fromIso}'
              AND signalk_timestamp < '${toIso}'
              AND (value IS NOT NULL OR value_json IS NOT NULL)
            GROUP BY timestamp
            ORDER BY timestamp
          `;

          const duckDB = await getInstance();
          const connection = await duckDB.connect();
          try {
            const result = await connection.runAndReadAll(query);
            return result.getRowObjects().map(row => {
              const { timestamp, value, value_json } = row as {
                timestamp: Timestamp;
                value: unknown;
                value_json?: string;
              };
              // A JSON payload (e.g. a position object) takes precedence
              // over the aggregated scalar value.
              const cell: unknown = value_json
                ? JSON.parse(String(value_json))
                : value;
              return [timestamp, cell] as [Timestamp, unknown];
            });
          } finally {
            connection.disconnectSync();
          }
        } catch (error) {
          debug(`Error querying path ${pathSpec.path}: ${error}`);
          return [];
        }
      })
    );

    // Merge all path data into time-ordered rows
    const mergedData = this.mergePathData(columns);

    // Add EMA and SMA calculations to numeric columns
    const enhancedData = this.addMovingAverages(mergedData, pathSpecs);

    return {
      context,
      range: {
        from: from.toString() as Timestamp,
        to: to.toString() as Timestamp,
      },
      values: this.buildValuesWithMovingAverages(pathSpecs),
      data: enhancedData,
    } as DataResult;
  }

  /**
   * Joins per-column `[timestamp, value]` lists into rows of
   * `[timestamp, col0, col1, ...]`, sorted by timestamp. Columns without a
   * value at a timestamp hold `null`.
   *
   * @param columns One list per requested path spec, in request order.
   */
  private mergePathData(
    columns: ReadonlyArray<ReadonlyArray<[Timestamp, unknown]>>
  ): Array<[Timestamp, ...unknown[]]> {
    const rowsByTimestamp = new Map<string, unknown[]>();

    columns.forEach((columnData, columnIndex) => {
      for (const [timestamp, value] of columnData) {
        let row = rowsByTimestamp.get(timestamp);
        if (row === undefined) {
          row = new Array<unknown>(columns.length).fill(null);
          rowsByTimestamp.set(timestamp, row);
        }
        row[columnIndex] = value;
      }
    });

    return Array.from(rowsByTimestamp.entries())
      .sort(([a], [b]) => a.localeCompare(b))
      .map(
        ([timestamp, values]) =>
          [timestamp as Timestamp, ...values] as [Timestamp, ...unknown[]]
      );
  }

  /**
   * Expands every value column into three: the value, its exponential moving
   * average (alpha 0.2) and its simple moving average over the last 10
   * numeric values, both rounded to 3 decimals. Non-numeric cells get `null`
   * for both averages and do not advance either average.
   */
  private addMovingAverages(
    data: Array<[Timestamp, ...unknown[]]>,
    pathSpecs: PathSpec[]
  ): Array<[Timestamp, ...unknown[]]> {
    if (data.length === 0) return data;

    const smaPeriod = 10;
    const emaAlpha = 0.2;
    const round3 = (n: number): number => Math.round(n * 1000) / 1000;

    // Running state per column, carried from row to row.
    const emaByColumn: Array<number | null> = new Array<number | null>(
      pathSpecs.length
    ).fill(null);
    const smaWindows: number[][] = pathSpecs.map(() => []);

    return data.map(row => {
      const [timestamp, ...values] = row;
      const enhancedValues: unknown[] = [];

      values.forEach((value, colIndex) => {
        enhancedValues.push(value);

        if (typeof value !== 'number' || Number.isNaN(value)) {
          // Non-numeric values get null for EMA/SMA
          enhancedValues.push(null, null);
          return;
        }

        // EMA: seeded with the first numeric value of the column
        const previousEma = emaByColumn[colIndex];
        const ema =
          previousEma == null
            ? value
            : emaAlpha * value + (1 - emaAlpha) * previousEma;
        emaByColumn[colIndex] = ema;

        // SMA over a sliding window of at most `smaPeriod` values
        let window = smaWindows[colIndex];
        if (window === undefined) {
          window = [];
          smaWindows[colIndex] = window;
        }
        window.push(value);
        if (window.length > smaPeriod) {
          window.shift();
        }
        const sma = window.reduce((sum, v) => sum + v, 0) / window.length;

        enhancedValues.push(round3(ema), round3(sma));
      });

      return [timestamp, ...enhancedValues] as [Timestamp, ...unknown[]];
    });
  }

  /**
   * Builds the `values` descriptor list matching the column layout produced
   * by `addMovingAverages`: for each spec the path itself, then `<path>.ema`
   * and `<path>.sma`.
   */
  private buildValuesWithMovingAverages(
    pathSpecs: PathSpec[]
  ): Array<{ path: Path; method: AggregateMethod }> {
    const result: Array<{ path: Path; method: AggregateMethod }> = [];
    for (const { path: specPath, aggregateMethod } of pathSpecs) {
      result.push(
        { path: specPath, method: aggregateMethod },
        { path: `${specPath}.ema` as Path, method: 'ema' as AggregateMethod },
        { path: `${specPath}.sma` as Path, method: 'sma' as AggregateMethod }
      );
    }
    return result;
  }
}

/** Aggregate method names accepted in a `path:method` expression. */
const validAggregateMethods = [
  'average',
  'min',
  'max',
  'first',
  'last',
  'mid',
  'middle_index',
];

/** Value stored in `PathSpec.aggregateFunction` for each aggregate method. */
const functionForAggregate: { [key: string]: string } = {
  average: 'avg',
  min: 'min',
  max: 'max',
  first: 'first',
  last: 'last',
  mid: 'median',
  middle_index: 'nth_value',
} as const;

/**
 * Splits a `path[:method]` expression into a `PathSpec`.
 *
 * A missing or unrecognised method falls back to the path's default:
 * `first` for `navigation.position` (its value is an object, so averaging
 * does not apply), `average` for everything else.
 */
function splitPathExpression(pathExpression: string): PathSpec {
  const [rawPath = '', requestedMethod] = pathExpression.split(':');

  const defaultMethod = rawPath === 'navigation.position' ? 'first' : 'average';
  const aggregateMethod = (
    requestedMethod && validAggregateMethods.includes(requestedMethod)
      ? requestedMethod
      : defaultMethod
  ) as AggregateMethod;

  return {
    path: rawPath as Path,
    queryResultName: rawPath.replace(/\./g, '_'),
    aggregateMethod,
    aggregateFunction: (functionForAggregate[aggregateMethod] as string) || 'avg',
  };
}

/** Maps an aggregate method to the SQL function name; unknown methods use AVG. */
function getAggregateFunction(method: AggregateMethod): string {
  switch (method) {
    case 'average':
      return 'AVG';
    case 'min':
      return 'MIN';
    case 'max':
      return 'MAX';
    case 'first':
      return 'FIRST';
    case 'last':
      return 'LAST';
    case 'mid':
      return 'MEDIAN';
    case 'middle_index':
      return 'NTH_VALUE';
    default:
      return 'AVG';
  }
}

/**
 * Returns the SQL expression the aggregate is applied to: `value_json` for
 * `navigation.position`, otherwise `value` cast to DOUBLE via TRY_CAST.
 */
function getValueExpression(pathName: string): string {
  if (pathName === 'navigation.position') {
    return 'value_json';
  }
  return 'TRY_CAST(value AS DOUBLE)';
}

/** Builds the full SQL aggregate expression for one path. */
function getAggregateExpression(method: AggregateMethod, pathName: string): string {
  const valueExpr = getValueExpression(pathName);

  if (method === 'middle_index') {
    // For middle_index, use FIRST as a simple fallback for now
    // TODO: Implement proper middle index selection
    return `FIRST(${valueExpr})`;
  }

  return `${getAggregateFunction(method)}(${valueExpr})`;
}