signalk-parquet
Version:
SignalK plugin to save marine data directly to Parquet files with regimen-based control
166 lines • 4.48 kB
TypeScript
import { ServerAPI } from '@signalk/server-api';
import { DataRecord, DataSummary } from './types';
/**
* A time interval described by two `Date` values.
*
* Used in this file as an optional filter (`SampleRequest.timeRange`,
* `getDataSummary`, `getCorrelationData`) and as the baseline/detection
* windows of `AnomalyBaselineRequest`.
*
* NOTE(review): whether the bounds are inclusive or exclusive is not visible
* in this declaration file — confirm against the implementation.
*/
export interface TimeRange {
/** Beginning of the interval. */
start: Date;
/** End of the interval. */
end: Date;
}
/**
* Argument object accepted by `DataQueryService.getSampleData`.
*/
export interface SampleRequest {
/** Identifier of the data series to sample (presumably a SignalK path — confirm). */
dataPath: string;
/** Requested number of records in the sample. */
sampleSize: number;
/** Optional time window; the behaviour when omitted is not visible here. */
timeRange?: TimeRange;
/**
* Sampling strategy. Each value has a matching private query builder on
* `DataQueryService` (`buildRandomSampleQuery`, `buildSystematicSampleQuery`,
* `buildRecentSampleQuery`, `buildRepresentativeSampleQuery`).
*/
strategy: 'random' | 'systematic' | 'recent' | 'representative';
/**
* Optional flag to turn sampling off.
* NOTE(review): presumably routes to the "all data" query and ignores
* `sampleSize`/`strategy` — confirm against the implementation.
*/
disableSampling?: boolean;
}
/**
* Result shape resolved by `DataQueryService.getCorrelationData`.
*/
export interface CorrelationDataset {
/** The data paths included in the dataset. */
paths: string[];
/**
* Arrays of untyped values indexed by a string key.
* NOTE(review): keys are presumably the entries of `paths` — confirm.
*/
data: Record<string, any[]>;
/** How the series were aligned in time before being combined. */
timeAlignment: 'exact' | 'interpolated' | 'windowed';
/**
* Optional two-dimensional numeric matrix of correlations.
* NOTE(review): row/column order presumably follows `paths` — confirm.
*/
correlationMatrix?: number[][];
}
/**
* Argument object accepted by `DataQueryService.getAnomalyBaselineData`.
*/
export interface AnomalyBaselineRequest {
/** Identifier of the data series to analyse (presumably a SignalK path — confirm). */
dataPath: string;
/** Time window that supplies the baseline data. */
baselineRange: TimeRange;
/** Time window in which anomalies are to be looked for. */
detectionRange: TimeRange;
/**
* Numeric detection threshold.
* NOTE(review): units (e.g. standard deviations vs. absolute value) are not
* visible in this declaration file — confirm against the implementation.
*/
threshold: number;
}
/**
* Performance figures attached to an `EnhancedDataSummary`.
*/
export interface QueryPerformanceMetrics {
/** Time taken to run the query (unit not stated here; presumably milliseconds — confirm). */
executionTime: number;
/** Number of rows scanned by the query. */
rowsScanned: number;
/** Number of rows returned by the query. */
rowsReturned: number;
/** Whether the result was a cache hit. */
cacheHit: boolean;
}
/**
* `DataSummary` (imported from `./types`) extended with query performance
* metrics and, optionally, details about the sampling that was applied.
*/
export interface EnhancedDataSummary extends DataSummary {
/** Performance figures for the query that produced this summary. */
performanceMetrics: QueryPerformanceMetrics;
/** Present only when sampling information is available for the result. */
samplingInfo?: {
/** Record count before sampling. */
originalCount: number;
/** Record count after sampling. */
sampleCount: number;
/** Name of the strategy used (typed as a free-form string here, not the `SampleRequest.strategy` union). */
samplingStrategy: string;
};
}
/**
* Enhanced data query service specifically designed for AI analysis.
* Integrates with existing DuckDB infrastructure and adds intelligent sampling.
*
* This is a declaration-only view of the class: private members are listed by
* name without types or signatures, so their parameters and return values
* cannot be read from this file.
*/
export declare class DataQueryService {
/** Holds the server API handle (type erased in declaration output; presumably the `app` constructor argument). */
private app;
/** Holds the output directory (type erased; presumably the `outputDirectory` constructor argument). */
private outputDirectory;
/** Optional vessel context (type erased; presumably the `vesselContext` constructor argument). */
private vesselContext?;
/** Backing store for cached query results (type erased in declaration output). */
private queryCache;
/**
* @param app SignalK server API instance.
* @param outputDirectory Directory path used by the service (presumably where the Parquet files live — confirm).
* @param vesselContext Optional vessel context string.
*/
constructor(app: ServerAPI, outputDirectory: string, vesselContext?: string);
/**
* Get intelligently sampled data for AI analysis.
* Balances data representativeness with Claude token limits.
*
* @param request Data path, sample size, strategy and optional time range.
* @returns A promise resolving to the records together with an enhanced summary.
*/
getSampleData(request: SampleRequest): Promise<{
data: DataRecord[];
summary: EnhancedDataSummary;
}>;
/**
* Generate comprehensive data summary for analysis context.
*
* @param dataPath Identifier of the data series to summarise.
* @param timeRange Optional time window to restrict the summary to.
* @returns A promise resolving to the enhanced summary.
*/
getDataSummary(dataPath: string, timeRange?: TimeRange): Promise<EnhancedDataSummary>;
/**
* Prepare correlation data across multiple paths.
*
* @param paths Data paths to include.
* @param timeRange Optional time window to restrict the data to.
* @returns A promise resolving to the combined dataset.
*/
getCorrelationData(paths: string[], timeRange?: TimeRange): Promise<CorrelationDataset>;
/**
* Get baseline data for anomaly detection.
*
* @param request Data path, baseline/detection windows and threshold.
* @returns A promise resolving to an untyped array (`any[]`); the element shape is not visible in this declaration file.
*/
getAnomalyBaselineData(request: AnomalyBaselineRequest): Promise<any[]>;
/**
* Find data files for a given path and time range.
*/
private findDataFiles;
/**
* Build query to get all data without sampling.
*/
private buildAllDataQuery;
/**
* Build count query to get total record count.
*/
private buildCountQuery;
/**
* Build systematic sampling query.
*/
private buildSystematicSampleQuery;
/**
* Build recent sampling query - prioritizes most recent data.
*/
private buildRecentSampleQuery;
/**
* Build representative sampling query - balances across time periods.
*/
private buildRepresentativeSampleQuery;
/**
* Build random sampling query.
*/
private buildRandomSampleQuery;
/**
* Build summary statistics query.
*/
private buildSummaryQuery;
/**
* Execute DuckDB query and return results.
*/
private executeDuckDBQuery;
/**
* Execute count query and return single number.
*/
private executeCountQuery;
/**
* Generate enhanced data summary with performance metrics.
*/
private generateEnhancedSummary;
/**
* Analyze data columns.
*/
private analyzeColumns;
/**
* Infer data type from values.
*/
private inferDataType;
/**
* Calculate statistical summaries.
*/
private calculateStatistics;
/**
* Calculate data quality metrics.
*/
private calculateDataQuality;
/**
* Calculate timeliness metric.
*/
private calculateTimeliness;
/**
* Create empty summary for error cases.
*/
private createEmptySummary;
/**
* Generate cache key for query caching.
*/
private generateCacheKey;
/**
* Align time series data for correlation analysis.
*/
private alignTimeSeriesData;
/**
* Calculate correlation matrix between datasets.
*/
private calculateCorrelationMatrix;
/**
* Calculate statistical baseline for anomaly detection.
*/
private calculateStatisticalBaseline;
/**
* Clear query cache.
*/
clearCache(): void;
/**
* Get cache statistics.
*
* @returns An object with `size` and `hitRate`, both numbers.
* NOTE(review): `size` is presumably the number of cached entries and
* `hitRate` a ratio; the scale (0–1 vs. percentage) is not visible here — confirm.
*/
getCacheStats(): {
size: number;
hitRate: number;
};
}
//# sourceMappingURL=data-query-service.d.ts.map