// digitaltwin-core
// Version:
// Minimalist framework to collect and handle data in a Digital Twin project
// 182 lines • 6.7 kB
// TypeScript
import type { Component, ScheduleRunnable, Servable } from './interfaces.js';
import type { DataResponse, HarvesterConfiguration } from './types.js';
import type { DataRecord } from '../types/data_record.js';
import type { DatabaseAdapter } from '../database/database_adapter.js';
import type { StorageService } from '../storage/storage_service.js';
import type { HttpMethod } from '../engine/endpoints.js';
import type { OpenAPIDocumentable, OpenAPIComponentSpec } from '../openapi/types.js';
/**
* Abstract base class for data harvesting components.
*
* Harvesters process and analyze data that has been collected by Collectors,
* applying transformations, aggregations, or other data processing operations.
* They can be triggered by new source data or run on a schedule.
*
* Key features:
* - Process existing collected data with configurable ranges
* - Support both time-based and count-based data retrieval
* - Can be triggered by source data changes or scheduled execution
* - Provide HTTP endpoints for accessing processed results
*
* Subclasses must implement {@link Harvester.getUserConfiguration} and
* {@link Harvester.harvest}; everything else is provided by this base class.
*
* @example
* ```typescript
* class TrafficAnalysisHarvester extends Harvester {
* getUserConfiguration() {
* return {
* name: 'traffic-analysis',
* type: 'harvester',
* source: 'traffic-collector',
* source_range: '1h', // Process last hour of data
* schedule: '0 *\/15 * * * *' // Run every 15 minutes
* };
* }
*
* async harvest(sourceData: DataRecord | DataRecord[]): Promise<Buffer> {
* // Process traffic data and return the serialized analysis results
* const analysis = this.analyzeTrafficPatterns(sourceData);
* return Buffer.from(JSON.stringify(analysis));
* }
* }
* ```
*/
export declare abstract class Harvester implements Component<HarvesterConfiguration>, ScheduleRunnable, Servable, OpenAPIDocumentable {
/** Database adapter, injected through {@link Harvester.setDependencies}. */
protected db: DatabaseAdapter;
/** Storage service, injected through {@link Harvester.setDependencies}. */
protected storage: StorageService;
/**
* Private cache slot; its type is elided in this declaration file.
* NOTE(review): presumably memoizes the result of getConfiguration() - confirm in the implementation.
*/
private _configCache?;
/**
* Injects database and storage dependencies into the harvester.
*
* Called during component initialization to provide access to
* data storage and file operations.
*
* @param db - Database adapter for reading source data
* @param storage - Storage service for file operations
*/
setDependencies(db: DatabaseAdapter, storage: StorageService): void;
/**
* Provides the basic harvester configuration.
*
* Implementations must return configuration specifying the harvester's
* name, data source, processing range, and scheduling information.
*
* @returns Basic harvester configuration without defaults applied
*
* @example
* ```typescript
* getUserConfiguration() {
* return {
* name: 'weather-analysis',
* type: 'harvester',
* source: 'weather-collector',
* source_range: '24h',
* schedule: '0 0 0 * * *' // Daily at midnight (fields: sec min hour day month weekday)
* };
* }
* ```
*/
abstract getUserConfiguration(): HarvesterConfiguration;
/**
* Returns the complete harvester configuration with defaults applied.
*
* Merges user configuration with sensible defaults for optional settings.
* This final configuration is used by the engine and scheduler.
*
* @returns Complete configuration with all defaults applied
*/
getConfiguration(): HarvesterConfiguration;
/**
* Returns the cron schedule for this harvester.
*
* For 'on-source' trigger mode, returns empty string (no schedule).
* For 'scheduled' mode, uses the provided schedule or defaults to every minute.
*
* @returns Cron expression string or empty string for source-triggered mode
*/
getSchedule(): string;
/**
* Allows subclasses to define a custom schedule.
*
* This member is optional: subclasses that do not need a custom schedule
* may leave it undefined. Override it to provide a custom cron expression
* that differs from the default every-minute schedule.
*
* @returns Custom cron expression string
*
* @example
* ```typescript
* getCustomSchedule() {
* return '0 0 *\/6 * * *'; // Every 6 hours
* }
* ```
*/
getCustomSchedule?(): string;
/**
* Processes source data and returns harvested results.
*
* This is the main data processing method that implementations must provide.
* It receives source data (from the configured source component) and any
* dependency data, then performs analysis, transformation, or aggregation.
*
* @param sourceData - Data from the source component (single record or array)
* @param dependenciesData - Data from dependency components, keyed by component name.
* An entry may be `null` (presumably when no data is available for that dependency - confirm in the implementation).
* @returns Promise resolving to the processed data as a single Buffer or an array of Buffers to be stored
*
* @example
* ```typescript
* async harvest(sourceData: DataRecord[], dependenciesData: Record<string, DataRecord[]>) {
* const trafficData = sourceData.map(r => JSON.parse(r.data.toString()));
* const analysis = this.performTrafficAnalysis(trafficData);
* return Buffer.from(JSON.stringify(analysis));
* }
* ```
*/
abstract harvest(sourceData: DataRecord | DataRecord[], dependenciesData: Record<string, DataRecord | DataRecord[] | null>): Promise<Buffer | Buffer[]>;
/**
* Main execution method for the harvester.
*
* Orchestrates the harvesting process by:
* 1. Determining the date range for data retrieval
* 2. Fetching source and dependency data
* 3. Calling the harvest method with the data
* 4. Storing the results in the database
*
* @returns Promise resolving to true if harvesting was successful, false if there was no data to process
*
* @throws {Error} When source component is not specified
* @throws {Error} When data processing fails
*/
run(): Promise<boolean>;
/**
* Lists the HTTP endpoints exposed by this harvester.
*
* Each entry describes one route: its HTTP method, its path, the handler
* function to invoke, and an optional response type string.
*
* @returns Array of endpoint descriptors
*/
getEndpoints(): Array<{
method: HttpMethod;
path: string;
handler: (...args: any[]) => any;
responseType?: string;
}>;
/**
* Retrieve latest harvested data.
*
* @returns Promise resolving to the response describing the latest harvested data
*/
retrieve(): Promise<DataResponse>;
/**
* Get source data within the specified range.
* Private member: its signature is elided in this declaration file.
*/
private getSourceData;
/**
* Get data from dependent components.
* Private member: its signature is elided in this declaration file.
*/
private getDependenciesData;
/**
* Store harvesting results.
* Private member: its signature is elided in this declaration file.
*/
private storeResults;
/**
* Returns the OpenAPI specification for this harvester's endpoints.
*
* Generates documentation for the GET endpoint that retrieves harvested data.
* Can be overridden by subclasses for more detailed specifications.
*
* @returns OpenAPI paths, tags, and schemas for this harvester
*/
getOpenAPISpec(): OpenAPIComponentSpec;
}
//# sourceMappingURL=harvester.d.ts.map