UNPKG

digitaltwin-core

Version:

Minimalist framework to collect and handle data in a Digital Twin project

393 lines 14.4 kB
/** * Utility class for parsing harvester source range configurations. * * Handles conversion of various source range formats (time-based like '1h', '30m' * or count-based like '100') into structured date ranges for data retrieval. * * @example * ```typescript * // Parse time-based range: get data from last hour * const result = SourceRangeParser.parseSourceRange(new Date(), '1h'); * * // Parse count-based range: get last 50 records * const result = SourceRangeParser.parseSourceRange(new Date(), 50); * ``` */ class SourceRangeParser { static { this.ZERO_DATE = new Date('1970-01-01T00:00:00Z'); } /** * Gets the zero date used as fallback for empty datasets. * * @returns Unix epoch date (1970-01-01T00:00:00Z) */ static get zeroDate() { return new Date(this.ZERO_DATE); } /** * Parses source range configuration into a structured result. * * @param latestDate - The latest date in the existing data * @param sourceRange - Range specification (e.g., '1h', '30m', '7d', or number for count) * @returns Parsed range result with start/end dates or record limit * * @throws {Error} When source range format is invalid */ static parseSourceRange(latestDate, sourceRange) { if (!sourceRange) { return { startDate: latestDate, limit: 1 }; } // If it's a number or numeric string (limit mode) if (typeof sourceRange === 'number' || /^\d+$/.test(sourceRange)) { return { startDate: latestDate, limit: Number(sourceRange) }; } const sourceRangeStr = sourceRange.toString(); let value; let unit; // Parse time-based ranges if (sourceRangeStr.includes('d')) { value = parseInt(sourceRangeStr.replace('d', '')); unit = 'days'; } else if (sourceRangeStr.includes('h')) { value = parseInt(sourceRangeStr.replace('h', '')); unit = 'hours'; } else if (sourceRangeStr.includes('m')) { value = parseInt(sourceRangeStr.replace('m', '')); unit = 'minutes'; } else if (sourceRangeStr.includes('s')) { value = parseInt(sourceRangeStr.replace('s', '')); unit = 'seconds'; } else { throw new Error(`Invalid source range format: ${sourceRange}`); } // For time-based ranges, start from latestDate and go forward const startDate = latestDate; const endDate = this.addTime(startDate, value, unit); return { startDate, endDate }; } /** * Adds time to a date based on the specified unit. * * @param date - Base date to add time to * @param value - Amount of time to add * @param unit - Time unit ('days', 'hours', 'minutes', 'seconds') * @returns New date with added time */ static addTime(date, value, unit) { const result = new Date(date); switch (unit) { case 'days': result.setDate(result.getDate() + value); break; case 'hours': result.setHours(result.getHours() + value); break; case 'minutes': result.setMinutes(result.getMinutes() + value); break; case 'seconds': result.setSeconds(result.getSeconds() + value); break; } return result; } } /** * Abstract base class for data harvesting components. * * Harvesters process and analyze data that has been collected by Collectors, * applying transformations, aggregations, or other data processing operations. * They can be triggered by new source data or run on a schedule. * * Key features: * - Process existing collected data with configurable ranges * - Support both time-based and count-based data retrieval * - Can be triggered by source data changes or scheduled execution * - Provide HTTP endpoints for accessing processed results * * @example * ```typescript * class TrafficAnalysisHarvester extends Harvester { * getUserConfiguration() { * return { * name: 'traffic-analysis', * type: 'harvester', * source: 'traffic-collector', * source_range: '1h', // Process last hour of data * schedule: '0 *\/15 * * * *' // Run every 15 minutes * }; * } * * async harvest(data: DataRecord[]): Promise<DataRecord[]> { * // Process traffic data and return analysis results * return this.analyzeTrafficPatterns(data); * } * } * ``` */ export class Harvester { /** * Injects database and storage dependencies into the harvester. * * Called during component initialization to provide access to * data storage and file operations. * * @param db - Database adapter for reading source data * @param storage - Storage service for file operations */ setDependencies(db, storage) { this.db = db; this.storage = storage; } /** * Returns the complete harvester configuration with defaults applied. * * Merges user configuration with sensible defaults for optional settings. * This final configuration is used by the engine and scheduler. * * @returns Complete configuration with all defaults applied */ getConfiguration() { if (this._configCache) { return this._configCache; } const userConfig = this.getUserConfiguration(); // Apply defaults first, then user config const defaults = { triggerMode: 'on-source', source_range: 1, multiple_results: false, source_range_min: false, debounceMs: 1000, dependencies: [], dependenciesLimit: [] }; this._configCache = { ...defaults, ...userConfig }; return this._configCache; } /** * Returns the cron schedule for this harvester. * * For 'on-source' trigger mode, returns empty string (no schedule). * For 'scheduled' mode, uses the provided schedule or defaults to every minute. * * @returns Cron expression string or empty string for source-triggered mode */ getSchedule() { const config = this.getConfiguration(); if (config.triggerMode === 'on-source') { return ''; } // Default to every minute instead of every second return '0 * * * * *'; } /** * Main execution method for the harvester. * * Orchestrates the harvesting process by: * 1. Determining the date range for data retrieval * 2. Fetching source and dependency data * 3. Calling the harvest method with the data * 4. Storing the results in the database * * @returns True if harvesting was successful, false if no data to process * * @throws {Error} When source component is not specified * @throws {Error} When data processing fails */ async run() { const config = this.getConfiguration(); if (!config.source) { throw new Error(`Harvester ${config.name} must specify a source component`); } // Get the latest harvested date const latestHarvestedRecord = await this.db.getLatestByName(config.name); // Calculate the starting point for harvesting let latestDate; if (!latestHarvestedRecord) { // First run - get first source record and start from one second before const firstSourceRecord = await this.db.getFirstByName(config.source); if (!firstSourceRecord) { return false; } latestDate = new Date(firstSourceRecord.date.getTime() - 1000); } else { latestDate = latestHarvestedRecord.date; } // Parse source range const { startDate, endDate, limit } = SourceRangeParser.parseSourceRange(latestDate, config.source_range); // Get source data based on range const sourceData = await this.getSourceData(config.source, startDate, endDate, limit); if (!sourceData || sourceData.length === 0) { return false; } // Check if we have enough data (strict mode) if (limit && config.source_range_min && sourceData.length < limit) { return false; } // Calculate storage date const storageDate = endDate || sourceData[sourceData.length - 1].date; // Prepare source data for harvesting const sourceForHarvesting = limit === 1 && !endDate ? sourceData[0] : sourceData; // Get dependencies data const dependenciesData = await this.getDependenciesData(config.dependencies || [], config.dependenciesLimit || [], storageDate); // Execute harvesting const result = await this.harvest(sourceForHarvesting, dependenciesData); // Store results await this.storeResults(config, result, sourceData, storageDate); return true; } /** * HTTP endpoints */ getEndpoints() { return [ { method: 'get', path: `/${this.getConfiguration().endpoint}`, handler: this.retrieve.bind(this), responseType: this.getConfiguration().contentType } ]; } /** * Retrieve latest harvested data */ async retrieve() { const config = this.getConfiguration(); const record = await this.db.getLatestByName(config.name); if (!record) { return { status: 404, content: 'No data available' }; } const blob = await record.data(); return { status: 200, content: blob, headers: { 'Content-Type': record.contentType } }; } /** * Get source data within the specified range */ async getSourceData(sourceName, startDate, endDate, limit) { let sourceData; if (endDate) { // Time-based range: get records between startDate and endDate sourceData = await this.db.getByDateRange(sourceName, startDate, endDate, limit); } else if (limit) { // Count-based: get records after startDate with limit sourceData = await this.db.getAfterDate(sourceName, startDate, limit); } else { // Default: get latest record after startDate sourceData = await this.db.getAfterDate(sourceName, startDate, 1); } return sourceData; } /** * Get data from dependent components */ async getDependenciesData(dependencies, dependenciesLimit, storageDate) { const dependenciesData = {}; for (let i = 0; i < dependencies.length; i++) { const dependency = dependencies[i]; const limit = dependenciesLimit[i] || 1; if (limit === 1) { // Get single latest record before storage date const dependencyRecord = await this.db.getLatestBefore(dependency, storageDate); dependenciesData[dependency] = dependencyRecord || null; } else { // Get multiple latest records before storage date const dependencyRecords = await this.db.getLatestRecordsBefore(dependency, storageDate, limit); dependenciesData[dependency] = dependencyRecords.length > 0 ? dependencyRecords : null; } } return dependenciesData; } /** * Store harvesting results */ async storeResults(config, result, sourceData, storageDate) { if (config.multiple_results && Array.isArray(result) && Array.isArray(sourceData)) { // Store each result with its corresponding source date for (let i = 0; i < result.length; i++) { const item = result[i]; const source = sourceData[i]; const url = await this.storage.save(item, config.name); await this.db.save({ name: config.name, type: config.contentType, url, date: source.date }); } } else { // Store single result const buffer = Array.isArray(result) ? result[0] : result; const url = await this.storage.save(buffer, config.name); await this.db.save({ name: config.name, type: config.contentType, url, date: storageDate }); } } /** * Returns the OpenAPI specification for this harvester's endpoints. * * Generates documentation for the GET endpoint that retrieves harvested data. * Can be overridden by subclasses for more detailed specifications. * * @returns {OpenAPIComponentSpec} OpenAPI paths, tags, and schemas for this harvester */ getOpenAPISpec() { const config = this.getConfiguration(); const path = `/${config.endpoint}`; const tagName = config.tags?.[0] || config.name; return { paths: { [path]: { get: { summary: `Get ${config.name} harvested data`, description: config.description, tags: [tagName], responses: { '200': { description: 'Latest harvested data', content: { [config.contentType]: { schema: { type: 'object' } } } }, '404': { description: 'No data available' } } } } }, tags: [ { name: tagName, description: config.description } ] }; } } //# sourceMappingURL=harvester.js.map