@codeplaydata/datasus
This application decompresses the DATASUS microdata and serves as a gateway class.
TypeScript
import { Command } from "../Command.js";
import { Subset } from "../../core/Subset.js";
import { DATASUSGateway } from "../../interface/gateway/DATASUSGateway.js";
import { Parser } from "../../interface/utils/Parser.js";
import { Records } from "../../core/Records.js";
import { DataSource } from "../../core/Datasource.js";
/**
* Job orchestrator for processing DATASUS files.
*
* Responsibilities:
* - Query the gateway and discover files;
* - Download files to the configured data path;
* - Split the workload into chunks and delegate execution to the JobScheduler;
* - Chain parser and callback, when provided.
*
* Type parameters:
* S: Subset type (filters and data source);
* D: DataSource type (dataset identifier, e.g., 'BI');
* G: Gateway implementation that lists/downloads files;
* P: Parser applied to the records emitted by child processes.
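*
* @example
* // Hypothetical end-to-end sketch. The import path and the `gateway` / `mySubset`
* // placeholders are assumptions, not confirmed exports of this package.
* // import { JobOrchestrator } from "@codeplaydata/datasus";
* const orchestrator = JobOrchestrator.init(gateway);
* await orchestrator.subset(mySubset);
* await orchestrator.exec();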
*/
export declare class JobOrchestrator<S extends Subset, D extends DataSource, G extends DATASUSGateway<S>, P extends Parser<Records>> implements Command {
private gateway;
readonly DATA_PATH: string;
readonly MAX_CONCURRENT_PROCESSES: number;
readonly output: 'stdout' | 'file';
readonly filters?: Map<string, string | string[]> | undefined;
readonly callback?: Function | undefined;
private _files;
private _chunks;
private dataSource;
private parser;
private readonly resolvedDataPath;
/**
* Discovered files (short file names) to be processed.
*/
get files(): string[];
/**
* Chunks (partitions) of the file list, used to control concurrency.
*/
get chunks(): string[][];
/**
* Protected constructor. Use the static init method to instantiate.
* @param gateway Gateway that lists and downloads files via FTP.
* @param DATA_PATH Local path where data will be saved (default './').
* @param MAX_CONCURRENT_PROCESSES Maximum number of parallel processes.
* @param output Preferred output: 'stdout' (console) or 'file' (files).
* @param filters Filters applied to processed records.
* @param callback Callback function called for each emitted record.
*/
protected constructor(gateway: G, DATA_PATH: string | undefined, MAX_CONCURRENT_PROCESSES: number, output?: 'stdout' | 'file', filters?: Map<string, string | string[]> | undefined, callback?: Function | undefined);
/**
* Factory method for creating an orchestrator with basic configuration.
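*
* @example
* // Hedged sketch: `gateway` is a placeholder for a concrete DATASUSGateway<Subset>
* // implementation. The optional arguments (filters, callback, log output, concurrency,
* // data path) are omitted here and keep their defaults.
* const orchestrator = JobOrchestrator.init(gateway);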
*/
static init(gateway: DATASUSGateway<Subset>, filters?: Map<string, string | string[]>, callback?: Function, logOutput?: 'stdout' | 'file', MAX_CONCURRENT_PROCESSES?: number, DATA_PATH?: string): JobOrchestrator<Subset, DataSource, DATASUSGateway<Subset>, Parser<Records>>;
/**
* Defines the subset to be processed, resolves the file list, and splits it into chunks.
* @param subset Filters and data source.
* @param parser Optional parser to transform emitted records.
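* @example
* // Illustrative only: `mySubset` stands for a Subset instance describing filters and
* // data source; the optional parser argument is omitted.
* await orchestrator.subset(mySubset);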
*/
subset(subset: S, parser?: P): Promise<void>;
/**
* Resolves the path to the default job script (compiled under dist).
*/
get defaultJobScript(): string;
/**
* Executes the flow: downloads the files and schedules jobs per chunk.
* @param jobScript Path to the job script (optional). Uses default if not provided.
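* @example
* // Sketch: calling exec() without an argument falls back to the default job script
* // resolved by `defaultJobScript`.
* await orchestrator.exec();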
*/
exec(jobScript?: string): Promise<void>;
}