UNPKG

@codeplaydata/datasus

Version:

This application decompresses the DATASUS micro data and serves as a gateway class.

72 lines (71 loc) 3.35 kB
import { Command } from "../Command.js";
import { Subset } from "../../core/Subset.js";
import { DATASUSGateway } from "../../interface/gateway/DATASUSGateway.js";
import { Parser } from "../../interface/utils/Parser.js";
import { Records } from "../../core/Records.js";
import { DataSource } from "../../core/Datasource.js";

/**
 * Job orchestrator for processing DATASUS files.
 *
 * Responsibilities:
 * - Query the gateway and discover files;
 * - Download files to the configured data path;
 * - Split the workload into chunks and delegate execution to the JobScheduler;
 * - Chain parser and callback, when provided.
 *
 * @typeParam S - Subset type (filters and data source).
 * @typeParam D - DataSource type (dataset identifier, e.g. 'BI').
 * @typeParam G - Gateway implementation that lists/downloads files.
 * @typeParam P - Parser applied to the records emitted by child processes.
 */
export declare class JobOrchestrator<S extends Subset, D extends DataSource, G extends DATASUSGateway<S>, P extends Parser<Records>> implements Command {
    // Gateway used to list and download files (injected via constructor).
    private gateway;
    // Local directory where downloaded data is stored.
    readonly DATA_PATH: string;
    // Upper bound on the number of parallel worker processes.
    readonly MAX_CONCURRENT_PROCESSES: number;
    // Preferred output channel for processed records.
    readonly output: 'stdout' | 'file';
    // Optional filters applied to processed records.
    readonly filters?: Map<string, string | string[]> | undefined;
    // Optional per-record callback.
    // NOTE(review): the bare `Function` type is untyped — a concrete
    // signature such as `(record: Records) => void` would be safer, but
    // changing it here would alter the published contract.
    readonly callback?: Function | undefined;
    // Discovered file names, populated by subset().
    private _files;
    // Partitions of _files used to bound concurrency.
    private _chunks;
    // Data origin selected by the current subset.
    private dataSource;
    // Optional parser chained onto emitted records.
    private parser;
    // Absolute form of DATA_PATH, resolved once at construction.
    private readonly resolvedDataPath;
    /** Discovered files (short: filenames) to be processed. */
    get files(): string[];
    /** Chunks (partitions) of the file list, used to control concurrency. */
    get chunks(): string[][];
    /**
     * Protected constructor. Use the static init method to instantiate.
     *
     * @param gateway - Gateway that lists and downloads files via FTP.
     * @param DATA_PATH - Local path where data will be saved (default './').
     * @param MAX_CONCURRENT_PROCESSES - Maximum number of parallel processes.
     * @param output - Preferred output: 'stdout' (console) or 'file' (files).
     * @param filters - Filters applied to processed records.
     * @param callback - Callback function called for each emitted record.
     */
    protected constructor(gateway: G, DATA_PATH: string | undefined, MAX_CONCURRENT_PROCESSES: number, output?: 'stdout' | 'file', filters?: Map<string, string | string[]> | undefined, callback?: Function | undefined);
    /** Factory method for creating an orchestrator with basic configuration. */
    static init(gateway: DATASUSGateway<Subset>, filters?: Map<string, string | string[]>, callback?: Function, logOutput?: 'stdout' | 'file', MAX_CONCURRENT_PROCESSES?: number, DATA_PATH?: string): JobOrchestrator<Subset, DataSource, DATASUSGateway<Subset>, Parser<Records>>;
    /**
     * Defines the subset to be processed, resolves the file list and splits
     * it into chunks.
     *
     * @param subset - Filters and data origin.
     * @param parser - Optional parser to transform emitted records.
     */
    subset(subset: S, parser?: P): Promise<void>;
    /** Resolves the path to the default job script (compiled under dist). */
    get defaultJobScript(): string;
    /**
     * Executes the flow: download files and schedule jobs per chunk.
     *
     * @param jobScript - Path to the job script (optional). Uses the default
     *                    script when not provided.
     */
    exec(jobScript?: string): Promise<void>;
}