UNPKG

nsyslog

Version:

Modular new generation log agent. Reads, transforms, aggregates, correlates and sends logs from sources to destinations

412 lines (363 loc) 11.4 kB
/**
 * File Reader Input Module
 * Handles file reading, monitoring, and watermarking for input processing.
 * @module FileInput
 */
const
	Input = require('../'),
	Semaphore = require('../../semaphore'),
	fs = require('fs-extra'),
	Path = require('path'),
	extend = require('extend'),
	minimatch = require('minimatch'),
	slash = require('slash'),
	logger = require("../../logger"),
	Watermark = require("../../watermark"),
	filesystem = require("./filesystem"),
	File = filesystem.File,
	Monitor = filesystem.Monitor,
	glob = filesystem.glob;

const BUFFER = 1024 * 10;	// Default block size (bytes) handed to each File
const MAX_OPEN = 100;		// Maximum number of open files (not referenced in this module)
const SAVE_INTERVAL = 60000;	// Milliseconds between periodic watermark saves
const STOP_DELAY = 1000;	// Milliseconds stop() waits before invoking its callback

const MODE = {
	offset: "offset",		// Start position always derived from the configured offset
	watermark: "watermark"	// Start position restored from the saved watermark, if any
};

const OFFSET = {
	end: "end",		// Start reading from the end of the file
	begin: "begin",	// Start reading from the beginning of the file
	start: "start"	// Alias for the beginning of the file
};

/**
 * Closes the descriptor held by a file (if any) and clears it, so that later
 * code can tell the file is closed by checking `file.fd`.
 * @param {filesystem.File} file - File whose descriptor must be released.
 * @returns {Promise<void>}
 */
async function closeDescriptor(file) {
	if (file.fd) {
		await fs.close(file.fd);
		file.fd = null;
	}
}

/**
 * File Reader Input
 * @class
 * @extends Input
 */
class FileInput extends Input {
	/**
	 * Constructor for FileInput
	 * @param {string} id - Unique identifier for the input.
	 * @param {string} type - Type of the input.
	 */
	constructor(id, type) {
		super(id, type);

		/**
		 * File map, keyed by path
		 * @type {Object<string, filesystem.File>}
		 */
		this.files = {};
		/**
		 * Path sets: `read` holds files that should be read on the next pass
		 * (only consulted in watch mode), `avail` holds files that currently
		 * have buffered lines waiting to be fetched.
		 */
		this.list = { read: {}, avail: {} };
		/** Serializes openFile / save / stop against each other */
		this.sem = new Semaphore(1);
		/**
		 * Inner watermark instance
		 * @type {Watermark}
		 */
		this.watermark = null;
		/**
		 * File watermarks, keyed by path
		 * @type {Object}
		 */
		this.wm = null;
		/**
		 * File monitor
		 * @type {filesystem.Monitor}
		 */
		this.monitor = null;
	}

	/**
	 * Configures the FileInput instance.
	 * @param {Object} config - Configuration object.
	 * @param {Function} callback - Invoked with no arguments on success, or
	 * with the error if the configuration could not be applied.
	 */
	async configure(config, callback) {
		config = config || {};

		try {
			this.path = slash(Path.resolve(config.$path, config.path));
			if (config.exclude) {
				this.exclude = slash(Path.resolve(config.$path, config.exclude));
			}
		} catch (err) {
			// Path.resolve throws on non-string arguments; report it through
			// the callback instead of leaving a rejected promise behind.
			return callback(err);
		}

		this.readmode = config.readmode || MODE.offset;

		// A numeric offset of 0 is a valid position, so only fall back to
		// "end" when no offset was supplied at all.
		const hasOffset = config.offset !== undefined && config.offset !== null && config.offset !== "";
		this.offset = hasOffset ? config.offset : OFFSET.end;

		if (Buffer.isEncoding(config.encoding)) {
			this.encoding = config.encoding;
		} else {
			if (config.encoding) {
				logger.warn(`Unknown encoding '${config.encoding}', using 'utf8'`);
			}
			this.encoding = 'utf8';
		}

		this.watch = config.watch || false;
		this.blocksize = config.blocksize || BUFFER;
		this.options = config.options || {};
		this.config = config;
		callback();
	}

	/**
	 * Retrieves the mode of the input.
	 * @returns {string} Always `Input.MODE.pull`.
	 */
	get mode() {
		return Input.MODE.pull;
	}

	/**
	 * Retrieves the watermarks for the files.
	 * @returns {Promise<Array<Object>>} One `{key, long, current}` entry per
	 * watermarked path; the `_id` key of the watermark object is skipped.
	 */
	async watermarks() {
		const wm = this.wm || {};
		return Object.keys(wm).
			filter(k => k !== "_id").
			map(k => ({
				key: `${this.id}:${this.type}@${k}`,
				long: wm[k].stats.size,
				current: wm[k].offset
			}));
	}

	/**
	 * Copies the state of every known file into the watermark object and
	 * persists it. Errors are logged, never thrown.
	 * @returns {Promise<void>}
	 */
	async save() {
		await this.sem.take();
		try {
			Object.keys(this.files).forEach(k => {
				const file = this.files[k];
				this.wm[file.path] = file.toJSON();
			});
			// Awaited so that a failing save is caught and logged here
			if (this.watermark) await this.watermark.save(this.wm);
		} catch (err) {
			logger.error(err);
		} finally {
			this.sem.leave();
		}
	}

	/**
	 * Removes from the `avail` and `read` lists every path that is no longer
	 * present in the file map.
	 */
	sanityzeFiles() {
		[this.list.avail, this.list.read].forEach(list => {
			Object.keys(list).
				filter(path => !this.files[path]).
				forEach(path => delete list[path]);
		});
	}

	/**
	 * Computes the start position from the configured offset: a non-negative
	 * number is used as is, "begin"/"start" mean 0, anything else means the
	 * current end of the file.
	 * @param {fs.Stats} stats - Stats of the file being opened.
	 * @returns {number} Byte offset to start reading from.
	 */
	_initialOffset(stats) {
		const idx = parseInt(this.offset, 10);
		if (idx >= 0) return idx;
		if (this.offset === OFFSET.begin || this.offset === OFFSET.start) return 0;
		return stats.size;
	}

	/**
	 * Opens a file for reading and initializes its state. Does nothing if the
	 * file is already open.
	 * @param {string} path - Path to the file.
	 * @returns {Promise<filesystem.File>} The file instance registered for
	 * the path, or undefined when the path matches the exclude pattern.
	 */
	async openFile(path) {
		const files = this.files, sem = this.sem, wm = this.wm;

		if (this.exclude && minimatch(path, this.exclude)) {
			logger.info(`${path} is excluded`);
			return;
		}

		await sem.take();
		try {
			if (!files[path] || !files[path].fd) {
				const fd = await fs.open(path, 'r');
				let file;

				try {
					const stats = await fs.fstat(fd);
					let tail = "", offset = 0, line = 0, lines = [];

					if (this.readmode === MODE.watermark) {
						if (wm[path]) {
							offset = wm[path].offset || 0;
							tail = wm[path].tail || "";
							line = wm[path].line || 0;
							lines = wm[path].lines || [];
							logger.silly(`Found ${path} in watermarks`);
						} else {
							logger.silly(`Not found ${path} in watermarks`);
							offset = this._initialOffset(stats);
						}
					} else if (this.readmode === MODE.offset) {
						offset = this._initialOffset(stats);
					}

					file = new File(path, fd, stats, offset, this.blocksize, tail, line, lines);
				} catch (err) {
					// Nothing owns the descriptor yet: close it before failing
					try {
						await fs.close(fd);
					} catch (closeErr) {
						logger.error(closeErr);
					}
					throw err;
				}

				file.ready = true;
				files[path] = file;
				this.list.read[path] = true;
				extend(true, wm, { [path]: file.toJSON() });
			}
		} finally {
			// Always released, otherwise one failed open would block every
			// later openFile / save / stop call.
			sem.leave();
		}

		return files[path];
	}

	/**
	 * Reads one block from a single file, splits it into lines and updates
	 * the file state, its watermark and the `avail` / `read` lists.
	 * @param {filesystem.File} file - File to read from.
	 * @returns {Promise<filesystem.File>} The same file instance.
	 */
	async _readFile(file) {
		let locked = false;

		try {
			await this.openFile(file.path);
			await file.sem.take();
			locked = true;

			logger.silly(`Reading ${file.path} from ${file.offset}`);
			if (!file.fd) return file;

			const res = await fs.read(file.fd, file.buffer, 0, file.buffer.length, file.offset);
			// NOTE: each block is decoded on its own, so a multi-byte character
			// split across two blocks is not reassembled.
			file.tail += res.buffer.toString(this.encoding || 'utf8', 0, res.bytesRead);
			file.offset += res.bytesRead;
			logger.silly(`Read ${res.bytesRead} from ${file.path}`);

			if (res.bytesRead == 0) {
				const fstat = await fs.stat(file.path);
				const replaced = fstat.ino != file.stats.ino;

				if (replaced || fstat.size < file.offset) {
					// Rotated or truncated: restart from the beginning
					if (replaced) {
						logger.warn(`File ${file.path} seems to be another file. Reseting reference.`);
					} else {
						logger.warn(`File ${file.path} has been truncated. Reseting watermark`);
					}
					file.offset = 0;
					await closeDescriptor(file);
				} else {
					// Stop reading the file until it is flagged for reading again
					logger.silly(`Nothing to read from ${file.path}. Closing file`);
					delete this.list.read[file.path];
					file.ready = false;
					await closeDescriptor(file);
				}
			}

			// Everything before the last "\n" is a complete line; the rest is
			// kept as the tail for the next read.
			const parts = file.tail.split("\n");
			const rest = parts.pop();
			parts.forEach(text => {
				file.line++;
				file.lines.push({ ln: file.line, line: text });
			});
			file.tail = rest;

			this.wm[file.path] = file.toJSON();
			if (file.lines.length) this.list.avail[file.path] = true;

			return file;
		} catch (err) {
			logger.error(err);
			throw err;
		} finally {
			// Release only what was actually taken
			if (locked) file.sem.leave();
		}
	}

	/**
	 * Reads lines from the monitored files. In watch mode only the files in
	 * the `read` list are visited; otherwise every known file is.
	 * @returns {Promise<Array<filesystem.File>>} List of files with updated lines.
	 */
	readlines() {
		const paths = Object.keys(this.watch ? this.list.read : this.files);
		const pending = paths.
			map(k => this.files[k]).
			filter(Boolean).
			map(file => this._readFile(file));

		return Promise.all(pending);
	}

	/**
	 * Fetches a single line from a randomly chosen file that has buffered lines.
	 * @returns {Object|boolean} The fetched line or false if no lines are available.
	 */
	fetchLine() {
		this.sanityzeFiles();

		const paths = Object.keys(this.list.avail);
		if (!paths.length) return false;

		const rnd = Math.floor(Math.random() * paths.length);
		const file = this.files[paths[rnd]];
		const entry = file.lines.shift();

		if (!file.lines.length) delete this.list.avail[file.path];
		// The file was flagged as available but holds no lines
		if (!entry) return false;

		return {
			type: 'file',
			path: file.path,
			filename: file.filename,
			ln: entry.ln,
			originalMessage: entry.line
		};
	}

	/**
	 * Starts the FileInput instance: loads the watermarks, then either starts
	 * the file monitor (watch mode) or opens every file matching the path.
	 * The periodic watermark save is armed only when startup succeeds.
	 * @param {Function} callback - Invoked with the error, if any.
	 */
	async start(callback) {
		try {
			this.watermark = new Watermark(this.config.$datadir);
			await this.watermark.start();
			this.wm = await this.watermark.get(this.id);
		} catch (err) {
			return callback(err);
		}

		try {
			if (this.watch) {
				this.monitor = new Monitor(this.files);
				this.monitor.start(this.path, this.options);
				this.monitor.on('new', path => {
					this.openFile(path).catch(err => logger.error(err));
				});
				this.monitor.on('ready', path => {
					this.list.read[path] = true;
				});
			} else {
				const files = await glob(this.path, { nodir: true });
				await Promise.all(files.map(path => this.openFile(path)));
			}
		} catch (err) {
			return callback(err);
		}

		// Armed before the callback so a stop() issued from it clears the timer
		this.wmival = setInterval(this.save.bind(this), SAVE_INTERVAL);
		callback();
	}

	/**
	 * Retrieves the next line from the input, reading from disk when no line
	 * is buffered.
	 * @param {Function} callback - Invoked as `(err, data)`; `data` is false
	 * when nothing is available. Invoked with a single `false` once the input
	 * has been stopped.
	 */
	async next(callback) {
		if (this.closed) return callback(false);

		let data = this.fetchLine();
		if (data) return callback(null, data);

		try {
			await this.readlines();
			data = this.fetchLine();
		} catch (err) {
			return callback(err);
		}
		callback(null, data);
	}

	/**
	 * Stops the FileInput instance: cancels the periodic save, stops the
	 * monitor, closes every open file and persists the watermarks.
	 * @param {Function} callback - Invoked one second after cleanup.
	 */
	async stop(callback) {
		clearInterval(this.wmival);
		if (this.monitor) this.monitor.stop();

		await this.sem.take();
		try {
			const closeall = Object.
				keys(this.files).
				map(k => this.files[k]).
				map(async(file) => {
					await file.sem.take();
					try {
						// Files closed at end-of-file have no descriptor
						await closeDescriptor(file);
					} finally {
						file.sem.leave();
					}
				});
			await Promise.all(closeall);
		} catch (err) {
			logger.error(err);
		} finally {
			this.sem.leave();
		}

		this.closed = true;
		await this.save();
		setTimeout(callback, STOP_DELAY);
	}

	/**
	 * Generates a unique key for an entry.
	 * @param {Object} entry - The entry object.
	 * @returns {string} The generated key, `input:type@path`.
	 */
	key(entry) {
		return `${entry.input}:${entry.type}@${entry.path}`;
	}
}

module.exports = FileInput;