elasticsearch-tools

Elasticsearch command line tools for importing, exporting, and more.

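A hypothetical invocation of the bulk-import tool below (the bin name es-import-bulk is assumed from the package's naming convention; check the bins registered in package.json):

es-import-bulk --url http://localhost:9200 --file ./data.json --max 5000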
#!/usr/bin/env node
'use strict';

var helpers = require('./helpers');
var fs = require('fs');
var program = require('commander');
var elasticsearch = require('elasticsearch');
var LineByLineReader = require('line-by-line');
var appInfo = require('../package.json');
var filesize = require('filesize');
var ProgressBar = require('progress');
var bar;

// instance variables
var isFileDone = false;
var isPaused = false;
var totalLines = 0;
var currentCount = 0;
var currentBatch = '';

// setup command line options
program
  .version(appInfo.version, '-v, --version')
  .option('-u, --url <url>', 'the elasticsearch url to connect to')
  .option('-f, --file <file>', 'the file to read data from')
  .option(
    '-m, --max <items>',
    'the max number of lines to process per batch (default: 20,000)',
    helpers.integer,
    20000
  )
  .option(
    '--requestTimeout <ms>',
    'ES CLIENT OPTION: milliseconds before an HTTP request will be aborted and retried. This can also be set per request (default: 30000)',
    helpers.integer,
    30000
  )
  .parse(process.argv);

// validate url and file
helpers.validateUrlAndFile(program);

// validate max items per batch
if (program.max <= 0 || Number.isNaN(program.max)) {
  helpers.exit('You must pass in a valid --max option');
}

// validate file exists
if (!fs.existsSync(program.file)) {
  helpers.exit('The file you passed in does not exist.');
}

// init client
var client = new elasticsearch.Client({
  requestTimeout: program.requestTimeout,
  hosts: program.url.split(',')
});

// declare our bulk import function
var bulkImport = function(cb) {
  client.bulk({ body: currentBatch }, function(err, response) {
    if (err) {
      helpers.exit(err);
    }
    if (response.error) {
      helpers.exit('When executing bulk query: ' + response.error.toString());
    }

    // reset global variables
    currentCount = 0;
    currentBatch = '';

    // exit or continue processing
    if (isFileDone) {
      console.log('Complete!');
      process.exit();
    } else {
      cb();
    }
  });
};

var filestats = fs.statSync(program.file);
console.log('Pre-Processing file of size: ' + filesize(filestats.size));

// first pass: count lines so the progress bar has a total
var preprocess = new LineByLineReader(program.file, {
  encoding: 'utf8',
  skipEmptyLines: false
});
preprocess.on('error', helpers.exit);
preprocess.on('line', function() {
  totalLines++;
});
preprocess.on('end', function() {
  // tick the bar in chunks: every 100k lines, or every half percent for smaller files
  var barUpdateSize = 100000;
  var halfAPercent = Math.round(totalLines * 0.005);
  if (halfAPercent < barUpdateSize) barUpdateSize = halfAPercent;
  if (barUpdateSize < 1) barUpdateSize = 1; // avoid a zero modulus on very small files

  bar = new ProgressBar(
    'Processing line :current of :total [:bar] :percent :elapseds',
    { width: 20, total: totalLines }
  );

  // second pass: accumulate lines into batches and bulk-import each batch
  var lr = new LineByLineReader(program.file, {
    encoding: 'utf8',
    skipEmptyLines: false
  });
  var totalLinesProcessed = 0;

  console.log(
    'Starting bulk imports with batches of ' + program.max + ' lines.'
  );
  bar.render();

  lr.on('error', function(err) {
    helpers.exit(err);
  });

  lr.on('line', function(line) {
    totalLinesProcessed++;
    if (totalLinesProcessed % barUpdateSize === 0) bar.tick(barUpdateSize);

    currentCount++;
    currentBatch += line + '\n';

    // batch is full: pause reading until the bulk request completes
    if (currentCount >= program.max) {
      lr.pause();
      isPaused = true;
      bulkImport(function() {
        isPaused = false;
        lr.resume();
      });
    }
  });

  lr.on('end', function() {
    isFileDone = true;
    // tick whatever remainder the chunked updates did not cover
    bar.tick(totalLinesProcessed % barUpdateSize);
    if (!isPaused && currentCount > 0) {
      bulkImport();
    }
  });
});
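The input file is handed to client.bulk verbatim, so it must already be in the Elasticsearch bulk (newline-delimited JSON) format: an action/metadata line followed, for index and create actions, by a source document line. A minimal sketch of such a file (the index name, type, and ids are placeholders, not anything this script sets):

{ "index": { "_index": "my-index", "_type": "my-type", "_id": "1" } }
{ "title": "first document" }
{ "index": { "_index": "my-index", "_type": "my-type", "_id": "2" } }
{ "title": "second document" }

Note that the script only checks the top-level response.error; the bulk API also reports per-document failures through response.errors and response.items. A hedged sketch of how a caller could surface those (not part of the original script):

if (response.errors) {
  response.items.forEach(function(item) {
    // each item is keyed by its action type (index, create, update, or delete)
    var action = item.index || item.create || item.update || item.delete;
    if (action && action.error) {
      console.error('Failed doc ' + action._id + ': ' + JSON.stringify(action.error));
    }
  });
}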