etl
Version:
Collection of stream-based components that form an ETL pipeline
177 lines (142 loc) • 4.37 kB
JavaScript
const Streamz = require('streamz');
const Promise = require('bluebird');
const util = require('util');
/**
 * Streamz-based component that forwards incoming data to `client.bulk`.
 * Works both as a constructor (`new Bulk(...)`) and as a plain factory call.
 *
 * @param {string} action   Bulk action name (the prototype methods test for
 *                          'index', 'update', 'upsert' and 'custom').
 * @param {Object} client   Client object exposing a `bulk(params)` method.
 * @param {string} [index]  Default index sent with every bulk request.
 * @param {string} [type]   Default type; only sent when truthy.
 * @param {Object} [options] Passed to Streamz and kept on `this.options`.
 *                           The legacy `pushResult` key is copied to `pushResults`.
 * @throws {string} 'CLIENT_MISSING' when `client` is falsy,
 *                  'ACTION_MISSING' when `action` is falsy.
 */
function Bulk(action,client,index,type,options) {
  const calledWithNew = this instanceof Bulk;
  if (!calledWithNew) {
    return new Bulk(action,client,index,type,options);
  }

  // The client is validated before the action, so a call missing both
  // reports the client first.
  if (!client) {
    throw 'CLIENT_MISSING';
  }
  if (!action) {
    throw 'ACTION_MISSING';
  }

  Streamz.call(this,options);

  const settings = options || {};
  this.options = settings;
  // Legacy fix: older callers spelled the option without the trailing "s".
  if (settings.pushResult) {
    settings.pushResults = settings.pushResult;
  }

  this.action = action;
  this.index = index;
  this.type = type;
  this.client = client;
}
// Set up prototype inheritance so Bulk instances get the Streamz methods.
util.inherits(Bulk,Streamz);
/**
 * Builds the bulk action/metadata line for one document.
 *
 * Metadata fields are MOVED from `d` into the returned object, i.e. `d` is
 * mutated so that what remains can be sent as the document body.
 *
 * @param {Object} d  Document carrying optional `_id`, `_index`, `_type`,
 *                    `parent` and `routing` fields.
 * @returns {Object}  `{ <action>: { _id, ... } }`, where an 'upsert' action
 *                    is reported as 'update'.
 */
Bulk.prototype.getMeta = function(d) {
  const meta = {};

  // Copy a field onto the metadata object and strip it from the document.
  const take = field => {
    meta[field] = d[field];
    delete d[field];
  };

  take('_id');

  // Per-document values are only used when no instance-level value is set.
  if (!this.index) {
    take('_index');
  }
  // Unlike the other fields, `_type` is skipped entirely when the document has none.
  if (!this.type && d._type) {
    take('_type');
  }
  if (!this.parent) {
    take('parent');
  }
  if (!this.routing) {
    take('routing');
  }

  const verb = this.action == 'upsert' ? 'update' : this.action;
  return { [verb]: meta };
};
/**
 * Sends one incoming item (or array of items) to Elasticsearch via `client.bulk`.
 *
 * Every item becomes one bulk operation made of one or two body lines:
 *   - 'custom': the item itself, followed by its `body` property when present
 *     (`body` is removed from the item);
 *   - 'index' / 'update' / 'upsert': the `getMeta` line followed by the document
 *     (`_source` if present, otherwise what is left of the item);
 *   - any other action (e.g. 'delete'): the `getMeta` line only.
 * The lines of each operation are kept grouped so that response items can be
 * matched back to their request lines even when operations differ in length.
 *
 * Retries (enabled by `options.maxRetries`):
 *   - a rejected request resends the current batch;
 *   - when `pushResults` or `pushErrors` is set, items that come back with an
 *     error are resent on their own, except for mapping/document parsing errors
 *     which cannot succeed on retry.
 * The delay is `options.retryDelay` (default 30000 ms) or, when
 * `options.backoffDelay` is positive, an exponential backoff optionally
 * randomized by `backoffVariance` and capped by `maxBackoffDelay`.
 *
 * @param {Object|Object[]} d  Item(s) to send. Items are mutated (metadata /
 *                             `body` fields are stripped).
 * @returns {Promise} Resolves to:
 *   - `undefined` when neither `pushResults` nor `pushErrors` is set;
 *   - the bulk response (each item carrying the `body` that was sent) when
 *     `pushResults` is set;
 *   - otherwise the array of items whose status is neither 200 nor 201 (each
 *     with `error` copied from its verb), or `undefined` when there are none.
 *   Rejects with the client error (with `retries` attached) or with
 *   'MAXIMUM_RETRIES' once retries are exhausted.
 */
Bulk.prototype._fn = function(d) {
  const itemsSuccessfullyPushed = [];
  let retries;

  // One entry per bulk operation; each entry is the list of body lines it needs.
  let operations = [].concat(d).map(item => {
    if (this.action == 'custom') {
      const body = item.body;
      delete item.body;
      return body ? [item, body] : [item];
    }

    // getMeta strips the metadata fields from the item before it is used as the body.
    const meta = this.getMeta(item);
    const doc = item._source || item;

    if (this.action == 'index')
      return [meta, doc];
    if (this.action == 'upsert')
      return [meta, {doc:doc,doc_as_upsert:true}];
    if (this.action == 'update')
      return [meta, {doc:doc}];
    return [meta];
  });

  // Each response item holds its result under the name of the verb that was executed.
  const verbOf = item => item.update || item.index || item.create || item.delete;

  // Parsing errors are deterministic, so resending the same document is pointless.
  const isRetryable = error =>
    error.type !== 'mapper_parsing_exception' && error.type !== 'document_parsing_exception';

  const processError = e => {
    retries = retries || [];
    const retryNo = retries.length;

    if (!this.options.maxRetries || retryNo >= this.options.maxRetries) {
      // Only objects can carry the retry history; primitives are rethrown as-is.
      if (e !== null && typeof e === 'object') e.retries = retries;
      throw e || 'MAXIMUM_RETRIES';
    }

    // `e` is undefined when the retry is caused by item-level errors.
    if (this.options.debug)
      console.log('Retry', e ? e.message : 'ITEM_ERRORS');

    let retryDelay;
    if (this.options.backoffDelay > 0) {
      retryDelay = this.options.backoffDelay * Math.pow(2,retryNo);
      if (this.options.backoffVariance > 0)
        retryDelay *= (1 + this.options.backoffVariance * (Math.random() -0.5));
      if (this.options.maxBackoffDelay > 0)
        retryDelay = Math.min(retryDelay, this.options.maxBackoffDelay);
    } else {
      retryDelay = this.options.retryDelay || 30000;
    }

    retries.push(retryDelay);
    return Promise.delay(retryDelay).then(execute);
  };

  const execute = () => {
    // Flatten the grouped operations into the line list the bulk API expects.
    const body = [];
    operations.forEach(lines => lines.forEach(line => body.push(line)));

    const params = {
      body : body,
      index: this.index,
      consistency : this.options.consistency,
      refresh : this.options.refresh,
      routing : this.options.routing,
      timeout : this.options.timeout,
      fields : this.options.fields
    };

    // type is forbidden in elasticsearch > 7
    if (this.type) {
      params.type = this.type;
    }

    return this.client.bulk(params)
      .then(e => {
        if (!this.options.pushResults && !this.options.pushErrors)
          return;

        // Some client versions wrap the payload in `body`.
        if (e.body) e = e.body;

        // Attach to every response item the body line that was sent for it.
        e.items.forEach((item, i) => {
          const lines = operations[i];
          item.body = lines && lines[1];
        });

        if (this.options.maxRetries) {
          let operationsToRetry;

          e.items.forEach((item, i) => {
            const verb = verbOf(item);
            if (verb && verb.error && isRetryable(verb.error)) {
              operationsToRetry = operationsToRetry || [];
              operationsToRetry.push(operations[i]);
            } else {
              itemsSuccessfullyPushed.push(item);
            }
          });

          if (operationsToRetry) {
            operations = operationsToRetry;
            return processError();
          }

          // Report everything settled across all attempts, not just the last one.
          e.items = itemsSuccessfullyPushed;
        }

        if (this.options.pushResults)
          return e;

        const failed = e.items.filter(item => {
          const verb = verbOf(item);
          if (!verb)
            return false;
          item.error = verb.error;
          return verb.status !== 201 && verb.status !== 200;
        });

        return failed.length ? failed : undefined;
      }, e => processError(e));
  };

  return execute();
};
// Export the constructor; it can be called with or without `new`.
module.exports = Bulk;