s-h-esd
Version:
A Node port of Seasonal Hybrid ESD (S-H-ESD), Twitter's AnomalyDetection.
339 lines (292 loc) • 12.3 kB
JavaScript
;
var _typeof = typeof Symbol === "function" && typeof Symbol.iterator === "symbol" ? function (obj) { return typeof obj; } : function (obj) { return obj && typeof Symbol === "function" && obj.constructor === Symbol && obj !== Symbol.prototype ? "symbol" : typeof obj; };
var _date_utils = require('./date_utils');
var _detect_anoms = require('./detect_anoms');
function _toConsumableArray(arr) { if (Array.isArray(arr)) { for (var i = 0, arr2 = Array(arr.length); i < arr.length; i++) { arr2[i] = arr[i]; } return arr2; } else { return Array.from(arr); } }
function median(x) {
var t_x = x.slice(0).sort(function (a, b) {
return a - b;
});
return t_x[parseInt(t_x.length / 2)];
}
function quantile(x, q) {
var t_x = x.slice(0).sort(function (a, b) {
return a - b;
});
var idx = Math.round(t_x.length * q);
return t_x[idx];
}
function detect_ts(df, _ref) {
var _ref$max_anoms = _ref.max_anoms,
max_anoms = _ref$max_anoms === undefined ? 0.10 : _ref$max_anoms,
_ref$direction = _ref.direction,
direction = _ref$direction === undefined ? 'pos' : _ref$direction,
_ref$alpha = _ref.alpha,
alpha = _ref$alpha === undefined ? 0.05 : _ref$alpha,
_ref$only_last = _ref.only_last,
only_last = _ref$only_last === undefined ? null : _ref$only_last,
_ref$threshold = _ref.threshold,
threshold = _ref$threshold === undefined ? null : _ref$threshold,
_ref$e_value = _ref.e_value,
e_value = _ref$e_value === undefined ? false : _ref$e_value,
_ref$longterm = _ref.longterm,
longterm = _ref$longterm === undefined ? false : _ref$longterm,
_ref$piecewise_median = _ref.piecewise_median_period_weeks,
piecewise_median_period_weeks = _ref$piecewise_median === undefined ? 2 : _ref$piecewise_median,
_ref$plot = _ref.plot,
plot = _ref$plot === undefined ? false : _ref$plot,
_ref$y_log = _ref.y_log,
y_log = _ref$y_log === undefined ? false : _ref$y_log,
_ref$xlabel = _ref.xlabel,
xlabel = _ref$xlabel === undefined ? '' : _ref$xlabel,
_ref$ylabel = _ref.ylabel,
ylabel = _ref$ylabel === undefined ? 'count' : _ref$ylabel,
_ref$title = _ref.title,
title = _ref$title === undefined ? null : _ref$title,
_ref$verbose = _ref.verbose,
verbose = _ref$verbose === undefined ? false : _ref$verbose;
if (!Array.isArray(df)) {
throw 'data must be a single data frame';
} else {
if (df[0].length != 2 || !df.map(function (row) {
return row[1];
}).filter(function (col) {
return Number.isFinite(col);
}).length) {
throw 'data must be a 2 column data.frame, with the first column being a set of timestamps, and the second coloumn being numeric values.';
}
}
if (max_anoms > 0.49) {
length = len(df);
throw 'max_anoms must me less then 50% of the data points (max_anoms =' + max_anoms * length + ' data_points =' + length;
}
if (!['pos', 'neg', 'both'].includes(direction)) {
throw 'direction options are: pos | neg | both.';
}
if (!(0.01 <= alpha || alpha <= 0.1)) {
if (verbose) console.warn('alpha is the statistical signifigance and is usually between 0.01 and 0.1');
}
if (only_last && !['day', 'hr'].includes(only_last)) {
throw 'only_last must be either \'day\' or \'hr\'';
}
if (![null, 'med_max', 'p95', 'p99'].includes(threshold)) {
throw 'threshold options are: None | med_max | p95 | p99';
}
if (!(typeof e_value === 'undefined' ? 'undefined' : _typeof(e_value)) === 'boolean') {
throw 'e_value must be a boolean';
}
if (!(typeof longterm === 'undefined' ? 'undefined' : _typeof(longterm)) === 'boolean') {
throw 'longterm must be a boolean';
}
if (piecewise_median_period_weeks < 2) {
throw 'piecewise_median_period_weeks must be at greater than 2 weeks';
}
if (!(typeof plot === 'undefined' ? 'undefined' : _typeof(plot)) === 'boolean') {
throw 'plot must be a boolean';
}
if (!(typeof y_log === 'undefined' ? 'undefined' : _typeof(y_log)) === 'boolean') {
throw 'y_log must be a boolean';
}
if (!(typeof xlabel === 'undefined' ? 'undefined' : _typeof(xlabel)) === 'string') {
throw 'xlabel must be a string';
}
if (!(typeof ylabel === 'undefined' ? 'undefined' : _typeof(ylabel)) === 'string') {
throw 'ylabel must be a string';
}
if (!(typeof title === 'undefined' ? 'undefined' : _typeof(title)) === 'string') {
throw 'title must be a string';
}
if (!title) {
title = '';
} else title = title + ':';
var gran = (0, _date_utils.get_gran)(df);
if (gran === 'day') {
var num_days_per_line = 7;
if (typeof only_last === 'string' && only_last === 'hr') {
only_last = 'day';
}
} else {
var num_days_per_line = 1;
}
if (gran === 'sec') {
var min_df = df.map(function (row) {
return [new Date(new Date(row[0]).toISOString().substr(0, 16) + ":00").toISOString(), row[1]];
});
var agg_df = {};
min_df.forEach(function (row) {
agg_df[row[0]] ? agg_df[row[0]] += row[1] : agg_df[row[0]] = row[1];
});
df = Object.keys(agg_df).map(function (key) {
return [key, agg_df[key]];
}).sort();
}
var gran_perod = {
'min': 1440,
'hr': 24,
'day': 7
};
var period = gran_perod[gran];
if (!period) {
throw gran + ' granularity detected. This is currently not supported.';
}
var num_obs = df.length;
var clamp = 1 / num_obs;
if (max_anoms < clamp) {
max_anoms = clamp;
}
if (longterm) {
if (gran === 'day') {
var num_obs_in_period = period * piecewise_median_period_weeks + 1;
var num_days_in_period = 7 * piecewise_median_period_weeks + 1;
} else {
var num_obs_in_period = period * 7 * piecewise_median_period_weeks;
var num_days_in_period = 7 * piecewise_median_period_weeks;
}
var last_date = df.slice(-1)[0][0];
var all_data = [];
for (var j = 0; j < df.length; j += num_obs_in_period) {
var start_date = new Date(df[j][0]);
var end_date = new Date(Math.min(start_date.getTime() + 1000 * 60 * 60 * 24 * num_days_in_period, new Date(df.slice(-1)[0][0]).getTime()));
if (Math.floor((end_date - start_date) / (1000 * 60 * 60 * 24)) === num_days_in_period) {
var sub_df = df.filter(function (row) {
return new Date(row[0]) >= start_date && new Date(row[0]) < end_date;
});
} else {
var sub_df = df.filter(function (row) {
return new Date(row[0]) > new Date(last_date.getTime() - 1000 * 60 * 60 * 24 * num_days_in_period) && new Date(row[0]) <= last_date;
});
}
all_data.push(sub_df);
}
} else {
var all_data = [df];
}
var all_anoms = [];
var seasonal_plus_trend = [];
for (var i in all_data) {
var directions = {
pos: { one_tail: true, upper_tail: true },
neg: { one_tail: true, upper_tail: false },
both: { one_tail: false, upper_tail: false }
};
var anomaly_direction = directions[direction];
var s_h_esd_timestamps = (0, _detect_anoms.detect_anoms)(all_data[i], { k: max_anoms, alpha: alpha, num_obs_per_period: period, use_decomp: true,
one_tail: anomaly_direction.one_tail, upper_tail: anomaly_direction.upper_tail, verbose: verbose });
var data_decomp = s_h_esd_timestamps.stl;
s_h_esd_timestamps = s_h_esd_timestamps.anoms;
if (s_h_esd_timestamps) {
var anoms = all_data[i].filter(function (datum) {
return s_h_esd_timestamps.indexOf(datum[0]) !== -1;
});
} else {
var anoms = [];
}
if (threshold) {
var date_group = {};
df.forEach(function (d) {
var date_string = new Date(d[0]).toLocaleDateString();
if (!date_group[date_string]) date_group[date_string] = [d[1]];else date_group[date_string].push(d[1]);
});
var periodic_maxes = Object.keys(date_group).map(function (date) {
return [date, Math.max.apply(Math, _toConsumableArray(date_group[date]))];
});
if (threshold === 'med_max') {
var thresh = median(periodic_maxes.map(function (max) {
return max[1];
}));
} else if (threshold === 'p95') {
var thresh = quantile(periodic_maxes.map(function (max) {
return max[1];
}), .95);
} else if (threshold === 'p99') {
var thresh = quantile(periodic_maxes.map(function (max) {
return max[1];
}), .99);
}
anoms = anoms.filter(function (anom) {
return anom[1] >= thresh;
});
}
all_anoms = all_anoms.concat(anoms);
seasonal_plus_trend = seasonal_plus_trend.concat(data_decomp);
}
if (only_last) {
var start_date = new Date(df.slice(0).sort(function (a, b) {
return new Date(a[0]) - new Date(b[0]);
}).slice(-1)[0][0]);
start_date.setDate(start_date.getDate() - 7);
var start_anoms = new Date(df.slice(0).sort(function (a, b) {
return new Date(a[0]) - new Date(b[0]);
}).slice(-1)[0][0]);
start_anoms.setDate(start_anoms.getDate() - 1);
if (gran === 'day') {
var breaks = 3 * 12;
num_days_per_line = 7;
} else {
if (only_last === 'day') {
var breaks = 12;
} else {
start_date = new Date(df.slice(0).sort(function (a, b) {
return new Date(a[0]) - new Date(b[0]);
}).slice(-1)[0][0]);
start_date.setDate(start_date.getDate() - 2);
start_date = new Date(start_date.toDateString());
start_anoms = new Date(df.slice(0).sort(function (a, b) {
return new Date(a[0]) - new Date(b[0]);
}).slice(-1)[0][0]);
start_anoms.setHours(start_anoms.getHours() - 1);
var breaks = 3;
}
}
var x_subset_single_day = df.filter(function (d) {
return new Date(d[0]) > start_anoms;
}).slice(0).sort(function (a, b) {
return new Date(a[0]) - new Date(b[0]);
});
var x_subset_week = df.filter(function (d) {
return new Date(d[0]) <= start_anoms && new Date(d[0]) > start_date;
});
if (all_anoms.length > 0) {
all_anoms = all_anoms.filter(function (anom) {
return new Date(anom[0]) >= new Date(x_subset_single_day[0][0]);
});
}
num_obs = x_subset_single_day.length;
}
var anom_pct = df.length / parseFloat(num_obs) * 100;
if (anom_pct === 0.) {
return {
anoms: null,
plot: null
};
}
if (e_value) {
var exp_val = seasonal_plus_trend.filter(function (st) {
return all_anoms.map(function (anom) {
return anom[0];
}).indexOf(st[0]) !== -1;
}).map(function (st) {
return st[1];
});
var d = all_anoms.map(function (a, idx) {
return {
timestamp: a[0],
anom: a[1],
expected_value: exp_val[idx]
};
});
} else {
var d = all_anoms.map(function (a) {
return {
timestamp: a[0],
anom: a[1]
};
});
}
return {
anoms: d,
plot: null
};
}
module.exports = detect_ts;
//# sourceMappingURL=detect_ts.js.map