UNPKG

pondjs

Version:

A timeseries library built on top of immutable.js

1,369 lines (1,270 loc) 53 kB
/* * Copyright (c) 2015-2017, The Regents of the University of California, * through Lawrence Berkeley National Laboratory (subject to receipt * of any required approvals from the U.S. Dept. of Energy). * All rights reserved. * * This source code is licensed under the BSD-style license found in the * LICENSE file in the root directory of this source tree. */ import _ from "underscore"; import Immutable from "immutable"; import Collection from "./collection"; import Index from "./index"; import Event from "./event"; import TimeEvent from "./timeevent"; import TimeRangeEvent from "./timerangeevent"; import IndexedEvent from "./indexedevent"; import { Pipeline } from "./pipeline.js"; function buildMetaData(meta) { let d = meta ? meta : {}; // Name d.name = meta.name ? meta.name : ""; // Index if (meta.index) { if (_.isString(meta.index)) { d.index = new Index(meta.index); } else if (meta.index instanceof Index) { d.index = meta.index; } } // UTC or Local time d.utc = true; if (_.isBoolean(meta.utc)) { d.utc = meta.utc; } return new Immutable.Map(d); } /** * A `TimeSeries` represents a series of events, with each event being a combination of: * * - time (or `TimeRange`, or `Index`) * - data - corresponding set of key/values. * * ### Construction * * Currently you can initialize a `TimeSeries` with either a list of events, or with a data format that looks like this: * * ```javascript * const data = { * name: "trafficc", * columns: ["time", "value"], * points: [ * [1400425947000, 52], * [1400425948000, 18], * [1400425949000, 26], * [1400425950000, 93], * ... * ] * }; * ``` * * To create a new TimeSeries object from the above format, simply use the constructor: * * ```javascript * const series = new TimeSeries(data); * ``` * * The format of the data is as follows: * * - **name** - optional, but a good practice * - **columns** - are necessary and give labels to the data in the points. * - **points** - are an array of tuples. 
Each row is at a different time (or timerange), and each value corresponds to the column labels. * * As just hinted at, the first column may actually be: * * - "time" * - "timeRange" represented by a `TimeRange` * - "index" - a time range represented by an `Index`. By using an index it is possible, for example, to refer to a specific month: * * ```javascript * const availabilityData = { * name: "Last 3 months availability", * columns: ["index", "uptime"], * points: [ * ["2015-06", "100%"], // <-- 2015-06 specified here represents June 2015 * ["2015-05", "92%"], * ["2015-04", "87%"], * ] * }; * ``` * * Alternatively, you can construct a `TimeSeries` with a list of events. * These may be `TimeEvents`, `TimeRangeEvents` or `IndexedEvents`. Here's an example of that: * * ```javascript * const events = []; * events.push(new TimeEvent(new Date(2015, 7, 1), {value: 27})); * events.push(new TimeEvent(new Date(2015, 8, 1), {value: 29})); * const series = new TimeSeries({ * name: "avg temps", * events: events * }); * ``` * * ### Nested data * * The values do not have to be simple types like the above examples. Here's an * example where each value is itself an object with "in" and "out" keys: * * ```javascript * const series = new TimeSeries({ * name: "Map Traffic", * columns: ["time", "NASA_north", "NASA_south"], * points: [ * [1400425951000, {in: 100, out: 200}, {in: 145, out: 135}], * [1400425952000, {in: 200, out: 400}, {in: 146, out: 142}], * [1400425953000, {in: 300, out: 600}, {in: 147, out: 158}], * [1400425954000, {in: 400, out: 800}, {in: 155, out: 175}], * ] * }); * ``` * * Complex data is stored in an Immutable structure. To get a value out of nested * data like this you will get the event you want (by row), as usual, and then use * `get()` to fetch the value by column name. 
The result of this call will be a * JSON copy of the Immutable data so you can query deeper in the usual way: * * ```javascript * series.at(0).get("NASA_north")["in"] // 200` * ``` * * It is then possible to use a value mapper function when calculating different * properties. For example, to get the average "in" value of the NASA_north column: * * ```javascript * series.avg("NASA_north", d => d.in); // 250 * ``` */ class TimeSeries { constructor(arg) { this._collection = null; // Collection this._data = null; // Meta data if (arg instanceof TimeSeries) { // // Copy another TimeSeries // const other = arg; this._data = other._data; this._collection = other._collection; } else if (_.isObject(arg)) { // // TimeSeries(object data) where data may be: // { "events": [event-1, event-2, ..., event-n]} // or // { "columns": [time|timerange|index, column-1, ..., column-n] // "points": [ // [t1, v1, v2, ..., v2], // [t2, v1, v2, ..., vn], // ... // ] // } const obj = arg; if (_.has(obj, "events")) { // // Initialized from an event list // const { events, ...meta1 } = obj; //eslint-disable-line this._collection = new Collection(events); this._data = buildMetaData(meta1); } else if (_.has(obj, "collection")) { // // Initialized from a Collection // const { collection, ...meta3 } = obj; //eslint-disable-line this._collection = collection; this._data = buildMetaData(meta3); } else if (_.has(obj, "columns") && _.has(obj, "points")) { // // Initialized from the wire format // const { columns, points, utc = true, ...meta2 } = obj; //eslint-disable-line const [eventKey, ...eventFields] = columns; const events = points.map(point => { const [t, ...eventValues] = point; const d = _.object(eventFields, eventValues); const options = utc; switch (eventKey) { case "time": return new TimeEvent(t, d, options); case "index": return new IndexedEvent(t, d, options); case "timerange": return new TimeRangeEvent(t, d, options); default: throw new Error("Unknown event type"); } }); this._collection 
= new Collection(events); this._data = buildMetaData(meta2); } if (!this._collection.isChronological()) { throw new Error("TimeSeries was passed non-chronological events"); } } } // // Serialize // /** * Turn the TimeSeries into regular javascript objects */ toJSON() { const e = this.atFirst(); if (!e) { return; } const columnList = this.columns(); let columns; if (e instanceof TimeEvent) { columns = ["time", ...columnList]; } else if (e instanceof TimeRangeEvent) { columns = ["timerange", ...columnList]; } else if (e instanceof IndexedEvent) { columns = ["index", ...columnList]; } const points = []; for (const e of this._collection.events()) { points.push(e.toPoint(columnList)); } return _.extend(this._data.toJSON(), { columns, points }); } /** * Represent the TimeSeries as a string */ toString() { return JSON.stringify(this.toJSON()); } /** * Returns the extents of the TimeSeries as a TimeRange. */ timerange() { return this._collection.range(); } /** * Alias for `timerange()` */ range() { return this.timerange(); } /** * Gets the earliest time represented in the TimeSeries. * * @return {Date} Begin time */ begin() { return this.range().begin(); } /** * Gets the latest time represented in the TimeSeries. * * @return {Date} End time */ end() { return this.range().end(); } /** * Access a specific TimeSeries event via its position * * @param {number} pos The event position */ at(pos) { return this._collection.at(pos); } /** * Returns an event in the series by its time. This is the same * as calling `bisect` first and then using `at` with the index. * * @param {Date} time The time of the event. * @return {TimeEvent|IndexedEvent|TimeRangeEvent} */ atTime(time) { const pos = this.bisect(time); if (pos >= 0 && pos < this.size()) { return this.at(pos); } } /** * Returns the first event in the series. * * @return {TimeEvent|IndexedEvent|TimeRangeEvent} */ atFirst() { return this._collection.atFirst(); } /** * Returns the last event in the series. 
* * @return {TimeEvent|IndexedEvent|TimeRangeEvent} */ atLast() { return this._collection.atLast(); } /** * Generator to return all the events in the series * * @example * ``` * for (let event of series.events()) { * console.log(event.toString()); * } * ``` */ *events() { for (let i = 0; i < this.size(); i++) { yield this.at(i); } } /** * Sets a new underlying collection for this TimeSeries. * * @param {Collection} collection The new collection * @param {boolean} isChronological Causes the chronological * order of the events to * not be checked * * @return {TimeSeries} A new TimeSeries */ setCollection(collection, isChronological = false) { if (!isChronological && !collection.isChronological()) { throw new Error("Collection supplied is not chronological"); } const result = new TimeSeries(this); if (collection) { result._collection = collection; } else { result._collection = new Collection(); } return result; } /** * Returns the index that bisects the TimeSeries at the time specified. * * @param {Date} t The time to bisect the TimeSeries with * @param {number} b The position to begin searching at * * @return {number} The row number that is the greatest, but still below t. */ bisect(t, b) { const tms = t.getTime(); const size = this.size(); let i = b || 0; if (!size) { return undefined; } for (; i < size; i++) { const ts = this.at(i) .timestamp() .getTime(); if (ts > tms) { return i - 1 >= 0 ? i - 1 : 0; } else if (ts === tms) { return i; } } return i - 1; } /** * Perform a slice of events within the TimeSeries, returns a new * TimeSeries representing a portion of this TimeSeries from * begin up to but not including end. * * @param {Number} begin The position to begin slicing * @param {Number} end The position to end slicing * * @return {TimeSeries} The new, sliced, TimeSeries. 
*/ slice(begin, end) { const sliced = this._collection.slice(begin, end); return this.setCollection(sliced, true); } /** * Crop the TimeSeries to the specified TimeRange and * return a new TimeSeries. * * @param {TimeRange} timerange The bounds of the new TimeSeries * * @return {TimeSeries} The new, cropped, TimeSeries. */ crop(timerange) { const timerangeBegin = timerange.begin(); let beginPos = this.bisect(timerangeBegin); const bisectedEventOutsideRange = this.at(beginPos).timestamp() < timerangeBegin; beginPos = bisectedEventOutsideRange ? beginPos + 1 : beginPos; const endPos = this.bisect(timerange.end(), beginPos); return this.slice(beginPos, endPos + 1); } /** * Returns a new TimeSeries by testing the fieldPath * values for being valid (not NaN, null or undefined). * * The resulting TimeSeries will be clean (for that fieldPath). * * @param {string} fieldPath Name of value to look up. If not supplied, * defaults to ['value']. "Deep" syntax is * ['deep', 'value'] or 'deep.value' * * @return {TimeSeries} A new, modified, TimeSeries. */ clean(fieldSpec) { const cleaned = this._collection.clean(fieldSpec); return this.setCollection(cleaned, true); } /** * Generator to return all the events in the collection. * * @example * ``` * for (let event of timeseries.events()) { * console.log(event.toString()); * } * ``` */ *events() { for (let i = 0; i < this.size(); i++) { yield this.at(i); } } // // Access meta data about the series // /** * Fetch the timeseries name * * @return {string} The name given to this TimeSeries */ name() { return this._data.get("name"); } /** * Rename the timeseries */ setName(name) { return this.setMeta("name", name); } /** * Fetch the timeseries Index, if it has one. * * @return {Index} The Index given to this TimeSeries */ index() { return this._data.get("index"); } /** * Fetch the timeseries Index, as a string, if it has one. * * @return {string} The Index, as a string, given to this TimeSeries */ indexAsString() { return this.index() ? 
this.index().asString() : undefined; } /** * Fetch the timeseries `Index`, as a `TimeRange`, if it has one. * * @return {TimeRange} The `Index`, as a `TimeRange`, given to this `TimeSeries` */ indexAsRange() { return this.index() ? this.index().asTimerange() : undefined; } /** * Fetch the UTC flag, i.e. are the events in this `TimeSeries` in * UTC or local time (if they are `IndexedEvent`s an event might be * "2014-08-31". The actual time range of that representation * depends on where you are. Pond supports thinking about that in * either as a UTC day, or a local day). * * @return {TimeRange} The Index, as a TimeRange, given to this TimeSeries */ isUTC() { return this._data.get("utc"); } /** * Fetch the list of column names. This is determined by * traversing though the events and collecting the set. * * Note: the order is not defined * * @return {array} List of columns */ columns() { const c = {}; for (const e of this._collection.events()) { const d = e.toJSON().data; _.each(d, (val, key) => { c[key] = true; }); } return _.keys(c); } /** * Returns the internal `Collection` of events for this `TimeSeries` * * @return {Collection} The collection backing this `TimeSeries` */ collection() { return this._collection; } /** * Returns the meta data about this TimeSeries as a JSON object. * Any extra data supplied to the TimeSeries constructor will be * placed in the meta data object. This returns either all of that * data as a JSON object, or a specific key if `key` is supplied. * * @param {string} key Optional specific part of the meta data * @return {object} The meta data */ meta(key) { if (!key) { return this._data.toJSON(); } else { return this._data.get(key); } } /** * Set new meta data for the TimeSeries. The result will * be a new TimeSeries. 
*/ setMeta(key, value) { const newTimeSeries = new TimeSeries(this); const d = newTimeSeries._data; const dd = d.set(key, value); newTimeSeries._data = dd; return newTimeSeries; } // // Access the series itself // /** * Returns the number of events in this TimeSeries * * @return {number} Count of events */ size() { return this._collection ? this._collection.size() : 0; } /** * Returns the number of valid items in this TimeSeries. * * Uses the fieldSpec to look up values in all events. * It then counts the number that are considered valid, which * specifically are not NaN, undefined or null. * * @return {number} Count of valid events */ sizeValid(fieldSpec) { return this._collection.sizeValid(fieldSpec); } /** * Returns the number of events in this TimeSeries. Alias * for size(). * * @return {number} Count of events */ count() { return this.size(); } /** * Returns the sum for the fieldspec * * @param {string} fieldPath Column to find the stdev of. A deep value can * be referenced with a string.like.this. If not supplied * the `value` column will be aggregated. * @param {function} filter Optional filter function used to clean data before aggregating * * @return {number} The sum */ sum(fieldPath, filter) { return this._collection.sum(fieldPath, filter); } /** * Aggregates the events down to their maximum value * * @param {string} fieldPath Column to find the max of. A deep value can * be referenced with a string.like.this. If not supplied * the `value` column will be aggregated. * * @return {number} The max value for the field */ max(fieldPath, filter) { return this._collection.max(fieldPath, filter); } /** * Aggregates the events down to their minimum value * * @param {string} fieldPath Column to find the min of. A deep value can * be referenced with a string.like.this. If not supplied * the `value` column will be aggregated. 
* @param {function} filter   Optional filter function used to clean data before aggregating
     *
     * @return {number}           The min value for the field
     */
    min(fieldPath, filter) {
        // Delegates to the backing Collection
        return this._collection.min(fieldPath, filter);
    }

    /**
     * Aggregates the events in the TimeSeries down to their average
     *
     * @param {string} fieldPath  Column to find the avg of. A deep value can
     *                            be referenced with a string.like.this. If not supplied
     *                            the `value` column will be aggregated.
     * @param {function} filter   Optional filter function used to clean data before aggregating
     *
     * @return {number}           The average
     */
    avg(fieldPath, filter) {
        // Delegates to the backing Collection
        return this._collection.avg(fieldPath, filter);
    }

    /**
     * Aggregates the events in the TimeSeries down to their mean (same as avg)
     *
     * @param {string} fieldPath  Column to find the mean of. A deep value can
     *                            be referenced with a string.like.this. If not supplied
     *                            the `value` column will be aggregated.
     * @param {function} filter   Optional filter function used to clean data before aggregating
     *
     * @return {number}           The mean
     */
    mean(fieldPath, filter) {
        // Delegates to the backing Collection
        return this._collection.mean(fieldPath, filter);
    }

    /**
     * Aggregates the events down to their median value
     *
     * @param {string} fieldPath  Column to find the median of. A deep value can
     *                            be referenced with a string.like.this. If not supplied
     *                            the `value` column will be aggregated.
     * @param {function} filter   Optional filter function used to clean data before aggregating
     *
     * @return {number}           The resulting median value
     */
    median(fieldPath, filter) {
        // Delegates to the backing Collection
        return this._collection.median(fieldPath, filter);
    }

    /**
     * Aggregates the events down to their stdev
     *
     * @param {string} fieldPath  Column to find the stdev of. A deep value can
     *                            be referenced with a string.like.this. If not supplied
     *                            the `value` column will be aggregated.
* @param {function} filter   Optional filter function used to clean data before aggregating
     *
     * @return {number}           The resulting stdev value
     */
    stdev(fieldPath, filter) {
        // Delegates to the backing Collection
        return this._collection.stdev(fieldPath, filter);
    }

    /**
     * Gets percentile q within the TimeSeries. This works the same way as numpy.
     *
     * @param  {integer} q         The percentile (should be between 0 and 100)
     *
     * @param {string} fieldPath   Column to find the qth percentile of. A deep value can
     *                             be referenced with a string.like.this. If not supplied
     *                             the `value` column will be aggregated.
     *
     * @param  {string}  interp    Specifies the interpolation method
     *                             to use when the desired quantile lies between
     *                             two data points. Options are: "linear", "lower", "higher",
     *                             "nearest", "midpoint". Defaults to "linear".
     * @param {function} filter    Optional filter function used to clean data before aggregating
     *
     * @return {number}            The percentile
     */
    percentile(q, fieldPath, interp = "linear", filter) {
        // Delegates to the backing Collection
        return this._collection.percentile(q, fieldPath, interp, filter);
    }

    /**
     * Aggregates the events down using a user defined function to
     * do the reduction.
     *
     * @param  {function} func    User defined reduction function. Will be
     *                            passed a list of values. Should return a
     *                            single value.
     * @param {string} fieldPath  Column to aggregate over. A deep value can
     *                            be referenced with a string.like.this. If not supplied
     *                            the `value` column will be aggregated.
     *
     * @return {number}           The resulting value
     */
    aggregate(func, fieldPath) {
        // Delegates to the backing Collection
        return this._collection.aggregate(func, fieldPath);
    }

    /**
     * Gets n quantiles within the TimeSeries. This works the same way as numpy's percentile().
     * For example `timeseries.quantile(4)` would be the same as using percentile with q = 25, 50 and 75.
     *
     * @param  {integer} quantity  The number of quantiles to divide the
     *                             TimeSeries into.
     * @param {string} fieldPath   Column to calculate over. A deep value can
     *                             be referenced with a string.like.this. If not supplied
     *                             the `value` column will be aggregated.
* @param {string} interp Specifies the interpolation method * to use when the desired quantile lies between * two data points. Options are: "linear", "lower", "higher", * "nearest", "midpoint". * @return {array} An array of n quantiles */ quantile(quantity, fieldPath = "value", interp = "linear") { return this._collection.quantile(quantity, fieldPath, interp); } /** * Returns a new Pipeline with input source being initialized to * this TimeSeries collection. This allows pipeline operations * to be chained directly onto the TimeSeries to produce a new * TimeSeries or event result. * * @example * * ``` * timeseries.pipeline() * .offsetBy(1) * .offsetBy(2) * .to(CollectionOut, c => out = c); * ``` * * @return {Pipeline} The Pipeline. */ pipeline() { return new Pipeline().from(this._collection); } /** * Takes an operator that is used to remap events from this TimeSeries to * a new set of events. * * @param {function} operator An operator which will be passed each * event and which should return a new event. * @return {TimeSeries} A TimeSeries containing the remapped events */ map(op) { const collections = this.pipeline() .map(op) .toKeyedCollections(); return this.setCollection(collections["all"], true); } /** * Takes a fieldSpec (list of column names) and outputs to the callback just those * columns in a new TimeSeries. * * @example * * ``` * const ts = timeseries.select({fieldSpec: ["uptime", "notes"]}); * ``` * * @param options An object containing options for the command * @param {string|array} options.fieldSpec Column or columns to select into the new TimeSeries. * If you need to retrieve multiple deep nested values * that ['can.be', 'done.with', 'this.notation']. * A single deep value with a string.like.this. 
* * @return {TimeSeries} The resulting TimeSeries with renamed columns */ select(options) { const { fieldSpec } = options; const collections = this.pipeline() .select(fieldSpec) .toKeyedCollections(); return this.setCollection(collections["all"], true); } /** * Takes a `fieldSpecList` (list of column names) and collapses * them to a new column named `name` which is the reduction (using * the `reducer` function) of the matched columns in the `fieldSpecList`. * * The column may be appended to the existing columns, or replace them, * based on the `append` boolean. * * @example * * ``` * const sums = ts.collapse({ * name: "sum_series", * fieldSpecList: ["in", "out"], * reducer: sum(), * append: false * }); * ``` * * @param options An object containing options: * @param {array} options.fieldSpecList The list of columns to collapse. (required) * @param {string} options.name The resulting collapsed column name (required) * @param {function} options.reducer The reducer function (required) * @param {bool} options.append Append the collapsed column, rather * than replace * * @return {TimeSeries} The resulting collapsed TimeSeries */ collapse(options) { const { fieldSpecList, name, reducer, append } = options; const collections = this.pipeline() .collapse(fieldSpecList, name, reducer, append) .toKeyedCollections(); return this.setCollection(collections["all"], true); } /** * Rename columns in the underlying events. * * Takes a object of columns to rename. Returns a new `TimeSeries` containing * new events. Columns not in the dict will be retained and not renamed. * * @example * ``` * new_ts = ts.renameColumns({ * renameMap: {in: "new_in", out: "new_out"} * }); * ``` * * @note As the name implies, this will only rename the main * "top level" (ie: non-deep) columns. If you need more * extravagant renaming, roll your own using `TimeSeries.map()`. * * @param options An object containing options: * @param {Object} options.renameMap Columns to rename. 
* * @return {TimeSeries} The resulting TimeSeries with renamed columns */ renameColumns(options) { const { renameMap } = options; return this.map(event => { const eventType = event.type(); const d = event.data().mapKeys(key => renameMap[key] || key); return new eventType(event.key(), d); }); } /** * Take the data in this TimeSeries and "fill" any missing or invalid * values. This could be setting `null` values to zero so mathematical * operations will succeed, interpolate a new value, or pad with the * previously given value. * * The `fill()` method takes a single `options` arg. * * @example * ``` * const filled = timeseries.fill({ * fieldSpec: ["direction.in", "direction.out"], * method: "zero", * limit: 3 * }); * ``` * * @param options An object containing options: * @param {string|array} options.fieldSpec Column or columns to fill. If you need to * retrieve multiple deep nested values * that ['can.be', 'done.with', 'this.notation']. * A single deep value with a string.like.this. * @param {string} options.method "linear" or "pad" or "zero" style interpolation * @param {number} options.limit The maximum number of points which should be * interpolated onto missing points. You might set this to * 2 if you are willing to fill 2 new points, * and then beyond that leave data with missing values. * * @return {TimeSeries} The resulting filled TimeSeries */ fill(options) { const { fieldSpec = null, method = "zero", limit = null } = options; let pipeline = this.pipeline(); if (method === "zero" || method === "pad") { pipeline = pipeline.fill({ fieldSpec, method, limit }); } else if (method === "linear" && _.isArray(fieldSpec)) { fieldSpec.forEach(fieldPath => { pipeline = pipeline.fill({ fieldSpec: fieldPath, method, limit }); }); } else { throw new Error("Invalid fill method:", method); } const collections = pipeline.toKeyedCollections(); return this.setCollection(collections["all"], true); } /** * Align event values to regular time boundaries. 
The value at * the boundary is interpolated. Only the new interpolated * points are returned. If limit is reached nulls will be * returned at each boundary position. * * One use case for this is to modify irregular data (i.e. data * that falls at slightly irregular times) so that it falls into a * sequence of evenly spaced values. We use this to take data we * get from the network which is approximately every 30 second * (:32, 1:02, 1:34, ...) and output data on exact 30 second * boundaries (:30, 1:00, 1:30, ...). * * Another use case is data that might be already aligned to * some regular interval, but that contains missing points. * While `fill()` can be used to replace `null` values, `align()` * can be used to add in missing points completely. Those points * can have an interpolated value, or by setting limit to 0, * can be filled with nulls. This is really useful when downstream * processing depends on complete sequences. * * @example * ``` * const aligned = ts.align({ * fieldSpec: "value", * period: "1m", * method: "linear" * }); * ``` * * @param options An object containing options: * @param {string|array} options.fieldSpec Column or columns to align. If you need to * retrieve multiple deep nested values * that ['can.be', 'done.with', 'this.notation']. * A single deep value with a string.like.this. * @param {string} options.period Spacing of aligned values. e.g. "6h" or "5m" * @param {string} options.method "linear" or "pad" style interpolation to boundaries. * @param {number} options.limit The maximum number of points which should be * interpolated onto boundaries. You might set this to * 2 if you are willing to interpolate 2 new points, * and then beyond that just emit nulls on the boundaries. 
* * @return {TimeSeries} The resulting aligned TimeSeries */ align(options) { const { fieldSpec = "value", period = "5m", method = "linear", limit = null } = options; const collection = this.pipeline() .align(fieldSpec, period, method, limit) .toKeyedCollections(); return this.setCollection(collection["all"], true); } /** * Returns the derivative of the TimeSeries for the given columns. The result will * be per second. Optionally you can substitute in `null` values if the rate * is negative. This is useful when a negative rate would be considered invalid. * * @param options An object containing options: * @param {string|array} options.fieldSpec Column or columns to get the rate of. If you * need to retrieve multiple deep nested values * that ['can.be', 'done.with', 'this.notation']. * @param {bool} options.allowNegative Will output null values for negative rates. * This is useful if you are getting the rate * of a counter that always goes up, except * when perhaps it rolls around or resets. * * @return {TimeSeries} The resulting `TimeSeries` containing calculated rates. */ rate(options = {}) { const { fieldSpec = "value", allowNegative = true } = options; const collection = this.pipeline() .rate(fieldSpec, allowNegative) .toKeyedCollections(); return this.setCollection(collection["all"], true); } /** * Builds a new TimeSeries by dividing events within the TimeSeries * across multiple fixed windows of size `windowSize`. * * Note that these are windows defined relative to Jan 1st, 1970, * and are UTC, so this is best suited to smaller window sizes * (hourly, 5m, 30s, 1s etc), or in situations where you don't care * about the specific window, just that the data is smaller. * * Each window then has an aggregation specification applied as * `aggregation`. This specification describes a mapping of output * fieldNames to aggregation functions and their fieldPath. 
For example: * ``` * { in_avg: { in: avg() }, out_avg: { out: avg() } } * ``` * will aggregate both "in" and "out" using the average aggregation * function and return the result as in_avg and out_avg. * * Note that each aggregation function, such as `avg()` also can take a * filter function to apply before the aggregation. A set of filter functions * exists to do common data cleanup such as removing bad values. For example: * ``` * { value_avg: { value: avg(filter.ignoreMissing) } } * ``` * * @example * ``` * const timeseries = new TimeSeries(data); * const dailyAvg = timeseries.fixedWindowRollup({ * windowSize: "1d", * aggregation: {value: {value: avg()}} * }); * ``` * * @param options An object containing options: * @param {string} options.windowSize The size of the window. e.g. "6h" or "5m" * @param {object} options.aggregation The aggregation specification (see description above) * @param {bool} options.toTimeEvents Output as `TimeEvent`s, rather than `IndexedEvent`s * @return {TimeSeries} The resulting rolled up `TimeSeries` */ fixedWindowRollup(options) { const { windowSize, aggregation, toTimeEvents = false } = options; if (!windowSize) { throw new Error( "windowSize must be supplied, for example '5m' for five minute rollups" ); } if (!aggregation || !_.isObject(aggregation)) { throw new Error( "aggregation object must be supplied, for example: {value: {value: avg()}}" ); } const aggregatorPipeline = this.pipeline() .windowBy(windowSize) .emitOn("discard") .aggregate(aggregation); const eventTypePipeline = toTimeEvents ? aggregatorPipeline.asTimeEvents() : aggregatorPipeline; const collections = eventTypePipeline.clearWindow().toKeyedCollections(); return this.setCollection(collections["all"], true); } /** * Builds a new TimeSeries by dividing events into hours. * * Each window then has an aggregation specification `aggregation` * applied. This specification describes a mapping of output * fieldNames to aggregation functions and their fieldPath. 
For example: * ``` * {in_avg: {in: avg()}, out_avg: {out: avg()}} * ``` * * @param options An object containing options: * @param {bool} options.toTimeEvents Convert the rollup events to `TimeEvent`s, otherwise it * will be returned as a `TimeSeries` of `IndexedEvent`s. * @param {object} options.aggregation The aggregation specification (see description above) * * @return {TimeSeries} The resulting rolled up TimeSeries */ hourlyRollup(options) { const { aggregation, toTimeEvents = false } = options; if (!aggregation || !_.isObject(aggregation)) { throw new Error( "aggregation object must be supplied, for example: {value: {value: avg()}}" ); } return this.fixedWindowRollup("1h", aggregation, toTimeEvents); } /** * Builds a new TimeSeries by dividing events into days. * * Each window then has an aggregation specification `aggregation` * applied. This specification describes a mapping of output * fieldNames to aggregation functions and their fieldPath. For example: * ``` * {in_avg: {in: avg()}, out_avg: {out: avg()}} * ``` * * @param options An object containing options: * @param {bool} options.toTimeEvents Convert the rollup events to `TimeEvent`s, otherwise it * will be returned as a `TimeSeries` of `IndexedEvent`s. * @param {object} options.aggregation The aggregation specification (see description above) * * @return {TimeSeries} The resulting rolled up TimeSeries */ dailyRollup(options) { const { aggregation, toTimeEvents = false } = options; if (!aggregation || !_.isObject(aggregation)) { throw new Error( "aggregation object must be supplied, for example: {value: {value: avg()}}" ); } return this._rollup("daily", aggregation, toTimeEvents); } /** * Builds a new TimeSeries by dividing events into months. * * Each window then has an aggregation specification `aggregation` * applied. This specification describes a mapping of output * fieldNames to aggregation functions and their fieldPath. 
For example: * ``` * {in_avg: {in: avg()}, out_avg: {out: avg()}} * ``` * * @param options An object containing options: * @param {bool} options.toTimeEvents Convert the rollup events to `TimeEvent`s, otherwise it * will be returned as a `TimeSeries` of `IndexedEvent`s. * @param {object} options.aggregation The aggregation specification (see description above) * * @return {TimeSeries} The resulting rolled up `TimeSeries` */ monthlyRollup(options) { const { aggregation, toTimeEvents = false } = options; if (!aggregation || !_.isObject(aggregation)) { throw new Error( "aggregation object must be supplied, for example: {value: {value: avg()}}" ); } return this._rollup("monthly", aggregation, toTimeEvents); } /** * Builds a new TimeSeries by dividing events into years. * * Each window then has an aggregation specification `aggregation` * applied. This specification describes a mapping of output * fieldNames to aggregation functions and their fieldPath. For example: * * ``` * {in_avg: {in: avg()}, out_avg: {out: avg()}} * ``` * * @param options An object containing options: * @param {bool} options.toTimeEvents Convert the rollup events to `TimeEvent`s, otherwise it * will be returned as a `TimeSeries` of `IndexedEvent`s. * @param {object} options.aggregation The aggregation specification (see description above) * * @return {TimeSeries} The resulting rolled up `TimeSeries` */ yearlyRollup(options) { const { aggregation, toTimeEvents = false } = options; if (!aggregation || !_.isObject(aggregation)) { throw new Error( "aggregation object must be supplied, for example: {value: {value: avg()}}" ); } return this._rollup("yearly", aggregation, toTimeEvents); } /** * @private * * Internal function to build the TimeSeries rollup functions using * an aggregator Pipeline. */ _rollup(type, aggregation, toTimeEvents = false) { const aggregatorPipeline = this.pipeline() .windowBy(type) .emitOn("discard") .aggregate(aggregation); const eventTypePipeline = toTimeEvents ? 
aggregatorPipeline.asTimeEvents() : aggregatorPipeline; const collections = eventTypePipeline.clearWindow().toKeyedCollections(); return this.setCollection(collections["all"], true); } /** * Builds multiple `Collection`s, each collects together * events within a window of size `windowSize`. Note that these * are windows defined relative to Jan 1st, 1970, and are UTC. * * @example * ``` * const timeseries = new TimeSeries(data); * const collections = timeseries.collectByFixedWindow({windowSize: "1d"}); * console.log(collections); // {1d-16314: Collection, 1d-16315: Collection, ...} * ``` * * @param options An object containing options: * @param {bool} options.windowSize The size of the window. e.g. "6h" or "5m" * * @return {map} The result is a mapping from window index to a Collection. */ collectByFixedWindow({ windowSize }) { return this.pipeline() .windowBy(windowSize) .emitOn("discard") .toKeyedCollections(); } /* * STATIC */ /** * Defines the event type contained in this TimeSeries. The default here * is to use the supplied type (time, timerange or index) to build either * a TimeEvent, TimeRangeEvent or IndexedEvent. However, you can also * subclass the TimeSeries and reimplement this to return another event * type. */ static event(eventKey) { switch (eventKey) { case "time": return TimeEvent; case "timerange": return TimeRangeEvent; case "index": return IndexedEvent; default: throw new Error(`Unknown event type: ${eventKey}`); } } /** * Static function to compare two TimeSeries to each other. If the TimeSeries * are of the same instance as each other then equals will return true. * @param {TimeSeries} series1 * @param {TimeSeries} series2 * @return {bool} result */ static equal(series1, series2) { return series1._data === series2._data && series1._collection === series2._collection; } /** * Static function to compare two TimeSeries to each other. If the TimeSeries * are of the same value as each other then equals will return true. 
* @param {TimeSeries} series1 * @param {TimeSeries} series2 * @return {bool} result */ static is(series1, series2) { return ( Immutable.is(series1._data, series2._data) && Collection.is(series1._collection, series2._collection) ); } /** * Reduces a list of TimeSeries objects using a reducer function. This works * by taking each event in each TimeSeries and collecting them together * based on timestamp. All events for a given time are then merged together * using the reducer function to produce a new event. The reducer function is * applied to all columns in the fieldSpec. Those new events are then * collected together to form a new TimeSeries. * * @example * * For example you might have three TimeSeries with columns "in" and "out" which * corresponds to two measurements per timestamp. You could use this function to * obtain a new TimeSeries which was the sum of the the three measurements using * the `sum()` reducer function and an ["in", "out"] fieldSpec. * * ``` * const totalSeries = TimeSeries.timeSeriesListReduce({ * name: "totals", * seriesList: [inTraffic, outTraffic], * reducer: sum(), * fieldSpec: [ "in", "out" ] * }); * ``` * * @param options An object containing options. Additional key * values in the options will be added as meta data * to the resulting TimeSeries. * @param {array} options.seriesList A list of `TimeSeries` (required) * @param {function} options.reducer The reducer function e.g. `max()` (required) * @param {array | string} options.fieldSpec Column or columns to reduce. If you * need to retrieve multiple deep * nested values that ['can.be', 'done.with', * 'this.notation']. A single deep value with a * string.like.this. 
* * @return {TimeSeries} The reduced TimeSeries */ static timeSeriesListReduce(options) { const { fieldSpec, reducer, ...data } = options; const combiner = Event.combiner(fieldSpec, reducer); return TimeSeries.timeSeriesListEventReduce({ fieldSpec, reducer: combiner, ...data }); } /** * Takes a list of TimeSeries and merges them together to form a new * Timeseries. * * Merging will produce a new Event; only when events are conflict free, so * it is useful in the following cases: * * to combine multiple TimeSeries which have different time ranges, essentially * concatenating them together * * combine TimeSeries which have different columns, for example inTraffic has * a column "in" and outTraffic has a column "o