UNPKG

pondjs

Version:

A timeseries library built on top of immutable.js

776 lines (723 loc) 25.7 kB
/*
 *  Copyright (c) 2016-2017, The Regents of the University of California,
 *  through Lawrence Berkeley National Laboratory (subject to receipt
 *  of any required approvals from the U.S. Dept. of Energy).
 *  All rights reserved.
 *
 *  This source code is licensed under the BSD-style license found in the
 *  LICENSE file in the root directory of this source tree.
 */

import _ from "underscore";
import Immutable from "immutable";

import Bounded from "./io/bounded";
import Event from "./event";
import TimeRange from "./timerange";
import util from "./base/util";
import {
    sum,
    avg,
    max,
    min,
    first,
    last,
    median,
    stdev,
    percentile
} from "./base/functions";

/**
 * A collection is an abstraction for a bag of Events.
 *
 * You typically construct a Collection from a list of Events, which
 * may be either within an Immutable.List or an Array. You can also
 * copy another Collection or create an empty one.
 *
 * You can mutate a collection in a number of ways. In each instance
 * a new Collection will be returned.
 *
 * Basic operations on the list of events are also possible. You
 * can iterate over the collection with a for..of loop over `events()`,
 * get the `size()` of the collection and access a specific element
 * with `at()`.
 *
 * You can also perform aggregations of the events, map them, filter them,
 * clean them, etc.
 *
 * Collections form the backing structure for a TimeSeries, as well as
 * in Pipeline event processing. They extend Bounded, so
 * they can be used as a pipeline source.
 */
class Collection extends Bounded {
    /**
     * Construct a new Collection.
     *
     * @param {Collection|array|Immutable.List} arg1 Initial data for
     * the collection. If arg1 is another Collection, this will act as
     * a copy constructor. If arg1 is falsy an empty Collection is built.
     * @param {Boolean} [arg2] When using the copy constructor
     * this specifies whether or not to also copy all the events (and
     * the event type) from the other collection. Defaults to true; only
     * an explicit `false` produces an empty Collection.
     * If arg1 is an Immutable.List, then arg2 will specify the type of
     * the Events accepted into the Collection. This form is generally
     * used internally.
     *
     * @throws {Error} If arg1 is an Immutable.List and no type is supplied.
     *
     * @return {Collection} The constructed Collection.
     */
    constructor(arg1, arg2) {
        super();

        this._id = _.uniqueId("collection-");
        this._eventList = null; // The events in this collection
        this._type = null; // The type (class) of the events in this collection

        if (!arg1) {
            this._eventList = new Immutable.List();
        } else if (arg1 instanceof Collection) {
            const other = arg1;
            // Copy by default. The previous `arg2 || true` could never be
            // false, so callers were unable to opt out of copying.
            const copyEvents = arg2 !== false;
            if (copyEvents) {
                // Immutable lists can be shared safely between collections
                this._eventList = other._eventList;
                this._type = other._type;
            } else {
                this._eventList = new Immutable.List();
            }
        } else if (_.isArray(arg1)) {
            const events = [];
            arg1.forEach(e => {
                this._check(e);
                // Only the event's internal data (`_d`) is stored
                events.push(e._d);
            });
            this._eventList = new Immutable.List(events);
        } else if (Immutable.List.isList(arg1)) {
            const type = arg2;
            if (!type) {
                throw new Error("No type supplied to Collection constructor");
            }
            this._type = type;
            this._eventList = arg1;
        }
    }

    /**
     * Returns the Collection as a regular JSON object.
     *
     * @return {Object} The JSON representation of this Collection
     */
    toJSON() {
        return this._eventList.toJS();
    }

    /**
     * Serialize out the Collection as a string. This will be the
     * string representation of `toJSON()`.
     *
     * @return {string} The Collection serialized as a string.
     */
    toString() {
        return JSON.stringify(this.toJSON());
    }

    /**
     * Returns the Event object type in this Collection.
     *
     * Since Collections may only have one type of event (`Event`, `IndexedEvent`
     * or `TimeRangeEvent`) this will return that type. If no type has
     * been established yet the stored value (initially `null`) is returned.
     *
     * @return {Event} - The class of the type of events contained in
     *                   this Collection.
     */
    type() {
        return this._type;
    }

    /**
     * Returns the number of events in this collection
     *
     * @return {number} Count of events
     */
    size() {
        return this._eventList.size;
    }

    /**
     * Returns the number of valid items in this collection.
     *
     * Uses the fieldPath to look up values in all events.
     * It then counts the number that `Event.isValidValue()` considers
     * valid, which specifically are not NaN, undefined or null.
     *
     * @param {string} [fieldPath="value"] The field to test in each event
     *
     * @return {number} Count of valid events
     */
    sizeValid(fieldPath = "value") {
        let count = 0;
        for (const e of this.events()) {
            if (Event.isValidValue(e, fieldPath)) count++;
        }
        return count;
    }

    /**
     * Returns an event in the Collection by its position.
     * @example
     * ```
     * for (let row=0; row < series.size(); row++) {
     *   const event = series.at(row);
     *   console.log(event.toString());
     * }
     * ```
     * @param  {number} pos The position of the event
     * @return {Event}      Returns the event at the pos specified, or
     *                      `undefined` if the Collection is empty.
     */
    at(pos) {
        if (this._eventList.size > 0) {
            // Events are stored as raw data; wrap it back up in the event type
            const event = new this._type(this._eventList.get(pos));
            return event;
        }
    }

    /**
     * Returns a list of events in the Collection which have
     * the exact key (time, timerange or index) as the key specified
     * by `k`. Note that this is an O(n) search for the key specified,
     * since collections are an unordered bag of events.
     *
     * @param  {Date|string|TimeRange} k The key of the event.
     * @return {Array}                   All events at that key
     */
    atKey(k) {
        const result = [];
        let key;
        if (k instanceof Date) {
            key = k.getTime();
        } else if (_.isString(k)) {
            key = k;
        } else if (k instanceof TimeRange) {
            // Build the key from the TimeRange that was passed in. The
            // previous code called this.timerange(), which is not defined
            // in this class, and ignored `k` altogether.
            // NOTE(review): assumes time range events key themselves as
            // "<begin ms>,<end ms>" - confirm against TimeRangeEvent.key().
            key = `${k.begin().getTime()},${k.end().getTime()}`;
        }
        for (const e of this.events()) {
            if (e.key() === key) {
                result.push(e);
            }
        }
        return result;
    }

    /**
     * Returns the first event in the Collection.
     *
     * @return {Event} The first event, or `undefined` if empty
     */
    atFirst() {
        if (this.size()) {
            return this.at(0);
        }
    }

    /**
     * Returns the last event in the Collection.
     *
     * @return {Event} The last event, or `undefined` if empty
     */
    atLast() {
        if (this.size()) {
            return this.at(this.size() - 1);
        }
    }

    /**
     * Generator to return all the events in the Collection.
     *
     * @example
     * ```
     * for (let event of collection.events()) {
     *     console.log(event.toString());
     * }
     * ```
     */
    *events() {
        for (let i = 0; i < this.size(); i++) {
            yield this.at(i);
        }
    }

    /**
     * Returns a copy of this Collection (same event type) whose event
     * list is replaced with the list supplied.
     *
     * @param {Immutable.List} events The new raw event list
     *
     * @return {Collection} A new Collection holding `events`
     */
    setEvents(events) {
        const result = new Collection(this);
        result._eventList = events;
        return result;
    }

    /**
     * Returns the raw Immutable event list
     *
     * @return {Immutable.List} All events as an Immutable List.
     */
    eventList() {
        return this._eventList;
    }

    /**
     * Returns a Javascript array representation of the event list
     *
     * @return {Array} All events as a Javascript Array.
     */
    eventListAsArray() {
        const events = [];
        for (const e of this.events()) {
            events.push(e);
        }
        return events;
    }

    /**
     * Returns the events in the collection as a plain Javascript object
     * used as a map, where the key is the event's `key()` (timestamp,
     * index or timerange) and the value is an array of events with that key.
     *
     * @return {Object} The map of events
     */
    eventListAsMap() {
        const events = {};
        for (const e of this.events()) {
            const key = e.key();
            if (!_.has(events, key)) {
                events[key] = [];
            }
            events[key].push(e);
        }
        return events;
    }

    //
    // De-duplicate
    //

    /**
     * Removes duplicates from the Collection. If duplicates
     * exist in the collection with the same key but with different
     * values, then later event values will be used.
     *
     * @return {Collection} The de-duplicated Collection.
     */
    dedup() {
        const events = Event.merge(this.eventListAsArray());
        return new Collection(events);
    }

    //
    // Sorting
    //

    /**
     * Sorts the Collection by the timestamp. In the case
     * of TimeRangeEvents and IndexedEvents, it will be sorted
     * by the begin time. This is useful when the collection
     * will be passed into a TimeSeries.
     *
     * See also isChronological().
     *
     * @return {Collection} The sorted Collection
     */
    sortByTime() {
        const sorted = this._eventList.sortBy(event => {
            const e = new this._type(event);
            return e.timestamp().getTime();
        });
        return this.setEvents(sorted);
    }

    /**
     * Sorts the Collection using the value referenced by
     * the fieldPath.
     *
     * @param {string|array} fieldPath The field to sort by
     *
     * @return {Collection} The sorted Collection
     */
    sort(fieldPath) {
        const fs = util.fieldPathToArray(fieldPath);
        const sorted = this._eventList.sortBy(event => {
            const e = new this._type(event);
            return e.get(fs);
        });
        return this.setEvents(sorted);
    }

    //
    // Series range
    //

    /**
     * From the range of times, or Indexes within the Collection, return
     * the extents of the Collection as a TimeRange. This is currently
     * implemented by walking the events.
     *
     * @return {TimeRange} The extents of the Collection, or `undefined`
     *                     if the Collection is empty.
     */
    range() {
        // Named so as not to shadow the imported min/max aggregators
        let earliest;
        let latest;
        for (const e of this.events()) {
            if (!earliest || e.begin() < earliest) earliest = e.begin();
            if (!latest || e.end() > latest) latest = e.end();
        }
        if (earliest && latest) return new TimeRange(earliest, latest);
    }

    //
    // Collection mutation
    //

    /**
     * Adds an event to the collection, returns a new Collection. The event added
     * can be an Event, TimeRangeEvent or IndexedEvent, but it must be of the
     * same type as other events within the Collection.
     *
     * @param {Event} event The event being added.
     *
     * @return {Collection} A new, modified, Collection containing the new event.
     */
    addEvent(event) {
        this._check(event);
        const result = new Collection(this);
        result._eventList = this._eventList.push(event._d);
        return result;
    }

    /**
     * Perform a slice of events within the Collection, returns a new
     * Collection representing a portion of this Collection from begin up to
     * but not including end.
     *
     * @param {Number} begin   The position to begin slicing
     * @param {Number} end     The position to end slicing
     *
     * @return {Collection}    The new, sliced, Collection.
     */
    slice(begin, end) {
        // Go through setEvents() so the event type carries over even when
        // it is still unset; passing a null type to the constructor's
        // Immutable.List form throws.
        return this.setEvents(this._eventList.slice(begin, end));
    }

    /**
     * Filter the collection's event list with the supplied function
     *
     * @param {function} filterFunc The filter function, that should return
     *                              true or false when passed in an event.
     *
     * @return {Collection}         A new, filtered, Collection.
     */
    filter(filterFunc) {
        const filteredEventList = [];
        for (const e of this.events()) {
            if (filterFunc(e)) {
                filteredEventList.push(e);
            }
        }
        return new Collection(filteredEventList);
    }

    /**
     * Map the collection's event list to a new event list with
     * the supplied function.
     *
     * @param {function} mapFunc The mapping function, that should return
     *                           a new event when passed in the old event.
     *
     * @return {Collection}      A new, modified, Collection.
     */
    map(mapFunc) {
        const result = [];
        for (const e of this.events()) {
            result.push(mapFunc(e));
        }
        return new Collection(result);
    }

    /**
     * Returns a new Collection by testing the fieldPath
     * values for being valid (not NaN, null or undefined).
     *
     * The resulting Collection will be clean (for that fieldPath).
     *
     * @param  {string}      fieldPath  Name of value to look up. If not supplied,
     *                                  defaults to ['value']. "Deep" syntax is
     *                                  ['deep', 'value'] or 'deep.value'
     *
     * @return {Collection}             A new, modified, Collection.
     */
    clean(fieldPath) {
        const fs = util.fieldPathToArray(fieldPath);
        const filteredEvents = [];
        for (const e of this.events()) {
            if (Event.isValidValue(e, fs)) {
                filteredEvents.push(e);
            }
        }
        return new Collection(filteredEvents);
    }

    //
    // Aggregate the event list to a single value
    //

    /**
     * Returns the number of events in this collection
     *
     * @return {number} The number of events
     */
    count() {
        return this.size();
    }

    /**
     * Returns the first value in the Collection for the fieldspec
     *
     * @param {string} fieldPath  Column to find the first value of. A deep value can be referenced with a
     *                            string.like.this. If not supplied the `value` column will be
     *                            aggregated.
     * @param {function} filter   Optional filter function used to clean data before aggregating
     *
     * @return {number}           The first value
     */
    first(fieldPath, filter) {
        return this.aggregate(first(filter), fieldPath);
    }

    /**
     * Returns the last value in the Collection for the fieldspec
     *
     * @param {string} fieldPath  Column to find the last value of. A deep value can be referenced with a
     *                            string.like.this. If not supplied the `value` column will be
     *                            aggregated.
     * @param {function} filter   Optional filter function used to clean data before aggregating
     *
     * @return {number}           The last value
     */
    last(fieldPath, filter) {
        return this.aggregate(last(filter), fieldPath);
    }

    /**
     * Returns the sum of the Collection for the fieldspec
     *
     * @param {string} fieldPath  Column to find the sum of. A deep value can be referenced with a
     *                            string.like.this. If not supplied the `value` column will be
     *                            aggregated.
     * @param {function} filter   Optional filter function used to clean data before aggregating
     *
     * @return {number}           The sum
     */
    sum(fieldPath, filter) {
        return this.aggregate(sum(filter), fieldPath);
    }

    /**
     * Aggregates the events down to their average(s)
     *
     * @param {string} fieldPath  Column to find the avg of. A deep value can be referenced with a
     *                            string.like.this. If not supplied the `value` column will be
     *                            aggregated.
     * @param {function} filter   Optional filter function used to clean data before aggregating
     *
     * @return {number}           The average
     */
    avg(fieldPath, filter) {
        return this.aggregate(avg(filter), fieldPath);
    }

    /**
     * Aggregates the events down to their maximum value
     *
     * @param {string} fieldPath  Column to find the max of. A deep value can be referenced with a
     *                            string.like.this. If not supplied the `value` column will be
     *                            aggregated.
     * @param {function} filter   Optional filter function used to clean data before aggregating
     *
     * @return {number}           The max value for the field
     */
    max(fieldPath, filter) {
        return this.aggregate(max(filter), fieldPath);
    }

    /**
     * Aggregates the events down to their minimum value
     *
     * @param {string} fieldPath  Column to find the min of. A deep value can be referenced with a
     *                            string.like.this. If not supplied the `value` column will be
     *                            aggregated.
     * @param {function} filter   Optional filter function used to clean data before aggregating
     *
     * @return {number}           The min value for the field
     */
    min(fieldPath, filter) {
        return this.aggregate(min(filter), fieldPath);
    }

    /**
     * Aggregates the events down to their mean (same as avg)
     *
     * @param {string} fieldPath  Column to find the mean of. A deep value can be referenced with a
     *                            string.like.this. If not supplied the `value` column will be
     *                            aggregated.
     * @param {function} filter   Optional filter function used to clean data before aggregating
     *
     * @return {number}           The mean
     */
    mean(fieldPath, filter) {
        return this.avg(fieldPath, filter);
    }

    /**
     * Aggregates the events down to their median value
     *
     * @param {string} fieldPath  Column to find the median of. A deep value can be referenced with a
     *                            string.like.this. If not supplied the `value` column will be
     *                            aggregated.
     * @param {function} filter   Optional filter function used to clean data before aggregating
     *
     * @return {number}           The median value
     */
    median(fieldPath, filter) {
        return this.aggregate(median(filter), fieldPath);
    }

    /**
     * Aggregates the events down to their stdev
     *
     * @param {string} fieldPath  Column to find the stdev of. A deep value can be referenced with a
     *                            string.like.this. If not supplied the `value` column will be
     *                            aggregated.
     * @param {function} filter   Optional filter function used to clean data before aggregating
     *
     * @return {number}           The resulting stdev value
     */
    stdev(fieldPath, filter) {
        return this.aggregate(stdev(filter), fieldPath);
    }

    /**
     * Gets percentile q within the Collection. This works the same way as numpy.
     *
     * @param  {integer} q        The percentile (should be between 0 and 100)
     *
     * @param {string} fieldPath  Column to find the percentile of. A deep value can be referenced with a
     *                            string.like.this. If not supplied the `value` column will be
     *                            aggregated.
     *
     * @param  {string}  interp   Specifies the interpolation method
     *                            to use when the desired quantile lies between
     *                            two data points. Options are:
     *   * linear: i + (j - i) * fraction, where fraction is the fractional part of the index surrounded by i and j.
     *   * lower: i.
     *   * higher: j.
     *   * nearest: i or j whichever is nearest.
     *   * midpoint: (i + j) / 2.
     * @param {function} filter   Optional filter function used to clean data before aggregating
     *
     * @return {number}           The percentile
     */
    percentile(q, fieldPath, interp = "linear", filter) {
        return this.aggregate(percentile(q, interp, filter), fieldPath);
    }

    /**
     * Aggregates the events down using a user defined function to
     * do the reduction.
     *
     * @param  {function} func         User defined reduction function. Will be
     *                                 passed a list of values. Should return a
     *                                 single value.
     *
     * @param  {String|array} fieldPath The field to aggregate over. An array
     *                                 style path is joined with "." first.
     *                                 Defaults to "value" if not supplied.
     *
     * @param  {Object} [options={}]   Passed through to `Event.mapReduce()`
     *
     * @throws {Error} If func is not a function, or fieldPath is neither a
     *                 string, an array nor undefined.
     *
     * @return {number}                The resulting value
     */
    aggregate(func, fieldPath, options = {}) {
        let fpath;
        if (!_.isFunction(func)) {
            throw new Error("First arg to aggregate() must be a function");
        }

        if (_.isString(fieldPath)) {
            fpath = fieldPath;
        } else if (_.isArray(fieldPath)) {
            // if the ['array', 'style', 'fieldpath'] is being used,
            // we need to turn it back into a string since we are
            // using a subset of the map() functionality on
            // a single column. (This used to call split() on the array,
            // which throws because arrays have no split method.)
            fpath = fieldPath.join(".");
        } else if (_.isUndefined(fieldPath)) {
            // The reduction needs a field name to use as the key of its
            // result, so fall back to the default column.
            fpath = "value";
        } else {
            throw new Error(
                "Collection.aggregate() takes a string/array fieldPath"
            );
        }

        const result = Event.mapReduce(
            this.eventListAsArray(),
            fpath,
            func,
            options
        );

        // The result is looked up under the field path that was reduced
        return result[fpath];
    }

    /**
     * Gets n quantiles within the Collection. This works the same way as numpy.
     *
     * @param  {integer} n        The number of quantiles to divide the
     *                            Collection into.
     *
     * @param  {string}  column   The field to return as the quantile
     *
     * @param  {string}  interp   Specifies the interpolation method
     *                            to use when the desired quantile lies between
     *                            two data points. Options are:
     *   * linear: i + (j - i) * fraction, where fraction is the fractional part of the index surrounded by i and j.
     *   * lower: i.
     *   * higher: j.
     *   * nearest: i or j whichever is nearest.
     *   * midpoint: (i + j) / 2.
     *
     * @throws {Error} If n is greater than the number of events.
     *
     * @return {array}            An array of quantile boundary values
     *                            (at most n - 1 of them)
     */
    quantile(n, column = "value", interp = "linear") {
        // Collection has no `length` property, so the previous check
        // against this.length could never fire; size() is the real count.
        if (n > this.size()) {
            throw new Error("Subset n is greater than the Collection length");
        }

        const results = [];
        const sorted = this.sort(column);
        const lastIndex = sorted.size() - 1;

        // Step with an integer counter: accumulating 1/n in floating point
        // can stop just short of 1 and emit an extra quantile.
        for (let k = 1; k < n; k++) {
            const position = (lastIndex * k) / n;
            const index = Math.floor(position);
            if (index < lastIndex) {
                const fraction = position - index;
                const v0 = sorted.at(index).get(column);
                const v1 = sorted.at(index + 1).get(column);
                let v;
                if (interp === "lower" || fraction === 0) {
                    v = v0;
                } else if (interp === "linear") {
                    v = v0 + (v1 - v0) * fraction;
                } else if (interp === "higher") {
                    v = v1;
                } else if (interp === "nearest") {
                    v = fraction < 0.5 ? v0 : v1;
                } else if (interp === "midpoint") {
                    v = (v0 + v1) / 2;
                }
                results.push(v);
            }
        }
        return results;
    }

    /**
     * Returns true if all events in this Collection are in chronological order.
     *
     * @return {Boolean} True if all events are in order, oldest events to newest.
     */
    isChronological() {
        let previous;
        for (const e of this.events()) {
            // Compare epoch milliseconds throughout. An explicit undefined
            // check is used because a timestamp of 0 is falsy.
            const current = e.timestamp().getTime();
            if (!_.isUndefined(previous) && current < previous) {
                return false;
            }
            previous = current;
        }
        return true;
    }

    /**
     * STATIC
     */

    /**
     * Static function to compare two collections to each other. If the collections
     * share the same event type and the very same event list instance
     * then equal will return true.
     *
     * @param  {Collection} collection1
     * @param  {Collection} collection2
     *
     * @return {bool} result
     */
    static equal(collection1, collection2) {
        return collection1._type === collection2._type &&
            collection1._eventList === collection2._eventList;
    }

    /**
     * Static function to compare two collections to each other. If the collections
     * share the same event type and their event lists are of the same value
     * (per `Immutable.is()`) then is will return true.
     *
     * @param  {Collection} collection1
     * @param  {Collection} collection2
     *
     * @return {bool} result
     */
    static is(collection1, collection2) {
        return collection1._type === collection2._type &&
            Immutable.is(collection1._eventList, collection2._eventList);
    }
}

export default Collection;