UNPKG

qminer

Version:

A C++ based data analytics platform for processing large-scale real-time streams containing structured and unstructured data

571 lines (519 loc) 22.4 kB
/**
 * Copyright (c) 2015, Jozef Stefan Institute, Quintelligence d.o.o. and contributors
 * All rights reserved.
 *
 * This source code is licensed under the FreeBSD license found in the
 * LICENSE file in the root directory of this source tree.
 */

var assert = require('../../src/nodejs/scripts/assert.js');
var qm = require('../../index.js');
var async = require('async');

/**
 * Pushes a sequence of single-field records into `store` and checks, after every
 * push, the update count reported by `aggr`.
 *
 * One record is built per entry of `recValArr`, with only `recField` set. The
 * aggregate must report 0 before the first push; after the i-th push,
 * `aggr.saveJson().val` must strictly equal `updatesArr[i]`.
 *
 * @param {string} recField - name of the field set on every pushed record
 * @param {Array} recValArr - values assigned to `recField`, in push order
 * @param {Array} updatesArr - expected `aggr.saveJson().val` after each push
 * @param {Object} store - store that receives the records via `push`
 * @param {Object} aggr - aggregate whose `saveJson().val` is inspected
 */
function assertUpdateSequence(recField, recValArr, updatesArr, store, aggr) {
    var recJsonArr = recValArr.map(function (recVal) {
        var recJson = {};
        recJson[recField] = recVal;
        return recJson;
    });

    // should be 0 at start!
    assert.strictEqual(aggr.saveJson().val, 0);

    recJsonArr.forEach(function (recJson, idx) {
        store.push(recJson);
        assert.strictEqual(aggr.saveJson().val, updatesArr[idx]);
    });
}

/**
 * Minimal JavaScript stream aggregate used as the target of the filter aggregates.
 * It counts the calls to `onAdd` and reports the count through `saveJson` as
 * `{ val: <count> }`.
 */
function JsAggr() {
    var updates = 0;
    this.name = 'simple';
    this.onAdd = function (rec) {
        updates++;
    };
    this.saveJson = function (limit) {
        return { val: updates };
    };
}

describe('Stream aggregate filter', function () {
    var base = undefined;
    var store = undefined;

    // Every test gets a clean base with a single store that has one nullable
    // field per tested type and an index join pointing back to the same store.
    beforeEach(function () {
        base = new qm.Base({ mode: 'createClean' });
        base.createStore({
            "name": "RecordTest",
            "fields": [
                { "name": "Bool", "type": "bool", "null": true },
                { "name": "UCh", "type": "byte", "null": true },
                { "name": "Int", "type": "int", "null": true },
                { "name": "Int16", "type": "int16", "null": true },
                { "name": "Int64", "type": "int64", "null": true },
                { "name": "UInt", "type": "uint", "null": true },
                { "name": "UInt16", "type": "uint16", "null": true },
                { "name": "UInt64", "type": "uint64", "null": true },
                { "name": "Flt", "type": "float", "null": true },
                { "name": "SFlt", "type": "sfloat", "null": true },
                { "name": "Str", "type": "string", "null": true },
                { "name": "Tm", "type": "datetime", "null": true }
            ],
            "joins": [{ name: "joinTest", type: "index", store: "RecordTest" }],
            "keys": []
        });
        store = base.store('RecordTest');
    });

    afterEach(function () {
        base.close();
    });

    describe('Constructor test', function () {
        it('should should throw exception', function () {
            var aggr = new qm.StreamAggr(base, new JsAggr());

            // definitions that must be accepted
            var OKInput = [
                { type: "recordFilterAggr", aggr: aggr.name, filters: [{ type: "trivial" }] },
                { type: "recordFilterAggr", aggr: aggr.name, filters: [{ type: "field", store: "RecordTest", field: "Int", minValue: 5 }] }
            ];

            // definitions that must be rejected
            var BADInput = [
                // missing fields
                { type: "recordFilterAggr", filters: [{ type: "field", store: "RecordTest", field: "Int", minValue: 5 }] },
                { type: "recordFilterAggr", aggr: aggr.name },
                { type: "recordFilterAggr", aggr: aggr.name, filters: [{ store: "RecordTest", field: "Int", minValue: 5 }] },
                { type: "recordFilterAggr", aggr: aggr.name, filters: [{ type: "field", field: "Int", minValue: 5 }] },
                { type: "recordFilterAggr", aggr: aggr.name, filters: [{ type: "field", store: "RecordTest", minValue: 5 }] },
                { type: "recordFilterAggr", aggr: aggr.name, filters: [{ type: "field", store: "RecordTest", field: "Int" }] },
                // bad fields
                { type: "recordFilterAggr", aggr: "lala", filters: [{ type: "field", store: "RecordTest", field: "Int", minValue: 5 }] },
                { type: "recordFilterAggr", aggr: aggr.name, filters: [{ type: "lala", store: "RecordTest", field: "Int", minValue: 5 }] },
                { type: "recordFilterAggr", aggr: aggr.name, filters: [{ type: "field", store: "lala", field: "Int", minValue: 5 }] },
                { type: "recordFilterAggr", aggr: aggr.name, filters: [{ type: "field", store: "RecordTest", field: "lala", minValue: 5 }] },
                { type: "recordFilterAggr", aggr: aggr.name, filters: [{ type: "field", store: "RecordTest", field: "Int", minValue: "lala" }] },
                // null fields
                { type: "recordFilterAggr", aggr: null, filters: [{ type: "field", store: "RecordTest", field: "Int", minValue: 5 }] },
                { type: "recordFilterAggr", aggr: aggr.name, filters: [{ type: null, store: "RecordTest", field: "Int", minValue: 5 }] },
                { type: "recordFilterAggr", aggr: aggr.name, filters: [{ type: "field", store: null, field: "Int", minValue: 5 }] },
                { type: "recordFilterAggr", aggr: aggr.name, filters: [{ type: "field", store: "RecordTest", field: null, minValue: 5 }] },
                { type: "recordFilterAggr", aggr: aggr.name, filters: [{ type: "field", store: "RecordTest", field: "Int", minValue: null }] }
            ];

            OKInput.forEach(function (input) {
                assert.doesNotThrow(function () {
                    store.addStreamAggr(input);
                });
            });
            BADInput.forEach(function (input) {
                assert.throws(function () {
                    store.addStreamAggr(input);
                });
            });
        });
    });

    var fields = ["UCh", "Int", "Int16", "Int64", "UInt", "UInt16", "UInt64", "Flt", "SFlt", "Tm"];

    describe('Numeric field range filters', function () {
        // NOTE(review): async.each is used here only to generate one test per field;
        // this presumably relies on it invoking the iterator synchronously so that
        // every it() is registered while the describe callback runs - confirm before
        // upgrading the async dependency.
        async.each(fields, function (field, callback) {
            it('should filter ' + field + ' fields outside 5 and 6', function (done) {
                var aggr = new qm.StreamAggr(base, new JsAggr());
                store.addStreamAggr({
                    type: 'recordFilterAggr',
                    aggr: aggr.name,
                    filters: [{ type: "field", store: "RecordTest", field: field, minValue: 5, maxValue: 6 }]
                });
                assertUpdateSequence(field, [5, 6, 7, 1], [1, 2, 2, 2], store, aggr);
                done();
            });
            callback();
        });

        async.each(fields, function (field, callback) {
            it('should filter ' + field + ' fields below 5', function (done) {
                var aggr = new qm.StreamAggr(base, new JsAggr());
                store.addStreamAggr({
                    type: 'recordFilterAggr',
                    aggr: aggr.name,
                    filters: [{ type: "field", store: "RecordTest", field: field, minValue: 5 }]
                });
                assertUpdateSequence(field, [5, 6, 7, 1], [1, 2, 3, 3], store, aggr);
                done();
            });
            callback();
        });

        async.each(fields, function (field, callback) {
            it('should filter ' + field + ' fields above 6', function (done) {
                var aggr = new qm.StreamAggr(base, new JsAggr());
                store.addStreamAggr({
                    type: 'recordFilterAggr',
                    aggr: aggr.name,
                    filters: [{ type: "field", store: "RecordTest", field: field, maxValue: 6 }]
                });
                assertUpdateSequence(field, [5, 6, 7, 1], [1, 2, 2, 3], store, aggr);
                done();
            });
            callback();
        });

        async.each(fields, function (field, callback) {
            it('should filter out null ' + field + ' fields', function (done) {
                var aggr = new qm.StreamAggr(base, new JsAggr());
                store.addStreamAggr({
                    type: 'recordFilterAggr',
                    aggr: aggr.name,
                    filters: [{ type: "field", store: "RecordTest", field: field, minValue: 5, maxValue: 6, letNullThrough: false }]
                });
                assertUpdateSequence(field, [5, 6, null, 7, null, 5, 1], [1, 2, 2, 2, 2, 3, 3], store, aggr);
                done();
            });
            callback();
        });

        async.each(fields, function (field, callback) {
            it('should let null ' + field + ' fields through', function (done) {
                var aggr = new qm.StreamAggr(base, new JsAggr());
                store.addStreamAggr({
                    type: 'recordFilterAggr',
                    aggr: aggr.name,
                    filters: [{ type: "field", store: "RecordTest", field: field, minValue: 5, maxValue: 6, letNullThrough: true }]
                });
                assertUpdateSequence(field, [5, 6, null, 7, null, 5, 1], [1, 2, 3, 3, 4, 5, 5], store, aggr);
                done();
            });
            callback();
        });
    });

    describe('Caller aggregate', function () {
        it('should identify the filter aggregate as the caller', function (done) {
            // The original author notes that assert doesn't work inside onAdd in this
            // setting, so a mismatch is recorded here and reported through done()
            // exactly once at the end of the test (never calling done() twice).
            var callerError = null;
            var outAggr = new qm.StreamAggr(base, new function () {
                this.onAdd = function (rec, agg) {
                    if (filt.name !== agg.name && callerError === null) {
                        callerError = new Error('caller incorrect');
                    }
                };
            });
            var filt = store.addStreamAggr({
                type: 'recordFilterAggr',
                aggr: outAggr.name,
                filters: [{ type: "field", store: "RecordTest", field: "Bool", value: true }]
            });
            store.push({ "Bool": true });
            store.push({ "Bool": false });
            store.push({ "Bool": true });

            if (callerError) {
                done(callerError);
            } else {
                done();
            }
        });
    });

    describe('Bool field filter', function () {
        it('should filter Bool fields that are true', function (done) {
            var aggr = new qm.StreamAggr(base, new JsAggr());
            store.addStreamAggr({
                type: 'recordFilterAggr',
                aggr: aggr.name,
                filters: [{ type: "field", store: "RecordTest", field: "Bool", value: true }]
            });
            assertUpdateSequence("Bool", [true, true, false, false], [1, 2, 2, 2], store, aggr);
            done();
        });
    });

    describe('String field filter', function () {
        it('should filter string fields by value', function (done) {
            var aggr = new qm.StreamAggr(base, new JsAggr());
            store.addStreamAggr({
                type: 'recordFilterAggr',
                aggr: aggr.name,
                filters: [{ type: "field", store: "RecordTest", field: "Str", value: "tesi" }]
            });
            assertUpdateSequence("Str", ["tesi", "tesi", "notTesi", "TESI"], [1, 2, 2, 2], store, aggr);
            done();
        });
    });

    describe('String range filter', function () {
        it('should filter string fields by range', function (done) {
            var aggr = new qm.StreamAggr(base, new JsAggr());
            store.addStreamAggr({
                type: 'recordFilterAggr',
                aggr: aggr.name,
                filters: [{ type: "field", store: "RecordTest", field: "Str", minValue: "tesia", maxValue: "tesic" }]
            });
            assertUpdateSequence("Str", ["tesia", "tesib", "tesid", "tesi"], [1, 2, 2, 2], store, aggr);
            done();
        });
    });

    describe('String set filter', function () {
        it('should filter string fields by using a set', function (done) {
            var aggr = new qm.StreamAggr(base, new JsAggr());
            store.addStreamAggr({
                type: 'recordFilterAggr',
                aggr: aggr.name,
                filters: [{ type: "field", store: "RecordTest", field: "Str", set: ["tesi", "test"] }]
            });
            assertUpdateSequence("Str", ["tesi", "test", "tesid", "tesii"], [1, 2, 2, 2], store, aggr);
            done();
        });
    });

    describe('Record subsampling filter', function () {
        it('should filter every second record', function (done) {
            var aggr = new qm.StreamAggr(base, new JsAggr());
            store.addStreamAggr({
                type: 'recordFilterAggr',
                aggr: aggr.name,
                filters: [{ type: "subsampling", store: "RecordTest", skip: 1 }]
            });
            assertUpdateSequence("Str", ["a", "b", "c", "d", "e", "f"], [1, 1, 2, 2, 3, 3], store, aggr);
            done();
        });
    });

    describe('Record exists filter', function () {
        it('should filter records that are not in store', function (done) {
            var aggr = new qm.StreamAggr(base, new JsAggr());
            var filt = store.addStreamAggr({
                type: 'recordFilterAggr',
                aggr: aggr.name,
                filters: [{ type: "recordExists", store: "RecordTest" }]
            });
            assertUpdateSequence("Str", ["a", "b", "c"], [1, 2, 3], store, aggr);

            // not pushed to store
            var rec = store.newRecord({ Str: "test" });
            // should not pass
            filt.onAdd(rec);
            assert.strictEqual(aggr.saveJson().val, 3);
            // should pass
            filt.onAdd(store[0]);
            assert.strictEqual(aggr.saveJson().val, 4);
            done();
        });
    });

    describe('Record id range filter', function () {
        it('should filter records outside id range', function (done) {
            var aggr = new qm.StreamAggr(base, new JsAggr());
            store.addStreamAggr({
                type: 'recordFilterAggr',
                aggr: aggr.name,
                filters: [{ type: "recordId", store: "RecordTest", minRecId: 2, maxRecId: 3 }]
            });
            assertUpdateSequence("Str", ["a", "b", "c", "d", "e"], [0, 0, 1, 2, 2], store, aggr);
            done();
        });
    });

    describe('Record id set', function () {
        it('should filter records excluded from an id set', function (done) {
            var aggr = new qm.StreamAggr(base, new JsAggr());
            store.addStreamAggr({
                type: 'recordFilterAggr',
                aggr: aggr.name,
                filters: [{ type: "recordId", store: "RecordTest", recIdSet: [2, 3] }]
            });
            assertUpdateSequence("Str", ["a", "b", "c", "d", "e"], [0, 0, 1, 2, 2], store, aggr);
            done();
        });

        it('should filter records contained in an id set', function (done) {
            var aggr = new qm.StreamAggr(base, new JsAggr());
            store.addStreamAggr({
                type: 'recordFilterAggr',
                aggr: aggr.name,
                filters: [{ type: "recordId", store: "RecordTest", recIdSet: [2, 3], isComplement: false }]
            });
            assertUpdateSequence("Str", ["a", "b", "c", "d", "e"], [1, 2, 2, 2, 3], store, aggr);
            done();
        });
    });

    describe('Record fq range filter', function () {
        it.skip('should filter records with fqs out of range', function (done) {
            var aggr = new qm.StreamAggr(base, new JsAggr());
            var filt = store.addStreamAggr({
                type: 'recordFilterAggr',
                aggr: aggr.name,
                filters: [{ type: "recordFq", store: "RecordTest", minFq: 5, maxFq: 6 }]
            });
            store.push({ Str: "a" });
            store.push({ Str: "b" });
            store.push({ Str: "c" });
            store.push({ Str: "d" });
            store.push({ Str: "e" });

            store[0].$addJoin("joinTest", 1, 5);
            store[0].$addJoin("joinTest", 2, 6);
            store[0].$addJoin("joinTest", 3, 7);
            store[0].$addJoin("joinTest", 4, 1);

            assert.strictEqual(aggr.saveJson().val, 0);
            filt.onAdd(store[0].joinTest[0]);
            assert.strictEqual(aggr.saveJson().val, 1);
            filt.onAdd(store[0].joinTest[1]);
            assert.strictEqual(aggr.saveJson().val, 2);
            filt.onAdd(store[0].joinTest[2]);
            assert.strictEqual(aggr.saveJson().val, 2);
            filt.onAdd(store[0].joinTest[3]);
            assert.strictEqual(aggr.saveJson().val, 2);
            done();
        });
    });

    describe('Record index join record id range filter', function () {
        it('should filter records by joined records id range', function (done) {
            var aggr = new qm.StreamAggr(base, new JsAggr());
            // at least one record in index join must have Id between minVal and maxVal
            var filt = store.addStreamAggr({
                type: 'recordFilterAggr',
                aggr: aggr.name,
                filters: [{ type: "indexJoin", store: "RecordTest", join: "joinTest", minRecId: 2, maxRecId: 3 }]
            });

            store.push({ Str: "a" }, false);
            store.push({ Str: "b" }, false);
            store.push({ Str: "c" }, false);
            store.push({ Str: "d" }, false);
            store.push({ Str: "e" }, false);

            // store[0] OK
            store[0].$addJoin("joinTest", 1);
            store[0].$addJoin("joinTest", 2);
            store[0].$addJoin("joinTest", 3);
            store[0].$addJoin("joinTest", 4);
            // store[1] NOT OK
            store[1].$addJoin("joinTest", 0);
            store[1].$addJoin("joinTest", 1);
            // store[2] OK
            store[2].$addJoin("joinTest", 3);
            // store[3] NOT OK
            store[3].$addJoin("joinTest", 4);

            assert.strictEqual(aggr.saveJson().val, 0);
            filt.onAdd(store[0]);
            assert.strictEqual(aggr.saveJson().val, 1);
            filt.onAdd(store[1]);
            assert.strictEqual(aggr.saveJson().val, 1);
            filt.onAdd(store[2]);
            assert.strictEqual(aggr.saveJson().val, 2);
            filt.onAdd(store[3]);
            assert.strictEqual(aggr.saveJson().val, 2);
            filt.onAdd(store[4]);
            assert.strictEqual(aggr.saveJson().val, 2);
            done();
        });
    });

    describe('Null field tests', function () {
        it('should filter out null fields', function (done) {
            var aggr = new qm.StreamAggr(base, new JsAggr());
            store.addStreamAggr({
                type: 'recordFilterAggr',
                aggr: aggr.name,
                filters: [{ type: "field", store: "RecordTest", field: "Str", value: "tesi", letNullThrough: false }]
            });
            assertUpdateSequence("Str", ["tesi", "tesi", null, "tesii", null, "tesi", "TESI"], [1, 2, 2, 2, 2, 3, 3], store, aggr);
            done();
        });

        it('should let null fields through', function (done) {
            var aggr = new qm.StreamAggr(base, new JsAggr());
            store.addStreamAggr({
                type: 'recordFilterAggr',
                aggr: aggr.name,
                filters: [{ type: "field", store: "RecordTest", field: "Str", value: "tesi", letNullThrough: true }]
            });
            assertUpdateSequence("Str", ["tesi", "tesi", null, "tesii", null, "tesi", "TESI"], [1, 2, 3, 3, 4, 5, 5], store, aggr);
            done();
        });
    });

    describe('Multiple filters test', function () {
        it('should pass records when they pass both filter tests', function (done) {
            var aggr = new qm.StreamAggr(base, new JsAggr());
            store.addStreamAggr({
                type: 'recordFilterAggr',
                aggr: aggr.name,
                filters: [
                    { type: "field", store: "RecordTest", field: "Str", minValue: "a", maxValue: "c" },
                    { type: "field", store: "RecordTest", field: "Str", minValue: "b", maxValue: "d" }
                ]
            });
            assertUpdateSequence("Str", ["a", "b", "c", "d"], [0, 1, 2, 2], store, aggr);
            done();
        });
    });
});