@naturalcycles/scrubber-lib
Version:
Scrub data in JavaScript plain objects by using rules defined in a configuration object
171 lines (170 loc) • 7.27 kB
JavaScript
import { _assert } from '@naturalcycles/js-lib/error/assert.js';
import { _deepEquals } from '@naturalcycles/js-lib/object';
import { nanoid } from '@naturalcycles/nodejs-lib';
import { defaultScrubbers, defaultScrubbersSQL } from './scrubbers.js';
/**
 * Defaults merged underneath every user-supplied scrubber config.
 * - throwOnError: when false, scrubber failures are logged instead of rethrown.
 * - preserveFalsy: when true, falsy values are left untouched instead of scrubbed.
 *
 * Frozen because it is shared by all instances and only ever spread into new objects.
 */
const defaultCfg = Object.freeze({ throwOnError: false, preserveFalsy: true });
/**
 * Scrubs values in plain JavaScript objects according to a configuration that
 * maps field names (optionally qualified by parent keys, e.g. `account.id`)
 * to named scrubber functions.
 */
export class Scrubber {
  cfg;
  scrubbersMap;
  scrubbersSQLMap;
  initializationVector;
  rootType;

  /**
   * Create new scrubber instance
   *
   * @param cfg scrubber configuration; must contain a `fields` object. It is not mutated.
   * @param additionalScrubbersMap optional additional scrubbers
   * @param additionalScrubbersSQLMap optional additional scrubbers SQL
   * @param initializationVector optional initialization vector used by some scrubbers.
   * @param rootType optional root type. Assumes all objects passed to this scrubber are of the named type for the sake of parent matching.
   */
  constructor(cfg, additionalScrubbersMap, additionalScrubbersSQLMap, initializationVector, rootType) {
    this.initializationVector = initializationVector || nanoid();
    this.rootType = rootType;
    this.scrubbersMap = { ...defaultScrubbers, ...additionalScrubbersMap };
    this.scrubbersSQLMap = { ...defaultScrubbersSQL, ...additionalScrubbersSQLMap };
    // Validate first: expandCfg reads cfg.fields, so a missing `fields` key must
    // surface as the descriptive assertion rather than a bare TypeError.
    this.checkIfScrubbersExistAndRaise(cfg, this.scrubbersMap);
    this.cfg = { ...defaultCfg, ...this.expandCfg(cfg) };
    // Split the *expanded* field names so comma-separated keys are handled too.
    this.cfg.splitFields = this.splitFields(this.cfg);
  }

  /**
   * Convenience factory: same as the constructor, but with `rootType` first.
   */
  static getScrubberForType(rootType, cfg, additionalScrubbersImpl, additionalScrubbersSQLImpl, initializationVector) {
    return new Scrubber(cfg, additionalScrubbersImpl, additionalScrubbersSQLImpl, initializationVector, rootType);
  }

  /**
   * Calls the SQL scrubber factory configured for `fieldName` with the
   * initialization vector and the field's params, and returns its result.
   *
   * Returns undefined if there's no scrubber defined for the field.
   * Asserts that a SQL factory exists for the configured scrubber name.
   */
  getScrubberSql(fieldName) {
    const { fields } = this.cfg;
    // Own-property check so names like `constructor` are not mistaken for configured fields.
    const scrubberCurrentField = Object.hasOwn(fields, fieldName) ? fields[fieldName] : undefined;
    if (!scrubberCurrentField) {
      return undefined;
    }
    const scrubber = this.scrubbersSQLMap[scrubberCurrentField.scrubber];
    _assert(scrubber, `No SQL factory for ${scrubberCurrentField.scrubber}, used for ${fieldName}`);
    return scrubber({
      initializationVector: this.initializationVector,
      ...scrubberCurrentField.params,
    });
  }

  /**
   * Returns a scrubbed copy of `data`. When a `rootType` was given, it is used
   * as the initial parent for parent-qualified field matching.
   */
  scrub(data) {
    return this.applyScrubbers(data, this.rootType ? [this.rootType] : undefined);
  }

  /**
   * Recursively copies `data` (object or array), replacing every value whose key
   * has a configured scrubber with the scrubber's output.
   *
   * @param data object or array to scrub; the input itself is not modified
   * @param parents chain of object keys leading to `data` (array indices are not included)
   * @returns the scrubbed copy
   */
  applyScrubbers(data, parents = []) {
    const isArray = Array.isArray(data);
    const dataCopy = isArray ? [...data] : { ...data };
    const { fields, splitFields } = this.cfg;

    for (const key of Object.keys(dataCopy)) {
      const value = dataCopy[key];

      // Own-property lookups only: data keys such as `constructor` or `toString`
      // must not resolve to members inherited from Object.prototype.
      let scrubberCurrentField = Object.hasOwn(fields, key) ? fields[key] : undefined;

      if (!scrubberCurrentField && parents && splitFields && Object.hasOwn(splitFields, key)) {
        // Parent-qualified config (e.g. `account.id`). If several parent paths
        // match, the last one configured wins.
        for (const splitFieldParentCfg of splitFields[key]) {
          if (this.arrayContainsInOrder(parents, splitFieldParentCfg)) {
            const recomposedKey = [...splitFieldParentCfg, key].join('.');
            scrubberCurrentField = fields[recomposedKey];
          }
        }
      }

      if (!scrubberCurrentField) {
        // Pass through object types that cannot be copied via spread.
        // Date is included: `{...date}` would otherwise turn it into `{}`.
        if (value instanceof Map ||
          value instanceof Set ||
          value instanceof Date ||
          Buffer.isBuffer(value)) {
          continue;
        }
        // Deep traverse
        if (typeof value === 'object' && value) {
          // Don't append array keys to parent array as it breaks parent matching
          const parentsNext = isArray ? parents : [...parents, key];
          dataCopy[key] = this.applyScrubbers(value, parentsNext);
        }
        continue;
      }

      const scrubber = this.scrubbersMap[scrubberCurrentField.scrubber];
      const params = {
        initializationVector: this.initializationVector,
        ...scrubberCurrentField.params,
      };
      try {
        // With preserveFalsy, falsy values (null, '', 0, ...) are left as they are.
        if (!this.cfg.preserveFalsy || value) {
          dataCopy[key] = scrubber(value, params);
        }
      }
      catch (err) {
        if (this.cfg.throwOnError) {
          throw err;
        }
        console.log(`Error when applying scrubber '${scrubberCurrentField.scrubber}' to field '${key}'`, err);
      }
    }
    return dataCopy;
  }

  /*
   * Allows comma-separated field names to be used as keys on YAML for better reusability
   * YAML:
   *   field1, field2, field3:
   *     scrubber: <scrubberName>
   *
   * Will become:
   *   field1:
   *     scrubber: <scrubberName>
   *   field2:
   *     scrubber: <scrubberName>
   *   field3:
   *     scrubber: <scrubberName>
   *
   * This function returns a new ScrubberConfig where each field is denormalized,
   * allowing fast lookup by keys. The passed-in config (including its `fields`
   * object) is left untouched.
   */
  expandCfg(cfg) {
    // Copy `fields` too: a shallow copy of cfg alone would still share (and mutate) it.
    const fields = { ...cfg.fields };
    for (const key of Object.keys(fields)) {
      if (!key.includes(',')) {
        continue;
      }
      const fieldCfg = fields[key];
      delete fields[key];
      for (const rawName of key.split(',')) {
        const fieldName = rawName.trim();
        // Skip empty names produced by a trailing or doubled comma.
        if (fieldName) {
          fields[fieldName] = fieldCfg;
        }
      }
    }
    return { ...cfg, fields };
  }

  /**
   * Asserts that `cfg.fields` exists and that every scrubber name it references
   * is present in `scrubbers`.
   */
  checkIfScrubbersExistAndRaise(cfg, scrubbers) {
    _assert(cfg?.fields, "Missing the 'fields' key on ScrubberConfig");
    for (const fieldCfg of Object.values(cfg.fields)) {
      const { scrubber } = fieldCfg;
      _assert(Object.hasOwn(scrubbers, scrubber), `${scrubber} not found`);
    }
  }

  /**
   * Indexes parent-qualified field names by their last segment.
   * E.g. fields `a.b.id` and `c.id` produce `{ id: [['a', 'b'], ['c']] }`.
   * Field names without a dot are not included.
   */
  splitFields(cfg) {
    const output = {};
    for (const field of Object.keys(cfg.fields)) {
      const splitField = field.split('.');
      if (splitField.length > 1) {
        const key = splitField.pop();
        // Support multiple keys with different parents
        if (!Object.hasOwn(output, key)) {
          output[key] = [];
        }
        output[key].push(splitField);
      }
    }
    return output;
  }

  /**
   * returns true if all entries in b are equal to the end of entries of a. a may be longer than b.
   * Supports objects inside of arrays by removing any integer entries from a before comparing
   */
  arrayContainsInOrder(a, b) {
    if (!a || !b) {
      return false;
    }
    if (a === b) {
      return true;
    }
    if (a.length < b.length) {
      return false;
    }
    // No `g` flag: a global regex keeps `lastIndex` between `.test()` calls, which
    // made every second consecutive numeric entry slip through the filter.
    const intRegex = /^[0-9]+$/;
    // Remove any entries that are integers as we assume they are array indices that should be ignored for parent matching
    const aFiltered = a.filter(e => !intRegex.test(e));
    if (aFiltered.length < b.length) {
      return false;
    }
    // a may be longer than b, slice a to the size of b, take chunk from the end
    const aTail = aFiltered.slice(aFiltered.length - b.length);
    return _deepEquals(aTail, b);
  }
}