UNPKG

@naturalcycles/scrubber-lib

Version:

Scrub data in JavaScript plain objects by using rules defined in a configuration object

221 lines (187 loc) 7.27 kB
import { _assert } from '@naturalcycles/js-lib/error/assert.js'
import { _deepEquals } from '@naturalcycles/js-lib/object'
import type { StringMap } from '@naturalcycles/js-lib/types'
import { nanoid } from '@naturalcycles/nodejs-lib'
import type { ScrubberConfig, ScrubbersMap, ScrubbersSQLMap } from './scrubber.model.js'
import { defaultScrubbers, defaultScrubbersSQL } from './scrubbers.js'

/** Defaults that are merged underneath the user-supplied config in the constructor. */
const defaultCfg: Partial<ScrubberConfig> = {
  throwOnError: false,
  preserveFalsy: true,
}

/**
 * Scrubs values of plain objects/arrays according to the rules in a `ScrubberConfig`.
 *
 * Field rules are looked up by key name. Keys written as `parent.child` are only applied
 * when the parent chain of the visited key ends with the given parents.
 */
export class Scrubber {
  private readonly scrubbersMap: ScrubbersMap
  private readonly scrubbersSQLMap: ScrubbersSQLMap
  private readonly initializationVector: string
  private readonly rootType?: string

  /**
   * Create new scrubber instance
   *
   * @param cfg scrubber configuration. It is not modified; an expanded copy is stored.
   * @param additionalScrubbersMap optional additional scrubbers (merged over the defaults)
   * @param additionalScrubbersSQLMap optional additional scrubbers SQL (merged over the defaults)
   * @param initializationVector optional initialization vector used by some scrubbers.
   *   A falsy value is replaced with a freshly generated `nanoid()`.
   * @param rootType optional root type. Assumes all objects passed to this scrubber are of the
   *   named type for the sake of parent matching.
   * @throws if `cfg.fields` is missing or references a scrubber that is not registered
   */
  constructor(
    private cfg: ScrubberConfig,
    additionalScrubbersMap?: ScrubbersMap,
    additionalScrubbersSQLMap?: ScrubbersSQLMap,
    initializationVector?: string,
    rootType?: string,
  ) {
    this.initializationVector = initializationVector || nanoid()
    this.scrubbersMap = { ...defaultScrubbers, ...additionalScrubbersMap }
    this.scrubbersSQLMap = { ...defaultScrubbersSQL, ...additionalScrubbersSQLMap }
    this.rootType = rootType

    // Validate before touching `cfg.fields`, so that a missing `fields` key is reported
    // by the dedicated assertion instead of a TypeError from `Object.keys(undefined)`.
    // Comma-separated keys share one field config, so validating the raw config checks
    // exactly the same scrubber names as validating the expanded one.
    this.checkIfScrubbersExistAndRaise(cfg, this.scrubbersMap)

    const expandedCfg = this.expandCfg(cfg)
    this.cfg = {
      ...defaultCfg,
      ...expandedCfg,
      // Always derived from the expanded field names; overrides any incoming value
      splitFields: this.splitFields(expandedCfg),
    }
  }

  /**
   * Convenience factory: same as the constructor, but with `rootType` as the first argument.
   */
  static getScrubberForType(
    rootType: string,
    cfg: ScrubberConfig,
    additionalScrubbersImpl?: ScrubbersMap,
    additionalScrubbersSQLImpl?: ScrubbersSQLMap,
    initializationVector?: string,
  ): Scrubber {
    return new Scrubber(
      cfg,
      additionalScrubbersImpl,
      additionalScrubbersSQLImpl,
      initializationVector,
      rootType,
    )
  }

  /**
   * Returns undefined if there's no scrubber defined for the field.
   *
   * @throws if the field has a scrubber configured, but no SQL factory is registered for it
   */
  getScrubberSql(fieldName: string): string | undefined {
    const scrubberCurrentField = this.cfg.fields[fieldName]
    if (!scrubberCurrentField) return undefined

    const scrubber = this.scrubbersSQLMap[scrubberCurrentField.scrubber]
    _assert(scrubber, `No SQL factory for ${scrubberCurrentField.scrubber}, used for ${fieldName}`)

    return scrubber({
      initializationVector: this.initializationVector,
      ...scrubberCurrentField.params,
    })
  }

  /**
   * Returns a scrubbed copy of `data`. The input itself is not modified by this class:
   * every visited object/array is shallow-copied before values are replaced.
   */
  scrub<T>(data: T): T {
    return this.applyScrubbers(data, this.rootType ? [this.rootType] : undefined)
  }

  /**
   * Recursively copies `data` and applies the configured scrubber to every matching key.
   *
   * @param data object or array to process
   * @param parents chain of object keys leading to `data` (array indices are not included)
   */
  private applyScrubbers<T>(data: T, parents: string[] = []): T {
    const isArray = Array.isArray(data)
    const dataCopy: any = isArray ? data.slice() : { ...data }

    for (const key of Object.keys(dataCopy)) {
      let scrubberCurrentField = this.cfg.fields[key]

      // No direct rule for this key: try the `parent.child` style rules registered for it
      if (!scrubberCurrentField && this.cfg.splitFields?.[key] && parents) {
        for (const splitFieldParentCfg of this.cfg.splitFields[key]) {
          if (this.arrayContainsInOrder(parents, splitFieldParentCfg)) {
            const recomposedKey = [...splitFieldParentCfg, key].join('.')
            scrubberCurrentField = this.cfg.fields[recomposedKey]
          }
        }
      }

      if (!scrubberCurrentField) {
        // Ignore unsupported object types
        // NOTE(review): other non-plain objects (e.g. Date) are not in this skip list, so they
        // reach the deep-traverse branch below and get copied via object spread - confirm
        // that this is intended.
        if (
          dataCopy[key] instanceof Map ||
          dataCopy[key] instanceof Set ||
          Buffer.isBuffer(dataCopy[key])
        ) {
          continue
        }

        // Deep traverse
        if (typeof dataCopy[key] === 'object' && dataCopy[key]) {
          // Don't append array keys to parent array as it breaks parent matching
          const parentsNext = isArray ? parents : [...parents, key]
          dataCopy[key] = this.applyScrubbers(dataCopy[key], parentsNext)
        }

        continue
      }

      // Existence of the scrubber was verified in the constructor
      const scrubber = this.scrubbersMap[scrubberCurrentField.scrubber]!
      const params = {
        initializationVector: this.initializationVector,
        ...scrubberCurrentField.params,
      }

      try {
        // With `preserveFalsy` enabled, falsy values are left as they are
        if (!this.cfg.preserveFalsy || dataCopy[key]) {
          dataCopy[key] = scrubber(dataCopy[key], params)
        }
      } catch (err) {
        if (this.cfg.throwOnError) {
          throw err
        }

        // Best-effort mode: report the failure and keep the original value
        console.log(
          `Error when applying scrubber '${scrubberCurrentField.scrubber}' to field '${key}'`,
          err,
        )
      }
    }

    return dataCopy
  }

  /*
   * Allows comma-separated field names to be used as keys on YAML for better reusability
   * YAML:
   *  field1, field2, field3:
   *    scrubber: <scrubberName>
   *
   * Will become:
   *  field1:
   *    scrubber: <scrubberName>
   *  field2:
   *    scrubber: <scrubberName>
   *  ...
   *
   * This function returns a new ScrubberConfig where each field is denormalized,
   * allowing fast lookup by keys. The passed `cfg` (including `cfg.fields`) is left untouched.
   */
  private expandCfg(cfg: ScrubberConfig): ScrubberConfig {
    // Copy `fields` too: a shallow copy of `cfg` alone would still share (and mutate)
    // the caller's `fields` object.
    const fields = { ...cfg.fields }

    for (const key of Object.keys(fields)) {
      if (!key.includes(',')) continue

      const fieldCfg = fields[key]!
      delete fields[key]

      for (const fieldName of key.split(',')) {
        fields[fieldName.trim()] = fieldCfg
      }
    }

    return { ...cfg, fields }
  }

  /**
   * Asserts that `cfg.fields` exists and that every scrubber name used in it
   * is a key of `scrubbers`.
   */
  private checkIfScrubbersExistAndRaise(cfg: ScrubberConfig, scrubbers: ScrubbersMap): void {
    _assert(cfg.fields, "Missing the 'fields' key on ScrubberConfig")

    const scrubbersOnConfig = Object.keys(cfg.fields).map(field => cfg.fields[field]!.scrubber)
    const scrubbersAvailable = Object.keys(scrubbers)

    scrubbersOnConfig.forEach(scrubber => {
      _assert(scrubbersAvailable.includes(scrubber), `${scrubber} not found`)
    })
  }

  /**
   * Indexes dotted field names (`a.b.c`) by their last segment:
   * `{ c: [['a', 'b'], ...] }`. Field names without a dot are skipped.
   */
  private splitFields(cfg: ScrubberConfig): StringMap<string[][]> {
    const output: StringMap<string[][]> = {}

    for (const field of Object.keys(cfg.fields)) {
      const splitField = field.split('.')
      if (splitField.length > 1) {
        const key = splitField.pop()!
        // Support multiple keys with different parents
        output[key] ||= []
        output[key].push(splitField)
      }
    }

    return output
  }

  /**
   * returns true if all entries in b are equal to the end of entries of a. a may be longer than b.
   * Supports objects inside of arrays by removing any integer entries from a before comparing
   */
  private arrayContainsInOrder(a: any[] | undefined, b: any[] | undefined): boolean {
    if (!a || !b) return false
    if (a === b) return true
    if (a.length < b.length) return false

    // Deliberately without the `g` flag: a global regex keeps `lastIndex` between `test()`
    // calls, which made every second consecutive integer entry slip through the filter.
    // `+` (not `*`) so that an empty-string key is not mistaken for an array index.
    const intRegex = /^[0-9]+$/

    // Remove any entries that are integers as we assume they are array indices
    // that should be ignored for parent matching
    const aWithoutIndices = a.filter(e => !intRegex.test(e))
    if (aWithoutIndices.length < b.length) return false

    // a may be longer than b, slice a to the size of b, take chunk from the end
    const aTail = aWithoutIndices.slice(aWithoutIndices.length - b.length)
    return _deepEquals(aTail, b)
  }
}