// unique-columns
// Version:
// Finds which columns are unique in tabular data
// 68 lines (55 loc) • 1.6 kB
// JavaScript
var through = require('through2')
var pump = require('pump')
module.exports = function (jsonStream, args, cb) {
/*
jsonStream:
{"name": "bob": "age": 24, "id": 1}
{"name": "ange": "age": 29, "id": 2}
{"name": "karissa": "age": 24, "id": 3}
{"name": "john": "age": 29, "id": 4}
{"name": "john": "age": 32, "id": 5}
{"name": "grayson": "age": 29, "id": 6}
to -> cb(err, duplicates)
duplicates:
{
"name": 2,
"age": 4,
"id": 0
}
*/
if (args === 'function') {
cb = args
}
if (!args) args = {}
var allFields = {}
var rows = 0
var valueCounter = through.obj(function (row, enc, next) {
var fields = Object.keys(row) // name, age
rows += 1
for (var i in fields) {
var field = fields[i] // name
var rowValue = row[field] // "bob"
var values = allFields[field] || {} // {bob: 1, ange: 1, etc..}
values[rowValue] = (values[rowValue] || 0) + 1
allFields[field] = values
}
return next()
})
var duplicates = {}
pump(jsonStream, valueCounter, function done (err) {
if (err) return cb(err)
var fields = Object.keys(allFields)
for (var i in fields) { // [name, age, etc]
var field = fields[i] // "age"
var fieldValues = allFields[field] // { 26: 1, 29: 2, etc.. }
duplicates[field] = duplicates[field] || 0
for (var j in fieldValues) {
var count = parseInt(fieldValues[j])
if (count > 1) {
duplicates[field] = parseInt(duplicates[field]) + count
}
}
}
return cb(null, duplicates)
})
}