@waynew/boa
Version:
Use Python modules seamlessly in Node.js
65 lines (55 loc) • 1.63 kB
JavaScript
;
const boa = require('../../');
const fs = require('fs');
const glob = require('glob').sync;
const acorn = require('acorn');
const { set, len, list } = boa.builtins();
const { DBSCAN } = boa.import('sklearn.cluster');
const { word2vec } = boa.import('gensim.models');
const cwd = process.cwd();
let files = [];
files = files.concat(glob(cwd + '/lib/**/*.js'));
files = files.concat(glob(cwd + '/example/**/*.js'));
files = files.concat(glob(cwd + '/node_modules/**/*.js'));
const sentences = [];
const vec2word = {};
const samples = files
.map((f) => fs.readFileSync(f))
.map((s) => {
let ast;
try { ast = acorn.parse(s); } catch (e) {
console.error('just ignore the error');
}
return ast;
})
.filter((ast) => ast !== undefined)
.reduce((list, ast) => {
const fn = ast.body.filter((stmt) => stmt.type === 'FunctionDeclaration');
list = list.concat(fn);
return list;
}, []);
samples.forEach((sample) => sentences.push([ sample.id.name ]));
const { wv } = word2vec.Word2Vec(sentences, boa.kwargs({
workers: 1,
size: 2,
min_count: 1,
window: 3,
sg: 0
}));
const X = sentences
.map((s) => wv.__getitem__(s)[0])
.map((v, i) => {
const r = [ v[0] * 100, v[1] * 100 ];
vec2word[r] = samples[i].id.name;
return r;
});
const db = DBSCAN(boa.kwargs({ eps: 0.9 })).fit(X);
const labels = db.labels_;
const n_noise_ = list(labels).count(-1);
const n_clusters_ = len(set(labels));
console.log(n_noise_, n_clusters_, set(labels));
for (let i = 0; i < len(labels); i++) {
if (labels[i] !== '-1') {
console.log(i, labels[i], vec2word[X[i]]);
}
}