UNPKG

lande

Version:

A tiny neural network for natural language detection.

160 lines (156 loc) 4.65 kB
// node_modules/toygrad/dist/buffer.js

/** Float32Array subclass used as the backing store for tensor values and gradients. */
class Buffer extends Float32Array {}
const buffer_default = Buffer;

// node_modules/toygrad/dist/utils.js

/** @returns {boolean} true when `value` is a Float32Array (or a subclass instance such as Buffer). */
const isBuffer = (value) => value instanceof Float32Array;

/** @returns {boolean} true when `value` is a primitive number. */
const isNumber = (value) => typeof value === "number";

/** @returns {boolean} true when `value` is `undefined`. */
const isUndefined = (value) => typeof value === "undefined";

/**
 * Draws one sample from a normal distribution using the Box-Muller transform.
 *
 * @param {number} mean - Mean of the distribution.
 * @param {number} stdev - Standard deviation of the distribution.
 * @returns {number} The sampled value.
 */
const randn = (mean, stdev) => {
  // 1 - random() lies in (0, 1], which keeps Math.log(u) finite.
  const u = 1 - Math.random();
  const v = Math.random();
  const z = Math.sqrt(-2 * Math.log(u)) * Math.cos(2 * Math.PI * v);
  return z * stdev + mean;
};

// node_modules/toygrad/dist/tensor.js

/**
 * A 3-D volume of values (`w`) with a same-sized gradient buffer (`dw`),
 * both stored flat.
 */
class Tensor {
  /**
   * @param {number} sx - Size along x.
   * @param {number} sy - Size along y.
   * @param {number} sz - Size along z.
   * @param {undefined|number|Float32Array|ArrayLike<number>} [value] - Initial values:
   *   omitted -> random normal samples with mean 0 and stdev sqrt(1 / length);
   *   number -> every element set to that number;
   *   Float32Array -> used directly as the backing store (NOT copied);
   *   anything else -> passed to the Buffer constructor.
   * @param {Float32Array} [dvalue] - Gradient buffer; a zeroed one is allocated when falsy.
   */
  constructor(sx, sy, sz, value, dvalue) {
    this.sx = sx;
    this.sy = sy;
    this.sz = sz;
    this.length = sx * sy * sz;
    if (isUndefined(value)) {
      const scale = Math.sqrt(1 / this.length);
      this.w = new buffer_default(this.length).map(() => randn(0, scale));
    } else if (isNumber(value)) {
      this.w = new buffer_default(this.length);
      // A fresh typed array is already zero-filled, so only fill for non-zero values.
      if (value !== 0) {
        this.w.fill(value);
      }
    } else if (isBuffer(value)) {
      this.w = value;
    } else {
      this.w = new buffer_default(value);
    }
    this.dw = dvalue || new buffer_default(this.length);
  }

  /** Flat offset of (x, y, z): z varies fastest, then x, then y. */
  index(x, y, z) {
    return (this.sx * y + x) * this.sz + z;
  }

  /** Reads the value at (x, y, z). */
  get(x, y, z) {
    return this.w[this.index(x, y, z)];
  }

  /** Writes `value` at (x, y, z) and returns the assigned value. */
  set(x, y, z, value) {
    return this.w[this.index(x, y, z)] = value;
  }

  /** Adds `value` to the element at (x, y, z) and returns the result of the compound assignment. */
  add(x, y, z, value) {
    return this.w[this.index(x, y, z)] += value;
  }

  /** Reads the gradient at (x, y, z). */
  getGrad(x, y, z) {
    return this.dw[this.index(x, y, z)];
  }

  /** Writes the gradient at (x, y, z) and returns the assigned value. */
  setGrad(x, y, z, value) {
    return this.dw[this.index(x, y, z)] = value;
  }

  /** Adds `value` to the gradient at (x, y, z) and returns the result of the compound assignment. */
  addGrad(x, y, z, value) {
    return this.dw[this.index(x, y, z)] += value;
  }

  /** Returns a tensor with the same shape and a copy of `w`; the gradient buffer starts zeroed. */
  clone() {
    const clone = new Tensor(this.sx, this.sy, this.sz, this.w);
    // The constructor adopts a Float32Array by reference, so detach it here.
    clone.w = clone.w.slice();
    return clone;
  }

  /** Returns a tensor with the same shape whose values are all zero. */
  cloneWithZeros() {
    return new Tensor(this.sx, this.sy, this.sz, 0);
  }
}
const tensor_default = Tensor;

// src/utils.ts

/**
 * Invokes `callback` once per "\n"-separated line of `buffer`, in order.
 *
 * Empty lines are delivered as "" and a final line without a trailing newline
 * is delivered too; a trailing newline does not produce an extra empty line.
 *
 * @param {Buffer} buffer - Byte buffer exposing `indexOf(string, offset)` and
 *   `subarray(start, end)`, e.g. a Node.js Buffer.
 * @param {(line: string) => void} callback - Receives each line without its newline.
 * @returns {void}
 */
const forEachLine = (buffer, callback) => {
  let start = 0;
  while (start < buffer.length) {
    const newlineAt = buffer.indexOf("\n", start);
    // No newline left: the remainder of the buffer is the last line.
    const end = newlineAt === -1 ? buffer.length : newlineAt;
    callback(buffer.subarray(start, end).toString());
    start = end + 1;
  }
};

/**
 * Counts every substring of `length` UTF-16 code units in `str`.
 *
 * @param {string} str - Text to scan.
 * @param {number} length - N-gram size.
 * @returns {Object<string, {value: string, count: number, frequency: number}>}
 *   Map from n-gram to its stats, where `frequency` is `count / total n-grams`.
 *   The returned object has a null prototype so that n-grams such as
 *   "toString" or "__proto__" are stored as ordinary keys.
 */
const getNgrams = (str, length) => {
  const ngrams = Object.create(null);
  let total = 0;
  for (let i = 0, last = str.length - length; i <= last; i++) {
    const value = str.slice(i, i + length);
    if (!(value in ngrams)) {
      ngrams[value] = { value, count: 0, frequency: 0 };
    }
    ngrams[value].count += 1;
    total += 1;
  }
  for (const value in ngrams) {
    ngrams[value].frequency = ngrams[value].count / total;
  }
  return ngrams;
};

/**
 * Normalizes text before n-gram extraction: hyphen runs become a space,
 * everything that is not a letter, combining mark or whitespace is dropped,
 * whitespace runs collapse to one space, and the result is lowercased, trimmed
 * and wrapped in a single leading and trailing space.
 *
 * @param {string} str - Raw text.
 * @returns {string} Normalized text padded with one space on each side.
 */
const getNormalized = (str) => {
  const hyphenRe = /-+/g;
  const ignoreRe = /[^\p{L}\p{M}\s]/gu;
  const whitespaceRe = /\s{2,}/g;
  const cleaned = str
    .replace(hyphenRe, " ")
    .replace(ignoreRe, "")
    .replace(whitespaceRe, " ")
    .toLowerCase()
    .trim();
  return ` ${cleaned} `;
};

/**
 * Returns the keys of `obj` ordered by their numeric values, largest first.
 *
 * @param {Object<string, number>} obj - Map from key to score.
 * @returns {string[]} Keys sorted by descending value.
 */
const getTopKeys = (obj) => {
  return Object.keys(obj).sort((a, b) => obj[b] - obj[a]);
};

/**
 * Looks up the frequency of each vocabulary n-gram in a `getNgrams` result,
 * using 0 for n-grams that did not occur.
 */
const getFrequencies = (vocabulary, counted) => {
  return vocabulary.map((value) => {
    const entry = counted[value];
    return entry == null ? 0 : entry.frequency || 0;
  });
};

/**
 * Scores `text` against every language.
 *
 * @param {string} text - Text to classify.
 * @param {string[]} langs - Language labels, indexed like the network output.
 * @param {{unigrams: string[], bigrams: string[], trigrams: string[], quadgrams: string[]}} ngrams -
 *   Vocabulary whose n-gram frequencies, concatenated in this order, form the network input.
 * @param {{forward: Function}} nn - Network; `forward(input, false)` must return an
 *   object whose `w` holds one score per language.
 *   NOTE(review): the second argument is presumably a "training" flag - confirm against toygrad.
 * @returns {Array<[string, number]>} `[language, score]` pairs, highest score first.
 */
const infer = (text, langs, ngrams, nn) => {
  const textNorm = getNormalized(text);
  const inputNgrams = [
    ...getFrequencies(ngrams.unigrams, getNgrams(textNorm, 1)),
    ...getFrequencies(ngrams.bigrams, getNgrams(textNorm, 2)),
    ...getFrequencies(ngrams.trigrams, getNgrams(textNorm, 3)),
    ...getFrequencies(ngrams.quadgrams, getNgrams(textNorm, 4)),
  ];
  const input = new tensor_default(1, 1, inputNgrams.length, new Float32Array(inputNgrams));
  const output = nn.forward(input, false).w;
  const result = Array.from(output).map((probability, index) => [langs[index], probability]);
  return result.sort((a, b) => b[1] - a[1]);
};

/**
 * Pads `arr` on the right with `padder` until it has `length` elements.
 *
 * @param {Array} arr - Source array (never mutated).
 * @param {number} length - Minimum length of the result.
 * @param {*} padder - Value used for the added slots.
 * @returns {Array} `arr` itself when it is already long enough, otherwise a new padded array.
 */
const padEnd = (arr, length, padder) => {
  if (arr.length >= length) return arr;
  const padding = new Array(length - arr.length).fill(padder);
  return arr.concat(padding);
};

export {
  forEachLine,
  getNgrams,
  getNormalized,
  getTopKeys,
  infer,
  padEnd
};