jieba-js
Version:
jieba-js chinese word segmentation for node in pure javascript.
73 lines (72 loc) • 1.86 kB
JavaScript
;
Object.defineProperty(exports, "__esModule", { value: true });
const fs = require("fs");
const crlf_normalize_1 = require("crlf-normalize");
const stream_1 = require("../../stream");
function parse(input) {
return crlf_normalize_1.lineSplit(input)
.reduce(function (a, input) {
if (input.trim() !== '') {
let row = parseLine(input);
a.push(row);
}
return a;
}, []);
}
exports.parse = parse;
function parseLine(input) {
let [str, n, s] = input
.replace(/^\s+|\s+$/, '')
.split(/\s+/g);
return [str, parseInt(n), s];
}
exports.parseLine = parseLine;
function loadSync(file) {
let input = fs.readFileSync(file);
return parse(input.toString());
}
exports.loadSync = loadSync;
function load(file) {
return new Promise(function (resolve, reject) {
loadStream(file, function (err, dict, stream) {
if (err) {
reject(err);
}
else {
resolve(dict);
}
});
});
}
exports.load = load;
function loadStream(file, cb) {
let stream = stream_1.default.createReadStream(file, {
trailing: true,
allowEmptyLine: false,
});
let i = 0;
let dict = [];
try {
stream.on('data', function (line, ...argv) {
dict.push(parseLine(line));
});
stream.on('close', function (line, ...argv) {
if (typeof line == 'string') {
dict.push(parseLine(line));
}
});
stream.on('end', function () {
if (cb) {
cb(null, dict, stream);
}
});
}
catch (e) {
stream.emit('error', e, dict);
if (cb) {
cb(e, dict, stream);
}
}
return stream;
}
exports.loadStream = loadStream;