budoux
Version:
A small chunk segmenter.
135 lines • 5.1 kB
JavaScript
/**
* @license
* Copyright 2021 Google LLC
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* https://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
 * TypeScript emit helper: exposes property `k` of module `m` on object `o`
 * under the name `k2` (defaults to `k`). Reuses an already-installed helper
 * from `this` when one exists.
 */
var __createBinding = (this && this.__createBinding) || (function () {
    if (!Object.create) {
        // Legacy fallback: copy the current value with a plain assignment.
        return function (o, m, k, k2) {
            if (k2 === undefined) {
                k2 = k;
            }
            o[k2] = m[k];
        };
    }
    return function (o, m, k, k2) {
        if (k2 === undefined) {
            k2 = k;
        }
        var descriptor = Object.getOwnPropertyDescriptor(m, k);
        var needsLiveGetter;
        if (!descriptor) {
            needsLiveGetter = true;
        }
        else if ("get" in descriptor) {
            // An accessor is reused as-is only when the source is an ES module.
            needsLiveGetter = !m.__esModule;
        }
        else {
            // A mutable data property must be read lazily to stay in sync.
            needsLiveGetter = descriptor.writable || descriptor.configurable;
        }
        if (needsLiveGetter) {
            descriptor = { enumerable: true, get: function () { return m[k]; } };
        }
        Object.defineProperty(o, k2, descriptor);
    };
})();
/*
 * TypeScript emit helper: stores `v` as the "default" member of `o`.
 * Reuses an already-installed helper from `this` when one exists.
 */
var __setModuleDefault = (this && this.__setModuleDefault) || (function () {
    if (Object.create) {
        return function (o, v) {
            var descriptor = { enumerable: true, value: v };
            Object.defineProperty(o, "default", descriptor);
        };
    }
    // Legacy fallback: plain assignment.
    return function (o, v) {
        o["default"] = v;
    };
})();
/*
 * TypeScript emit helper: returns a namespace-style object for a required
 * module. A module flagged `__esModule` is returned untouched; otherwise its
 * own enumerable members (except "default") are bound onto a fresh object
 * whose "default" member is the module itself.
 */
var __importStar = (this && this.__importStar) || function (mod) {
    if (mod && mod.__esModule) {
        return mod;
    }
    var namespace = {};
    if (mod != null) {
        for (var key in mod) {
            if (key === "default") {
                continue;
            }
            if (!Object.prototype.hasOwnProperty.call(mod, key)) {
                continue;
            }
            __createBinding(namespace, mod, key);
        }
    }
    __setModuleDefault(namespace, mod);
    return namespace;
};
Object.defineProperty(exports, "__esModule", { value: true });
exports.cli = void 0;
const fs_1 = require("fs");
const path = __importStar(require("path"));
const readline = __importStar(require("readline"));
const commander_1 = require("commander");
const index_js_1 = require("./index.js");
// Version string reported by the CLI's version option.
const CLI_VERSION = '0.6.2';
// Built-in parsers keyed by language name; used to list the available
// languages in the help text and to resolve the --lang option.
const defaultParsers = (0, index_js_1.loadDefaultParsers)();
/**
 * Run the command line interface program.
 *
 * Parses `argv` with commander, selects a parser and prints the segmented
 * text. Parser selection: a custom model file given with `-m/--model` wins;
 * otherwise a built-in language given with `-l/--lang` is used when it is one
 * of the default parsers; in every other case (including an unrecognized
 * language) the default Japanese parser is used.
 *
 * With no positional argument the text is read from standard input and
 * processed once the stream ends; with exactly one argument that argument is
 * processed directly.
 *
 * @param argv process.argv.
 * @throws {Error} When more than one positional text argument is passed.
 */
const cli = (argv) => {
    const program = new commander_1.Command('budoux');
    // The synopsis must list only options that are registered below; an
    // advertised-but-unregistered flag would be rejected by commander.
    program.usage('[-h] [-H] [-d STR] [-m JSON] [-l LANG] [-V] [TXT]');
    program.description('BudouX is the successor to Budou, the machine learning powered line break organizer tool.');
    program
        .option('-H, --html', 'HTML mode', false)
        .option('-d, --delim <str>', 'output delimiter in TEXT mode', '---')
        .option('-m, --model <json>', 'model file path')
        .option('-l, --lang <str>', `language model to use. -m and --model will be prioritized if any.\navailable languages: ${[
        ...defaultParsers.keys(),
    ].join(', ')}`)
        .argument('[txt]', 'text');
    program.version(CLI_VERSION);
    program.parse(argv);
    const options = program.opts();
    const { lang, model, delim, html } = options;
    const { args } = program;
    const parser = model
        ? loadCustomParser(model)
        : lang && defaultParsers.has(lang)
            ? defaultParsers.get(lang)
            : (0, index_js_1.loadDefaultJapaneseParser)();
    switch (args.length) {
        case 0: {
            // No text argument: accumulate stdin line by line and process it
            // as a single text once the input stream ends.
            const rl = readline.createInterface({
                input: process.stdin,
            });
            let stdin = '';
            rl.on('line', line => {
                stdin += line + '\n';
            });
            process.stdin.on('end', () => {
                outputParsedTexts(parser, html, delim, [stdin]);
            });
            break;
        }
        case 1: {
            outputParsedTexts(parser, html, delim, args);
            break;
        }
        default: {
            throw new Error('Too many arguments. Please, pass the only one argument.');
        }
    }
};
exports.cli = cli;
/**
 * Prints the parsed form of the given text to standard output.
 *
 * In HTML mode the whole text is passed to `parser.translateHTMLString` and
 * the result is printed once. In text mode the input is split into lines,
 * empty lines are dropped, every chunk returned by `parser.parse` is printed
 * on its own line, and `delim` is printed between consecutive input lines.
 *
 * @param parser A parser.
 * @param html A flag of html output mode.
 * @param delim A delimiter to separate output sentence.
 * @param args string array to parse. Array should have only one element.
 */
const outputParsedTexts = (parser, html, delim, args) => {
    const input = args[0];
    if (html) {
        console.log(parser.translateHTMLString(input));
        return;
    }
    const nonEmptyLines = input.split(/\r?\n/).filter(line => line !== '');
    for (let i = 0; i < nonEmptyLines.length; i += 1) {
        // The delimiter goes between lines, never before the first one.
        if (i > 0) {
            console.log(delim);
        }
        for (const chunk of parser.parse(nonEmptyLines[i])) {
            console.log(chunk);
        }
    }
};
/**
 * Builds a parser from a custom model stored in a JSON file.
 * @param modelPath Path to the model JSON file; it is passed through
 *     `path.resolve` before being read.
 * @return A parser with the loaded model.
 */
const loadCustomParser = (modelPath) => {
    const absolutePath = path.resolve(modelPath);
    const contents = (0, fs_1.readFileSync)(absolutePath).toString();
    return new index_js_1.HTMLProcessingParser(JSON.parse(contents));
};
//# sourceMappingURL=cli.js.map
;