budoux
Version:
A small chunk segmenter.
220 lines • 10.1 kB
JavaScript
/**
* @license
* Copyright 2021 Google LLC
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* https://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
if (k2 === undefined) k2 = k;
var desc = Object.getOwnPropertyDescriptor(m, k);
if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
desc = { enumerable: true, get: function() { return m[k]; } };
}
Object.defineProperty(o, k2, desc);
}) : (function(o, m, k, k2) {
if (k2 === undefined) k2 = k;
o[k2] = m[k];
}));
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
Object.defineProperty(o, "default", { enumerable: true, value: v });
}) : function(o, v) {
o["default"] = v;
});
var __importStar = (this && this.__importStar) || function (mod) {
if (mod && mod.__esModule) return mod;
var result = {};
if (mod != null) for (var k in mod) if (k !== "default" && Object.prototype.hasOwnProperty.call(mod, k)) __createBinding(result, mod, k);
__setModuleDefault(result, mod);
return result;
};
var __importDefault = (this && this.__importDefault) || function (mod) {
return (mod && mod.__esModule) ? mod : { "default": mod };
};
Object.defineProperty(exports, "__esModule", { value: true });
const cli_js_1 = require("../cli.js");
const child_process_1 = require("child_process");
const path = __importStar(require("path"));
const stream_1 = __importDefault(require("stream"));
const index_js_1 = require("../index.js");
const runCli = (args) => {
return new Promise(resolve => {
const binPath = path.resolve('./bin/budoux.js');
(0, child_process_1.execFile)('node', [binPath, ...args], (error, stdout, stderr) => {
resolve({
error,
stdout,
stderr,
});
});
});
};
describe('cli', () => {
beforeEach(() => {
spyOn(console, 'log');
});
it('should output the wrapped HTML sentence when execute budoux command with --html option.', () => {
const inputText = '今日は天気です。';
const argv = ['node', 'budoux', '--html', inputText];
const expectedStdOut = '<span style="word-break: keep-all; overflow-wrap: anywhere;">今日は\u200B天気です。</span>';
(0, cli_js_1.cli)(argv);
expect(console.log).toHaveBeenCalledWith(expectedStdOut);
});
it('should output the wrapped HTML sentence when execute budoux command with -H option alias.', () => {
const inputText = '今日は天気です。';
const argv = ['node', 'budoux', '-H', inputText];
const expectedStdOut = '<span style="word-break: keep-all; overflow-wrap: anywhere;">今日は\u200B天気です。</span>';
(0, cli_js_1.cli)(argv);
expect(console.log).toHaveBeenCalledWith(expectedStdOut);
});
it('should output the separated sentence with custom model when execute budoux command with --model option.', () => {
const inputText = 'abcdeabcd';
const customModelPath = path.resolve(__dirname, 'models', 'separate_right_before_a.json');
const argv = ['node', 'budoux', '--model', customModelPath, inputText];
const expectedStdOuts = 'abcde\nabcd'.split('\n');
(0, cli_js_1.cli)(argv);
expectedStdOuts.forEach(stdout => {
expect(console.log).toHaveBeenCalledWith(stdout);
});
});
it('should output the separated sentence with custom model when execute budoux command with -m option alias.', () => {
const inputText = 'abcdeabcd';
const customModelPath = path.resolve(__dirname, 'models', 'separate_right_before_a.json');
const argv = ['node', 'budoux', '-m', customModelPath, inputText];
const expectedStdOuts = 'abcde\nabcd'.split('\n');
(0, cli_js_1.cli)(argv);
expectedStdOuts.forEach(stdout => {
expect(console.log).toHaveBeenCalledWith(stdout);
});
});
it('should use the corresponding language model when the -l parameter is given.', () => {
const inputTextHans = '我们的使命是整合全球信息,供大众使用,让人人受益。';
const expectedStdOuts = (0, index_js_1.loadDefaultParsers)()
.get('zh-hans')
.parse(inputTextHans);
const argv = ['node', 'budoux', '-l', 'zh-hans', inputTextHans];
(0, cli_js_1.cli)(argv);
expectedStdOuts.forEach(stdout => {
expect(console.log).toHaveBeenCalledWith(stdout);
});
});
it('should use the corresponding language model when the --lang parameter is given.', () => {
const inputTextHans = '我們的使命是匯整全球資訊,供大眾使用,使人人受惠。';
const expectedStdOuts = (0, index_js_1.loadDefaultParsers)()
.get('zh-hant')
.parse(inputTextHans);
const argv = ['node', 'budoux', '--lang', 'zh-hant', inputTextHans];
(0, cli_js_1.cli)(argv);
expectedStdOuts.forEach(stdout => {
expect(console.log).toHaveBeenCalledWith(stdout);
});
});
it('should prioritize -m and --model over -l and --lang', () => {
const inputTextHans = '我們的使a命';
const customModelPath = path.resolve(__dirname, 'models', 'separate_right_before_a.json');
const argv = [
'node',
'budoux',
'--model',
customModelPath,
'--lang',
'zh-hant',
inputTextHans,
];
(0, cli_js_1.cli)(argv);
const expectedStdOuts = '我們的使\na命'.split('\n');
expectedStdOuts.forEach(stdout => {
expect(console.log).toHaveBeenCalledWith(stdout);
});
});
it('should output the separated sentence with separater when execute budoux command with --delim option.', () => {
const inputText = '今日は天気です。\n明日は雨かな?';
const argv = ['node', 'budoux', '--delim', '---', inputText];
const expectedStdOuts = '今日は\n天気です。\n---\n明日は\n雨かな?'.split('\n');
(0, cli_js_1.cli)(argv);
expectedStdOuts.forEach(stdout => {
expect(console.log).toHaveBeenCalledWith(stdout);
});
});
it('should output the separated sentence with separater when execute budoux command with -d option alias.', () => {
const inputText = '今日は天気です。\n明日は雨かな?';
const argv = ['node', 'budoux', '--delim', '---', inputText];
const expectedStdOuts = '今日は\n天気です。\n---\n明日は\n雨かな?'.split('\n');
(0, cli_js_1.cli)(argv);
expectedStdOuts.forEach(stdout => {
expect(console.log).toHaveBeenCalledWith(stdout);
});
});
it('should output the separated sentence with separater when execute budoux with stdin inputed by pipe', async () => {
const runCliWithStdin = (stdin) => {
return new Promise(resolve => {
const binPath = path.resolve('./bin/budoux.js');
const child = (0, child_process_1.execFile)('node', [binPath], (error, stdout, stderr) => {
resolve({
error,
stdout,
stderr,
});
});
const stdinStream = new stream_1.default.Readable();
stdinStream.push(stdin);
stdinStream.push(null);
if (child.stdin) {
stdinStream.pipe(child.stdin);
}
});
};
const { stdout } = await runCliWithStdin('今日は天気です。\n明日は雨かな?');
const expectedStdOut = '今日は\n天気です。\n---\n明日は\n雨かな?\n';
expect(stdout).toBe(expectedStdOut);
}, 3000);
it('should output the error message when get more than one text argument.', () => {
const argv = [
'node',
'budoux',
'今日は天気です。',
'明日は晴れるでしょう。',
];
const stab = () => (0, cli_js_1.cli)(argv);
expect(stab).toThrowError('Too many arguments. Please, pass the only one argument.');
});
it('should output the error message when get extra option argument.', () => {
const argv = [
'node',
'budoux',
'--delim',
'---',
'<extra delimiter option arguments>',
'今日は天気です。',
];
const stab = () => (0, cli_js_1.cli)(argv);
expect(stab).toThrowError('Too many arguments. Please, pass the only one argument.');
});
it('should output the error message when get extra option argument.', () => {
const customModelPath = path.resolve(__dirname, 'models', 'separate_right_before_a.json');
const argv = [
'node',
'budoux',
'--model',
customModelPath,
'<extra model option arguments>',
'今日は天気です。',
];
const stab = () => (0, cli_js_1.cli)(argv);
expect(stab).toThrowError('Too many arguments. Please, pass the only one argument.');
});
it('should output the unknown option error when execute budoux command with -v option.', async () => {
const { stderr } = await runCli(['-v']);
expect(stderr).toBe("error: unknown option '-v'\n");
}, 3000);
});
//# sourceMappingURL=test_cli.js.map
;