// cecc
// Version:
// 繁簡轉換函式庫 追求正確率 先解析詞性再繁簡轉換 繁體中文↔簡體中文轉換 Chinese converter between Traditional Chinese and Simplified Chinese.
// 1,221 lines (1,072 loc) • 98.5 kB
// JavaScript
/*
TODO:
簡化辭典複雜度: 分割個別作品的辭典為特設辭典。
依照前後詞彙再建立 Map(),避免條件式串列過長。這可能得考慮如何合併詞性標註錯誤時的條件式。
+ PoS: "n*" 放在 "n*:" 之下。
https://zhuanlan.zhihu.com/p/95358646
常用的关键词提取算法:TF-IDF算法、TextRank算法
https://blog.csdn.net/vivian_ll/article/details/106647666
利用jieba进行关键字提取时,有两种接口。一个基于TF-IDF算法,一个基于TextRank算法。
https://s.itho.me/techtalk/2017/%E4%B8%AD%E6%96%87%E6%96%B7%E8%A9%9E%EF%BC%9A%E6%96%B7%E5%8F%A5%E4%B8%8D%E8%A6%81%E6%82%B2%E5%8A%87.pdf
某個詞在⼀篇⽂章中出現的頻率⾼,且在其他⽂章中很少出現,則此詞語為具代表性的關鍵詞
*/
'use strict';
// modify from wikiapi.js
let CeL;
try {
// Load CeJS library.
CeL = require('cejs');
} catch (e) /* istanbul ignore next: Only for debugging locally */ {
// https://github.com/gotwarlost/istanbul/blob/master/ignoring-code-for-coverage.md
require('./_CeL.loader.nodejs.js');
CeL = globalThis.CeL;
}
// assert: typeof CeL === 'function'
// 在非 Windows 平台上避免 fatal 錯誤。
CeL.env.ignore_COM_error = true;
// Load modules.
CeL.run(['application.debug',
// 載入不同地區語言的功能 for wiki.work()。
'application.locale',
// Add color to console messages. 添加主控端報告的顏色。
'interact.console',
// CeL.data.Convert_Pairs.remove_comments()
'data.Convert_Pairs',
// for CeL_CN_to_TW()
'extension.zh_conversion',
//for CeL.get_URL()
'application.net.Ajax',
// for 'application.platform.nodejs': CeL.env.arg_hash, CeL.wiki.cache(),
// CeL.fs_mkdir(), CeL.wiki.read_dump()
'application.storage']);
/** {Number}未發現之index。 const: 基本上與程式碼設計合一,僅表示名義,不可更改。(=== -1) */
const NOT_FOUND = ''.indexOf('_');
const module_base_path = CeL.append_path_separator(module.path);
const test_directory = CeL.append_path_separator(module_base_path + '_test suite');
// Cache default convertors without CeCC.
const CeL_CN_to_TW = CeL.zh_conversion.CN_to_TW, CeL_TW_to_CN = CeL.zh_conversion.TW_to_CN;
// ----------------------------------------------------------------------------
// default
const KEY_word = 'word', KEY_PoS_tag = 'tag', KEY_filter_name = 'filter_name';
const DEFAULT_TEST_FILE_EXTENSION = 'txt';
// File name templates of the dictionaries, keyed by the language converted TO.
// "%name" is replaced by the parser name and "%type" by the dictionary type.
const dictionary_template = {
	TW: 'CN_to_TW.%name.%type.txt',
	CN: 'TW_to_CN.%name.%type.txt'
};

/**
 * Build the dictionary file names for the parser of `this`.
 *
 * Must be called with `this` bound to an object carrying `.parser_name`.
 * When `type` is omitted the result is also stored in
 * `this.dictionary_file_paths`.
 *
 * @param {String}[type] dictionary type; defaults to 'PoS'. For 'filters'
 *            the file extension becomes "js".
 * @returns {Object} prototype-less map { TW: file name, CN: file name }
 * @throws {Error} when `this.parser_name` is not set.
 */
function get_dictionary_file_paths(type) {
	const { parser_name } = this;
	if (!parser_name) {
		throw new Error('No parser name specified!');
	}

	const type_label = type || 'PoS';
	const is_filter_list = type === 'filters';
	const dictionary_file_paths = Object.create(null);

	Object.keys(dictionary_template).forEach(language => {
		const file_name = dictionary_template[language]
			.replace('%name', parser_name)
			.replace('%type', type_label);
		// Filters are JavaScript modules: swap the trailing extension.
		dictionary_file_paths[language] = is_filter_list
			? file_name.replace(/[^.]+$/, 'js')
			: file_name;
	});

	if (!type) {
		this.dictionary_file_paths = dictionary_file_paths;
	}
	return dictionary_file_paths;
}
// CeCC
/**
 * Chinese converter between Traditional (TW) and Simplified (CN) Chinese that
 * tags the text with a part-of-speech parser before converting.
 */
class Chinese_converter {
	/**
	 * Select the PoS parser (LTP, Stanford CoreNLP, or nodejieba as the
	 * fallback), then load the dictionaries and the default texts to check.
	 *
	 * @param {Object}[options]
	 *            .LTP_URL: URL of an LTP server; setting it also turns on .using_LTP.
	 *            .using_LTP: use LTP as the parser.
	 *            .CoreNLP_URL: URL of a Stanford CoreNLP server.
	 */
	constructor(options) {
		this.convertion_pairs = Object.create(null);
		this.KEY_word = KEY_word;
		this.KEY_PoS_tag = KEY_PoS_tag;

		if (options?.LTP_URL) {
			this.LTP_URL = options.LTP_URL;
			// NOTE(review): this writes .using_LTP back to the caller's options object.
			options.using_LTP = options.using_LTP || true;
		}

		if (options?.using_LTP) {
			// Highest accuracy.
			this.KEY_word = 'text';
			this.KEY_PoS_tag = 'pos';
			this.TAG_punctuation = 'wp';
			this.condition_filter = condition_filter_LTP;
			this.parser_name = 'LTP';
			this.filters = get_dictionary_file_paths.call(this, 'filters');
			for (const language in this.filters) {
				// Replace each filter file name with the loaded module.
				const dictionary_file_path = this.dictionaries_directory + this.filters[language];
				this.filters[language] = require(dictionary_file_path);
			}
			this.generate_condition = generate_condition_LTP;
			load_synonym_dictionary.call(this);
			this.tag_paragraph = tag_paragraph_LTP;
			// .batch_get_tag: conditions for querying PoS tags in batches:
			// 1. the parser accepts a batch {Array}. 2. a single query is too costly.
			this.batch_get_tag = !this.LTP_URL;

		} else if (options?.CoreNLP_URL) {
			// using Stanford CoreNLP
			this.KEY_PoS_tag = 'pos';
			this.CoreNLP_URL = new URL(options.CoreNLP_URL);
			this.parser_name = 'CoreNLP';
			// https://stanfordnlp.github.io/CoreNLP/corenlp-server.html
			this.CoreNLP_URL_properties = {
				annotators: 'tokenize,ssplit,pos,depparse',
			};
			this.tag_paragraph = tag_paragraph_via_CoreNLP;

		} else {
			// fallback to default: nodejieba
			this.nodejieba_CN = require("nodejieba");
			this.nodejieba_CN.load({ dict: this.dictionaries_directory + 'commons.txt' });
			this.parser_name = 'jieba';
			this.tag_paragraph = tag_paragraph_jieba;
		}

		// Sets this.dictionary_file_paths; each entry is then expanded to a
		// full path and loaded.
		get_dictionary_file_paths.call(this);
		for (const language in this.dictionary_file_paths) {
			const dictionary_file_path = this.dictionary_file_paths[language]
				= this.dictionaries_directory + this.dictionary_file_paths[language];
			load_dictionary.call(this, dictionary_file_path, { language });
		}

		if (CeL.is_debug()) {
			// These are the relatively time-consuming conversions.
			// Defined once, outside the loop, instead of once per language.
			const show_convertion_pairs = (_pairs, tag) => {
				const size = _pairs.size;
				if (size > 0) {
					CeL.log(`\t${tag || 'general'}\t${size} convertion(s)${size < 9 ? '\t' + Array.from(_pairs.keys()).join('\t') : ''}`);
				}
			};
			for (const [language, convertion_pairs] of Object.entries(this.convertion_pairs)) {
				CeL.info(`convertion pairs for ${language}:`);
				show_convertion_pairs(convertion_pairs.get(KEY_general_pattern_filter));
				for (const [tag, _pairs] of Object.entries(convertion_pairs.get(KEY_tag_pattern_filter))) {
					show_convertion_pairs(_pairs, tag);
				}
			}
		}

		this.load_default_text_to_check();
	}

	/**
	 * convert to TW
	 * @param {Array}paragraphs [{String}, {String}, ...] (may also be a promise of it)
	 * @param {Object}[options]
	 */
	async to_TW(paragraphs, options) {
		return await convert_Chinese.call(this, await paragraphs, { convert_to_language: 'TW', ...options });
	}

	/** Same as to_TW() but hands back whatever convert_Chinese() returns, without awaiting. */
	to_TW_sync(paragraphs, options) {
		return convert_Chinese.call(this, paragraphs, { convert_to_language: 'TW', ...options });
	}

	/**
	 * convert to CN
	 * @param {Array}paragraphs [{String}, {String}, ...] (may also be a promise of it)
	 * @param {Object}[options]
	 */
	async to_CN(paragraphs, options) {
		return await convert_Chinese.call(this, await paragraphs, { convert_to_language: 'CN', ...options });
	}

	/** Same as to_CN() but hands back whatever convert_Chinese() returns, without awaiting. */
	to_CN_sync(paragraphs, options) {
		return convert_Chinese.call(this, paragraphs, { convert_to_language: 'CN', ...options });
	}

	// Automatically detect the context (domain) of sentences and paragraphs
	// (to go with Wikipedia proper-noun conversion).
	detect_domain(paragraphs, options) {
		// TODO
	}

	/**
	 * Probe whether an LTP server is usable.
	 *
	 * @param {String|Object}[options] a string is treated as the LTP URL;
	 *            otherwise options.LTP_URL defaults to 'http://localhost:5000/'.
	 *            options.skip_server_test skips the probe.
	 * @returns {Promise} resolves to the LTP URL when the probe returned an
	 *          Array (or the probe was skipped), false when the probe returned
	 *          something else, undefined when the probe threw.
	 */
	static async has_LTP_server(options) {
		if (typeof options === 'string') {
			// treat options as LTP_URL
			options = { LTP_URL: options };
		} else {
			options = { LTP_URL: 'http://localhost:5000/', ...options };
		}

		if (options.skip_server_test) {
			CeL.debug('強制使用 LTP server,跳過對 LTP server 的運作測試。請只在您準備全程使用 cache 的情況下才使用這個選項。', 1, Chinese_converter.has_LTP_server.name);
			return options.LTP_URL;
		}

		try {
			// Note: the text used to test the LTP server must not contain
			// spaces or alphanumeric characters!
			const result = await tag_paragraph_LTP.call(options, '測試繁簡轉換伺服器是否正常運作');
			return Array.isArray(result) && options.LTP_URL;
		} catch (e) {
			// A failed probe is an expected outcome (no server available), so
			// it is not rethrown; but record the reason instead of dropping it.
			CeL.debug(`LTP server test failed: ${e}`, 1, Chinese_converter.has_LTP_server.name);
		}
	}

	//#parse_condition = parse_condition
}
// ----------------------------------------------------------------------------
/**
 * Insert ".converted" in front of the trailing file extension.
 * e.g., "a.TW.txt" → "a.TW.converted.txt"
 * A name without a trailing ".<word characters>" extension is returned unchanged.
 *
 * @param {String}convert_from_text__file_name file name or path
 * @returns {String} name of the matching ".converted" file
 */
function to_converted_file_path(convert_from_text__file_name) {
	const PATTERN_trailing_extension = /(\.\w+)$/;
	return convert_from_text__file_name.replace(PATTERN_trailing_extension,
		extension => '.converted' + extension);
}
/**
 * Read a test article, convert it to the opposite language, and write the
 * result out as the new answer file (backed up into a "backup" directory
 * through the write option).
 *
 * Must be called with `this` bound to a converter providing .to_CN() / .to_TW().
 *
 * @param {String}convert_from_text__file_path path of the file to read
 * @param {String}convert_to_text__file_status despite its name, this is the
 *            PATH of the file to write: the callers in this file pass a path
 *            and it is handed to CeL.write_file() as the target.
 * @param {Object}options
 *            .text_is_TW: the source is TW, so convert it to CN; otherwise to TW.
 *            .convert_options: conversion options; defaults to
 *            regenerate_converted.default_convert_options.
 *            .convert_from_text__file_name: name shown in the log message.
 */
async function regenerate_converted(convert_from_text__file_path, convert_to_text__file_status, options) {
	const name_to_show = options.convert_from_text__file_name || convert_from_text__file_path;
	CeL.info(`${regenerate_converted.name}: Generate a new answer file for ${name_to_show}...`);

	const source_text = CeL.read_file(convert_from_text__file_path).toString();
	const convert_options = options.convert_options || regenerate_converted.default_convert_options;

	let converted_text;
	if (options.text_is_TW) {
		converted_text = await this.to_CN(source_text, convert_options);
	} else {
		converted_text = await this.to_TW(source_text, convert_options);
	}

	const write_options = { backup: { directory_name: 'backup' } };
	CeL.write_file(convert_to_text__file_status, converted_text, write_options);
}

regenerate_converted.default_convert_options = {
	cache_directory: CeL.append_path_separator(test_directory + 'cache_data'),
	cache_file_for_short_sentences: true,
	// Only texts longer than this get a cache file of their own; shorter ones
	// go into .cache_file_for_short_sentences.
	min_cache_length: 40,
};
/**
 * Resolve the paths and file statuses of a test article and of its converted
 * counterpart, and decide whether the counterpart has to be (re)generated.
 *
 * Must be called with `this` bound to an object carrying .test_articles_directory.
 *
 * @param {String}convert_from_text__file_name file name inside the test articles directory
 * @param {Object}[options]
 *            .convert_to_text__file_path: explicit path of the counterpart file.
 *            .convert_to_text__file_name: its name inside the test articles directory.
 *            .regenerate_converted: force regeneration.
 * @returns {Object} { convert_from_text__file_path, convert_from_text__file_status,
 *          convert_to_text__file_path, convert_to_text__file_status,
 *          need_to_generate_new_convert_to_text__file }
 */
function get_convert_to_text__file_status(convert_from_text__file_name, options) {
	options = CeL.setup_options(options);

	const convert_from_text__file_path = this.test_articles_directory + convert_from_text__file_name;
	const convert_from_text__file_status = CeL.fso_status(convert_from_text__file_path);

	// Precedence: explicit path > explicit name > derived ".converted" name.
	let convert_to_text__file_path = options.convert_to_text__file_path;
	if (!convert_to_text__file_path) {
		if (options.convert_to_text__file_name) {
			convert_to_text__file_path = this.test_articles_directory + options.convert_to_text__file_name;
		} else {
			convert_to_text__file_path = Chinese_converter.to_converted_file_path(convert_from_text__file_path);
		}
	}
	const convert_to_text__file_status = CeL.fso_status(convert_to_text__file_path);

	// Regenerate when forced, when the counterpart has no status, or when the
	// article was modified after the counterpart.
	let need_to_generate_new_convert_to_text__file = options.regenerate_converted;
	if (!need_to_generate_new_convert_to_text__file) {
		need_to_generate_new_convert_to_text__file = !convert_to_text__file_status
			|| convert_from_text__file_status.mtime - convert_to_text__file_status.mtime > 0;
	}

	return {
		convert_from_text__file_path,
		convert_from_text__file_status,
		convert_to_text__file_path,
		convert_to_text__file_status,
		need_to_generate_new_convert_to_text__file
	};
}
/**
 * Decide whether a test article can be skipped because nothing changed since
 * its last test. Regenerates the converted counterpart first when needed.
 *
 * Must be called with `this` bound to a converter providing
 * .regenerate_converted() and .dictionary_file_paths.
 *
 * @param {String}convert_from_text__file_name file name of the test article
 * @param {Object}[options]
 *            .recheck: always treat the article as new.
 *            .latest_test_result, .test_name: previous results are looked up at
 *            .latest_test_result[.test_name].test_results[file name].
 *            Other options are passed on to get_convert_to_text__file_status()
 *            and .regenerate_converted().
 * @returns {Promise} resolves to true when the article need NOT be checked
 *          again; a falsy value (undefined / false) when it has to be checked.
 */
async function not_new_article_to_check(convert_from_text__file_name, options) {
	options = CeL.setup_options(options);
	const {
		convert_from_text__file_path, convert_from_text__file_status,
		convert_to_text__file_path, convert_to_text__file_status,
		need_to_generate_new_convert_to_text__file
	} = get_convert_to_text__file_status.call(this, convert_from_text__file_name, options);

	if (need_to_generate_new_convert_to_text__file) {
		// Regenerate the .converted.* answer file.
		await this.regenerate_converted(convert_from_text__file_path, convert_to_text__file_path, { ...options, convert_from_text__file_name, });
	}

	if (options.recheck) {
		// A recheck was requested: count it as changed even if the dictionary
		// files are old.
		return;
	}

	// -----------------------------------
	// Check whether a dictionary file changed after the last test.
	const latest_test_result = options.latest_test_result && options.latest_test_result[options.test_name];
	const latest_test_result_for_file = latest_test_result && latest_test_result.test_results && latest_test_result.test_results[convert_from_text__file_name];
	// Reference time: the date of the last error-free test; otherwise the newer
	// of the article and its .converted.* answer file.
	const latest_test_result_date = latest_test_result_for_file?.error_count === 0 ? Date.parse(latest_test_result_for_file?.date)
		: convert_to_text__file_status ? Math.max(convert_from_text__file_status.mtime.getTime(), convert_to_text__file_status.mtime.getTime()) : convert_from_text__file_status.mtime.getTime();

	for (const dictionary_file_path of Object.values(this.dictionary_file_paths)) {
		const dictionary_file_status = CeL.fso_status(dictionary_file_path);
		if (dictionary_file_status.mtime - latest_test_result_date > 0) {
			CeL.info(`${not_new_article_to_check.name}: ${convert_from_text__file_name}: 有新詞典檔 ${dictionary_file_path}`);
			// Drop the stale per-file result. It lives under .test_results
			// (where it was read from above), not on the test entry itself.
			if (latest_test_result?.test_results)
				delete latest_test_result.test_results[convert_from_text__file_name];
			return;
		}
	}

	// Check whether the last test is newer than the article.
	if (latest_test_result_date - convert_from_text__file_status.mtime > 0) {
		return !convert_to_text__file_status || latest_test_result_date > convert_to_text__file_status.mtime;
	}
}
// Key of the list of answer files already loaded, kept on
// this.generate_condition_for_language.
const KEY_files_loaded = Symbol('files loaded');

/**
 * Load an answer file ("….TW.<ext>" / "….CN.<ext>") together with its
 * ".converted" source file, and register every source text → expected text
 * pair in this.generate_condition_for_language[language] (a Map).
 *
 * Must be called with `this` bound to a converter.
 *
 * @param {String|Object}should_be_text__file_name file name of the answer
 *            file, or { all: true } to load every "watch_target.*" file of the
 *            test articles directory, or { work_title, convert_to_language }.
 * @param {Object}[options]
 *            .export: properties copied into every registered entry (e.g. work_title).
 *            .reset: start over with a fresh registry (unless only defaults are loaded).
 *            .is_default: the file is one of the default files.
 *            Other options are passed on to get_convert_to_text__file_status()
 *            and .regenerate_converted().
 * @returns {Object|Promise|undefined} this.generate_condition_for_language on
 *          success (a promise of it when the source file had to be regenerated
 *          first); undefined when nothing was registered.
 * @throws {Error} when an object argument has neither .all nor .work_title.
 */
function load_text_to_check(should_be_text__file_name, options) {
	if (CeL.is_Object(should_be_text__file_name)) {
		if (should_be_text__file_name.all) {
			CeL.read_directory(this.test_articles_directory).forEach(from_file_name => {
				const matched = from_file_name.match(/watch_target\.(?<work_title>[^.]+)\.(?<to_language>TW|CN)\.\w+$/);
				if (matched) {
					this.load_text_to_check(from_file_name, {
						export: { work_title: matched.groups.work_title }
					});
				}
			});
			return;
		}

		if (should_be_text__file_name.work_title) {
			options = CeL.setup_options(options);
			if (!options.export)
				options.export = Object.create(null);
			if (!options.export.work_title)
				options.export.work_title = should_be_text__file_name.work_title;
			// e.g., "watch_target.第一序列.TW.txt"
			should_be_text__file_name = `watch_target.${should_be_text__file_name.work_title}.${should_be_text__file_name.convert_to_language}.${DEFAULT_TEST_FILE_EXTENSION}`;
		} else {
			throw new Error(`${load_text_to_check.name}: Invalid should_be_text__file_name: ${JSON.stringify(should_be_text__file_name)}`);
		}
	}

	// The language is taken from the file name: "….TW.<ext>" or "….CN.<ext>".
	let check_language = should_be_text__file_name.match(/\.(TW|CN)\.\w+$/);
	if (!check_language) {
		CeL.error(`無法判別檔案之語言: ${should_be_text__file_name}`);
		return;
	}
	check_language = check_language[1];

	const convert_to_text__data = get_convert_to_text__file_status.call(this, should_be_text__file_name, options);
	const should_be_text__file_path = convert_to_text__data.convert_from_text__file_path;

	if (!this.generate_condition_for_language
		|| options?.reset && !this.generate_condition_for_language.only_default) {
		// Initialise the registry.
		this.generate_condition_for_language = { [KEY_files_loaded]: [], only_default: true };
		if (!options?.is_default)
			this.load_default_text_to_check();
	}

	if (this.generate_condition_for_language[KEY_files_loaded].includes(should_be_text__file_path)) {
		CeL.log(`${load_text_to_check.name}: The file is already loaded, skip ${should_be_text__file_path}`);
		return;
	}
	if (!options?.is_default)
		delete this.generate_condition_for_language.only_default;
	this.generate_condition_for_language[KEY_files_loaded].push(should_be_text__file_path);

	const should_be_texts = get_paragraphs_of_file(should_be_text__file_path, { with_configurations: true });
	if (!should_be_texts)
		return;

	const source_text__file_path = convert_to_text__data.convert_to_text__file_path;
	if (convert_to_text__data.need_to_generate_new_convert_to_text__file) {
		// Regenerate the .converted.* file first, then register the pairs.
		return this.regenerate_converted(should_be_text__file_path, source_text__file_path, {
			...options,
			convert_from_text__file_name: should_be_text__file_name, text_is_TW: check_language === 'TW',
			convert_options: {
				...regenerate_converted.default_convert_options,
				cache_directory: CeL.append_path_separator(regenerate_converted.default_convert_options.cache_directory + should_be_text__file_name)
			}
		}).then(setup_generate_condition_for.bind(this));
	} else {
		return setup_generate_condition_for.call(this);
	}

	function setup_generate_condition_for() {
		// should_be_text__file_path: the .TW.* file holds the answer / target of
		// the conversion, so what gets checked is the opposite language.
		// The .converted file is the source text!
		const source_texts = get_paragraphs_of_file(source_text__file_path);
		if (!source_texts)
			return;

		if (should_be_texts.length !== source_texts.length) {
			// Name BOTH files being compared: the answer file and the .converted source file.
			CeL.error(`${should_be_text__file_name} 與 ${source_text__file_path} 含有不同數量之字串!此${CeL.gettext.get_alias(check_language)}之標的檔與欲測試之項目數不符,將不採用解答!若檔案為自動生成,您可以刪除舊檔後,重新生成轉換標的檔案。`);
			return;
		}

		// this.generate_condition_for_language[convert_to_language] = { convert_from_text: should_convert_to_text, ... }
		const generate_condition_for = this.generate_condition_for_language[check_language]
			|| (this.generate_condition_for_language[check_language] = new Map);
		const generate_condition_for__title = `${options?.export?.work_title ? `《${options.export.work_title}》` : '通用 '
			}${CeL.gettext.get_alias(check_language === 'TW' ? 'CN' : 'TW')}→${CeL.gettext.get_alias(check_language)}`;

		should_be_texts.forEach((should_convert_to_text, index) => {
			const configuration = should_be_texts.configurations[should_convert_to_text];
			let text = source_texts[index];
			if (configuration?.原文) {
				if (configuration.原文 === text) {
					CeL.info(`${setup_generate_condition_for.name}: 轉換前後文字相同,無需設定"原文" ${JSON.stringify(text)}: ${JSON.stringify(configuration)}`);
				} else {
					// The configuration supplies the real source text; keep the
					// converted one for reference.
					configuration.original_text_converted = text;
					text = configuration.原文;
				}
			}

			if (generate_condition_for.has(text)) {
				CeL.log(`${setup_generate_condition_for.name}: ${generate_condition_for__title}: 重複設定 ${JSON.stringify(text)}`);
			}
			generate_condition_for.set(text, { should_convert_to_text, ...options?.export, ...configuration });
		});

		const total_count = generate_condition_for.size;
		CeL.info(`${load_text_to_check.name}: 自動檢核 ${should_be_texts.length}個${generate_condition_for__title
			} 之字串。${total_count === should_be_texts.length ? '' : `總共檢核 ${total_count}個。`} From ${should_be_text__file_path}`);
		return this.generate_condition_for_language;
	}
}
/**
 * Load every default answer file listed in this.text_to_check_files, marking
 * each as a default file ({ is_default: true }).
 * The texts loaded here are tested for a match on every conversion.
 */
function load_default_text_to_check() {
	for (const from_file_name of this.text_to_check_files) {
		this.load_text_to_check(from_file_name, { is_default: true });
	}
}
/**
 * Display helper: summarise how the loaded check texts of one target language
 * fared, and log the summary.
 *
 * Entries without a .work_title are not counted. Entries without a
 * .check_result are reported as "lost".
 *
 * Must be called with `this` bound to a converter.
 *
 * @param {Object}options
 *            .convert_to_language: selects the registry to report on.
 * @returns {Object|undefined} { lost_texts, OK_count, NG_count }; undefined
 *          when nothing has been loaded at all or nothing for that language.
 */
function report_text_to_check(options) {
	if (!this.generate_condition_for_language)
		return;
	const generate_condition_for = this.generate_condition_for_language[options.convert_to_language];
	if (!generate_condition_for) {
		// No answer file was loaded for this language: nothing to report.
		// (Iterating undefined here would throw a TypeError.)
		return;
	}

	const SGR_style = CeL.interact.console.SGR_style;
	const normal_style = (new SGR_style('fg=green;bg=black')).toString(), NG_style = (new SGR_style('fg=red;bg=white')).toString(), reset_style = (new SGR_style({ reset: true })).toString();

	// lost_texts: records (and later shows) which texts were never processed.
	const lost_texts = [], multi_matched = Object.create(null);
	let OK_count = 0, NG_count = 0;
	for (const [convert_from, convert_data] of generate_condition_for.entries()) {
		if (!convert_data.work_title) {
			// e.g., the commonly mis-converted words @ this.text_to_check_files
			continue;
		}

		const { check_result } = convert_data;
		if (!check_result) {
			lost_texts.push(convert_data.should_convert_to_text);
			continue;
		}

		// List texts that had any NG match or more than one OK match.
		if (check_result.NG.length > 0 || check_result.OK.length /* + check_result.NG.length */ > 1) {
			multi_matched[convert_from] = check_result.OK.length;
			if (check_result.NG.length > 0)
				multi_matched[convert_from] += ` + ${NG_style}${check_result.NG.length} NG${normal_style}`;
		}

		if (check_result.NG.length > 0)
			NG_count++;
		else
			OK_count++;
	}

	const message = `${report_text_to_check.name}: ${OK_count} OK, ${NG_count
		} NG.${lost_texts.length > 0 ? ` ${lost_texts.length} lost:\n\t${lost_texts.join('\n\t')}` : ''}`;
	if (NG_count > 0) {
		CeL.error(message);
	} else {
		CeL.log(message);
	}

	const multi_matched_keys = Object.keys(multi_matched);
	if (multi_matched_keys.length > 0) {
		// Shows how many times each text was matched.
		CeL.log(`multi matched counts:\n${normal_style
			}${multi_matched_keys.map(convert_from => `\t${convert_from}: \t${multi_matched[convert_from]}`).join('\n')
			}${reset_style}`);
	}

	return { lost_texts, OK_count, NG_count };
}
// ----------------------------------------------------------------------------
// Separator between the individual conditions of one full condition text.
const condition_delimiter = '+';
/*
conditions will be split by `condition_delimiter`:
word
PoS:word
PoS:
// "~": marks this condition as the target text (is_target)
~PoS:word
// "!": selects what does NOT match this condition (not_match)
!PoS:word
~!PoS:word
// trailing "?": this condition is optional and may be skipped (is_optional)
~!PoS:word?
// --------------------------
word:
text
/search_pattern/flags
/search_pattern/replace_to/flags
// "~/pattern of the wrongly converted Traditional text/correct Traditional replace_to/flags$" means: do the Traditional/Simplified conversion first, then apply this replacement. Only applicable to the target text (is_target).
~/pattern/replace_to/flags
text~/pattern/replace_to/flags
/search_pattern/flags~/pattern/replace_to/flags
text<filter_name>filter_target
*/
// Named groups: is_target ("~"), not_match ("!"), tag (PoS, followed by ":"), word (text or pattern), is_optional (trailing "?").
// [ condition, is target, not match, tag (PoS), word / pattern, is optional / repeat range ]
const PATTERN_condition = /^(?<is_target>~)?(?<not_match>!)?(?:(?<tag>[^:+<>]+):)?(?<word>.*?)(?<is_optional>\?)?$/;
// Splits "word~/pattern/replace_to/flags" into the word and the replacement to
// apply after converting. Built from the body of CeL.PATTERN_RegExp_replacement
// with its first and last character removed by .slice(1, -1).
// [ all, word, do_after_converting ]
const PATTERN_do_after_converting = new RegExp('^(?<word>.*?)~(?<do_after_converting>' + CeL.PATTERN_RegExp_replacement.source.slice(1, -1) + ')?$');
/**
 * Escape a condition text for display: control characters such as "\n" are
 * shown in their JSON-escaped form, while double quotes stay unescaped.
 *
 * @param {String}condition_text
 * @returns {String} the JSON form of the text, without the enclosing quotes
 */
function stringify_condition(condition_text) {
	// JSON.stringify(): takes care of "\n" and friends.
	const json_text = JSON.stringify(condition_text);
	const without_enclosing_quotes = json_text.slice(1, -1);
	return without_enclosing_quotes.replace(/\\"/g, '"');
}
/**
 * Render a tagged word as condition text: "tag:word", or just "word" when the
 * word has no tag.
 *
 * Must be called with `this` carrying .KEY_PoS_tag and .KEY_word, the property
 * names of the tag and the word inside word_data.
 *
 * @param {Object}word_data tagged word
 * @param {Object}[options]
 *            .including_prefix_spaces: put the prefix spaces of the word
 *            (escaped) between the tag and the word.
 * @returns {String} condition text
 */
function word_data_to_condition(word_data, options) {
	const tag = word_data[this.KEY_PoS_tag];
	const tag_part = tag ? tag + ':' : '';

	let prefix_spaces_part = '';
	if (options?.including_prefix_spaces && word_data[KEY_prefix_spaces]) {
		prefix_spaces_part = stringify_condition(word_data[KEY_prefix_spaces]);
	}

	// A string word is escaped; anything else (e.g. a pattern) is used as is.
	const word = word_data[this.KEY_word];
	let word_part = typeof word === 'string' && stringify_condition(word);
	if (!word_part) {
		word_part = word || '';
	}

	return tag_part + prefix_spaces_part + word_part;
}
// parse rule
/**
 * Convert {String}full_condition_text to {Object}word_data (a single
 * condition) or to an {Array} of conditions.
 *
 * Must be called with `this` carrying .KEY_word and .KEY_PoS_tag (and
 * .condition_filter when the text uses a "<filter_name>" filter).
 *
 * @param {String}full_condition_text conditions joined by `condition_delimiter`
 * @param {Object}[options]
 *            .no_target: do not mark any condition as the target.
 *            .matched_condition: copied onto the target condition.
 *            .full_condition_splited, .index: tokens of the outer condition
 *            text and the index of the token being parsed; set by the
 *            recursive call for a filter target.
 *            .combined_token_count: OUTPUT of that recursive call: how many
 *            outer tokens were consumed.
 * @returns {Object|Array} the only condition when there is exactly one;
 *          otherwise the list, with .target_index set unless options.no_target.
 * @throws {Error} when a filter is used but this.condition_filter is not set.
 */
function parse_condition(full_condition_text, options) {
	let target_index;
	function set_as_target(condition_data) {
		condition_data.is_target = true;
		condition_data.full_condition_text = full_condition_text;
		if (options?.matched_condition)
			condition_data.matched_condition = options.matched_condition;
	}

	const condition = [];
	const full_condition_splited = full_condition_text.split(condition_delimiter);
	// accumulated_target_index_diff: number of tokens merged away so far, to
	// map a token index to an index of `condition`.
	for (let index = 0, accumulated_target_index_diff = 0; index < full_condition_splited.length; index++) {
		let token = full_condition_splited[index];
		let matched = token.match(PATTERN_condition).groups;
		if (/^\//.test(matched.tag) && /\(\?$/.test(matched.tag)) {
			// The "tag" is really the head of a pattern containing "(?:".
			// e.g., "/^(?:a)$/"
			matched.word = matched.tag + ':' + matched.word;
			matched.tag = undefined;
		}

		if (/^\/(\\\/|[^\/])+$/.test(matched.word)) {
			// Handle a RegExp pattern that itself contains condition_delimiter:
			// keep appending the following tokens while looking for a complete pattern.
			// e.g., ~里+/^许.+河$/ v:卷+m:/^[\\d〇一二三四五六七八九零十]+$/+~裡
			// `options` is optional, so it must be null-checked here as well.
			const full_condition_splited_expanded = Array.isArray(options?.full_condition_splited)
				? full_condition_splited.concat(options.full_condition_splited.slice(options.index + 1))
				: full_condition_splited;
			// Stop at the last existing token; never append a missing one.
			for (let combined_token = token, next_index = index; next_index + 1 < full_condition_splited_expanded.length;) {
				const next_token = full_condition_splited_expanded[++next_index];
				combined_token += condition_delimiter + next_token;
				const _matched = combined_token.match(PATTERN_condition).groups;
				if (CeL.PATTERN_RegExp.test(_matched.word) || CeL.PATTERN_RegExp_replacement.test(_matched.word)) {
					token = combined_token;
					matched = _matched;
					accumulated_target_index_diff += next_index - index;
					index = next_index;
				}
			}
			if (options && index >= full_condition_splited.length) {
				// Tokens of the outer condition text were consumed: report how many.
				// e.g., ~干<role.type:A1>/那.+何事$/
				options.combined_token_count = index - full_condition_splited.length + 1;
			}
		}

		const condition_data = Object.create(null);
		if (matched.is_target && !options?.no_target) {
			set_as_target(condition_data);
			if (target_index >= 0)
				CeL.warn(`${parse_condition.name}: Multiple targets: ${full_condition_text}`);
			else
				target_index = index - accumulated_target_index_diff;
		}

		// "word~/pattern/replace_to/flags": split off the replacement to apply
		// after converting.
		const after_converting_matched = matched.word && matched.word.match(PATTERN_do_after_converting);
		if (after_converting_matched) {
			const { word, do_after_converting } = after_converting_matched.groups;
			matched.word = word;
			const replacement = do_after_converting?.to_RegExp({ allow_replacement: true });
			if (replacement)
				condition_data.do_after_converting = replacement;
		}

		if (matched.word) {
			let filter = matched.word.match(/^(?<word>.*?)<(?<filter_name>[^<>]+)>(?<filter_target>.*?)$/);
			if (filter) {
				if (!this.condition_filter)
					throw new Error('No .condition_filter set but set filter: ' + matched.word);
				filter = filter.groups;
				const _options = { no_target: true, full_condition_splited, index };
				Object.assign(condition_data, {
					[this.KEY_word]: filter.word,
					[KEY_filter_name]: filter.filter_name,
					filter_target: parse_condition.call(this, filter.filter_target, _options)
				});
				if (_options.combined_token_count > 0) {
					// The filter target swallowed following tokens: skip them.
					token = full_condition_splited.slice(index, index + _options.combined_token_count + 1).join(condition_delimiter);
					accumulated_target_index_diff += _options.combined_token_count;
					index += _options.combined_token_count;
				}
			} else {
				condition_data[this.KEY_word] = CeL.PATTERN_RegExp.test(matched.word) || CeL.PATTERN_RegExp_replacement.test(matched.word)
					? matched.word.to_RegExp({ allow_replacement: true })
					// allow '\n', '\t' in filter.
					: matched.word.replace(/\\\w/g, char => JSON.parse(`"${char}"`));
			}
		}

		condition_data.condition_text = token;
		if (matched.not_match) {
			// !!: store a real boolean. XOR on the raw string could go wrong: ('!'^true)===1
			condition_data.not_match = !!matched.not_match;
		}
		if (matched.tag)
			condition_data[this.KEY_PoS_tag] = matched.tag;
		if (matched.is_optional)
			condition_data.is_optional = true;
		condition.push(condition_data);
	}

	if (!(target_index >= 0) && !options?.no_target) {
		// No explicit target (always so for a single plain token):
		// the first condition is the target by default.
		set_as_target(condition[0]);
	}

	if (condition.length === 1) {
		return condition[0];
	}

	if (!options?.no_target) {
		// default: set [0] as target.
		condition.target_index = target_index || 0;
	}
	return condition;
}
// ------------------------------------------------------------------
// Property name under which the condition that matched a word is stored.
const KEY_matched_condition = 'matched condition';

/**
 * Display helper: log the candidate condition(s) that would correct one
 * wrongly converted text.
 *
 * @param {Array}correction_condition candidate conditions, carrying
 *            .parsed (with .text and optionally the matched condition),
 *            .target and .error_converted_to.
 * @param {Object}options
 *            .tagged_convert_from_text: {Array} tagged words of the sentence;
 *            when set, a ready-to-paste "// ..." comment line is logged too.
 *            .work_title, .original_sentence_word_list: shown in that line.
 */
function print_correction_condition(correction_condition, options) {
	const { parsed } = correction_condition;
	const to_word_data = parsed[KEY_matched_condition];

	// Stays empty unless a condition had matched this word.
	let matched_condition_mark = '';
	if (to_word_data) {
		const matched_condition_prefix = to_word_data.matched_condition ? `${to_word_data.matched_condition} → ` : '';
		matched_condition_mark = ` 匹配的條件式: ${matched_condition_prefix}${to_word_data.full_condition_text}`;
		CeL.warn(`Matched condition${matched_condition_mark}`);
	}

	// Offer the candidate conditions that would produce the answer.
	const from_text = JSON.stringify(correction_condition.parsed.text);
	const target_text = JSON.stringify(correction_condition.target);
	const wrong_text = JSON.stringify(correction_condition.error_converted_to);
	CeL.info(`Candidate correction for ${from_text}→${target_text} (錯誤轉換為 ${wrong_text}):`);

	const { work_title, original_sentence_word_list, tagged_convert_from_text } = options;
	if (tagged_convert_from_text) {
		const list = [];
		for (const correction of correction_condition.slice(1)) {
			if (!correction.includes('<←'))
				list.push(correction);
		}
		list.push(tagged_convert_from_text.join(condition_delimiter));

		const parse_error_mark = matched_condition_mark ? ' 解析錯誤 @' : '';
		const work_title_mark = work_title ? ` 《${work_title}》` : '';
		CeL.info(`//${parse_error_mark}${work_title_mark} ${original_sentence_word_list} (${list.join(' ')})${matched_condition_mark}`);
	}

	CeL.info(correction_condition.join('\t'));
}
/**
 * Display helper: print a report for one problematic item. It logs the tagged
 * words of the source text with the matched words highlighted, the source
 * text, a coloured diff between the conversion result and the expected
 * answer, and the candidate correction conditions.
 *
 * Must be called with `this` carrying .KEY_word, .KEY_PoS_tag and
 * .TAG_punctuation.
 *
 * @param {Object}configuration
 *            .tagged_word_list: tagged words of the text.
 *            .condition_list: candidate conditions; its .index_hash marks the
 *            word indexes to highlight.
 *            .convert_from_text, .convert_to_text, .should_convert_to_text:
 *            source text, conversion result and expected answer.
 *            .start_index, .end_index: range of tagged_word_list to show.
 *            .distance_token_header_to_metched: offset correction applied at
 *            the first shown word.
 *            .show_tagged_word_list: also dump the tagged words (debug).
 *            .test_title: used to derive the work title when options has none.
 * @param {Object}options
 *            .convert_to_language: 'TW' or 'CN'.
 *            .message_should_be: header of the expected answer; default '應為'.
 *            .work_title: title shown with the candidate conditions.
 */
function print_section_report(configuration, options) {
const { tagged_word_list, condition_list, convert_from_text, convert_to_text, should_convert_to_text, show_tagged_word_list,
start_index, end_index, distance_token_header_to_metched } = configuration;
const { index_hash } = condition_list;
const SGR_style = CeL.interact.console.SGR_style;
const normal_style_tagged = (new SGR_style('fg=blue;bg=cyan')).toString(), marked_style_row = 'fg=red;bg=white', marked_style = (new SGR_style(marked_style_row)).toString(), reset_style = (new SGR_style({ reset: true })).toString();
const normal_style_convert_from_text_row = 'fg=green;bg=black';
const ansi_convert_from_text = new CeL.interact.console.SGR(convert_from_text);
// Number of extra words shown before start_index / after end_index.
let backward = 0, forward = 0;
// A "fragment" is an excerpt whose expected answer is at most 4 characters long.
const is_fragment = start_index >= 0 && should_convert_to_text.chars().length <= 4;
if (is_fragment) {
// When the excerpt is too short, automatically expand it to a whole sentence.
// assert: 0 <= start_index < end_index
let index = start_index;
// Search backward for punctuation.
while (index > 0) {
const word_data = tagged_word_list[--index];
if (word_data[this.KEY_PoS_tag] === this.TAG_punctuation) {
// Leave a sentence-ending / pausing punctuation mark out of the excerpt.
if (index < start_index && /[、,;:。?!…]$/.test(word_data[this.KEY_word]))
index++;
break;
}
}
backward = start_index - index;
// assert: 0 <= backward <= start_index
// start from next tagged_word_list[], at least move 1 step.
// Search forward for punctuation.
index = end_index;
while (index < tagged_word_list.length) {
const word_data = tagged_word_list[index++];
if (word_data[this.KEY_PoS_tag] === this.TAG_punctuation) {
break;
}
}
forward = index - end_index;
//console.trace([start_index, backward, end_index, forward]);
}
const tagged_word_list_pieces = start_index >= 0 ? tagged_word_list.slice(start_index - backward, end_index + forward) : tagged_word_list;
//console.trace(tagged_word_list_pieces);
// offset: running position inside convert_from_text, starting after its leading whitespace.
let offset = convert_from_text.match(/^\s*/)[0].length, original_sentence_word_list = [];
const tagged_convert_from_text = [];
const matched_conditions = [];
//console.trace([convert_from_text, offset, distance_token_header_to_metched, start_index, backward]);
// First line: the source language mark, then every shown word as condition text.
// The map callback also collects tagged_convert_from_text, original_sentence_word_list
// and matched_conditions, and styles the matched ranges of ansi_convert_from_text.
CeL.log(`${normal_style_tagged
}${CeL.gettext.get_alias(options.convert_to_language === 'TW' ? 'CN' : 'TW').slice(0, 1)
}\t${tagged_word_list_pieces.map((word_data, index) => {
const prefix_spaces = index > 0 && word_data[KEY_prefix_spaces] || '';
// The condition filter excludes prefix spaces by default, so prefix_spaces is listed separately.
// @see match_single_condition()
const text = stringify_condition(prefix_spaces) + word_data_to_condition.call(this, word_data);
tagged_convert_from_text.push(text);
original_sentence_word_list.push(prefix_spaces + word_data[this.KEY_word]);
const matched_condition_data = word_data[KEY_matched_condition];
if (matched_condition_data) {
//console.trace(matched_condition_data);
matched_conditions.push(matched_condition_data.matched_condition + ' → ' + matched_condition_data.condition_text);
}
// Rebase index onto the original excerpt; words added by the backward
// expansion (index < 0) are shown without highlighting.
if (backward && (index -= backward) < 0) {
return text;
}
if (prefix_spaces)
offset += prefix_spaces.length;
const start_offset = offset;
offset += word_data[this.KEY_word].length;
if (index === 0) {
// assert: convert_from_text.trimStart().startsWith(word_data_to_condition.call(this, word_data).slice(distance_token_header_to_metched));
if (distance_token_header_to_metched) {
//console.trace([distance_token_header_to_metched, prefix_spaces.length, word_data]);
// assert: distance_token_header_to_metched >= prefix_spaces.length
offset -= distance_token_header_to_metched - (word_data[KEY_prefix_spaces] || '').length;
}
}
// Only words flagged in index_hash get highlighted.
if (!index_hash[start_index >= 0 ? start_index + index : index]) {
return text;
}
//console.trace([word_data, index_hash[index], start_offset, offset, ansi_convert_from_text.style]);
if (ansi_convert_from_text.style_at(start_offset, true)) {
// assert: matched contiguously, without a gap. .style_at(start_offset, normal_style_convert_from_text_row) was already set earlier.
// assert: ansi_convert_from_text.style_at(start_offset, true).toString() === normal_style_convert_from_text_row.toString()
ansi_convert_from_text.style_at(start_offset, null);
} else {
ansi_convert_from_text.style_at(start_offset, marked_style_row);
}
ansi_convert_from_text.style_at(offset, normal_style_convert_from_text_row);
//console.trace([start_offset, offset, convert_from_text.slice(start_offset, offset), text, convert_from_text.slice(word_data.offset, word_data.offset + word_data[this.KEY_word].length)]);
return marked_style + text + normal_style_tagged;
}).join(condition_delimiter)
}${reset_style}`);
original_sentence_word_list = original_sentence_word_list.join('');
if (is_fragment) {
// show the whole sentence
CeL.log(`\t原文⇒${reset_style}${JSON.stringify(original_sentence_word_list)}`);
}
//console.log(ansi_convert_from_text);
//CeL.log(`\t${JSON.stringify(convert_from_text)}`);
CeL.log(`${(new SGR_style(normal_style_convert_from_text_row)).toString()
}原文:\t ${ansi_convert_from_text.toString().replace(/\r/g, '\\r').replace(/\n/g, '\\n')}${reset_style}`);
// Colour the differences between the conversion result and the expected answer.
CeL.coloring_diff(JSON.stringify(convert_to_text), JSON.stringify(should_convert_to_text), {
headers: [
`→ ${CeL.gettext.get_alias(options.convert_to_language).slice(0, 1)}\t`,
` ${options.message_should_be || '應為'}\t`
],
header_style: { fg: 'cyan' }, print: true
});
// Work title: from options, else extracted from a "watch_target.<title>.<TW|CN>" test title.
let { work_title } = options;
if (!work_title && (work_title = configuration.test_title)) {
work_title = work_title.match(/watch_target\.(.+)\.(TW|CN)/);
if (work_title)
work_title = work_title[1];
}
condition_list.forEach(condition => print_correction_condition(condition, {
work_title,
original_sentence_word_list,
tagged_convert_from_text,
}));
if (matched_conditions.length > 0) {
matched_conditions.unshift('匹配的條件式:');
CeL.log(matched_conditions.join('\n\t'));
}
if (!is_fragment) {
// Also show what the plain zh_conversion convertor (without CeCC) does.
CeL.log(`單純 zh_conversion 轉換過程:`);
CeL.log('單純:\t ' + (options.convert_to_language === 'TW' ? CeL_CN_to_TW : CeL_TW_to_CN)(convert_from_text, { show_matched: true }));
}
if (show_tagged_word_list) {
CeL.debug(beautify_tagged_word_list(tagged_word_list_pieces), 1);
}
}
/** {Boolean} Keep running tests whose answer differs in length from the converted text, e.g., regional wording conversions. */
const skip_tests_convert_to_different_length = true;

/**
 * Report when the converted text and the expected answer differ in character count.
 *
 * Lengths are compared through `.chars()` on both texts. When they differ, a
 * message is logged (unless `no_warning` is set) and a colored diff is printed.
 *
 * @param {String} converted_text_String  the text to check
 * @param {String} [should_be_text]  the expected answer
 * @param {Boolean} [no_warning]  suppress the warning / error message (the diff is still printed)
 * @param {Boolean} [is_convert_from]  label the first text as "before conversion" instead of "after conversion"
 * @returns {Boolean|undefined} `undefined` when the lengths agree; otherwise
 *          `true` means "skip this test" (the negation of `skip_tests_convert_to_different_length`).
 */
function check_convert_to_different_length(converted_text_String, should_be_text, no_warning, is_convert_from) {
	const converted_length = converted_text_String.chars().length;
	const expected_length = should_be_text?.chars().length;
	if (converted_length === expected_length)
		return;

	if (!no_warning) {
		const reporter_name = check_convert_to_different_length.name;
		if (skip_tests_convert_to_different_length) {
			CeL.warn(`${reporter_name}: 預設解答與轉換後之文字長度不符!`);
		} else {
			CeL.error(`${reporter_name}: 預設解答與轉換後之文字長度不符,跳過此項!`);
		}
	}

	// Color the differing parts of the two texts.
	const first_header = `${is_convert_from ? '轉換前:' : '轉換後:'}\t`;
	CeL.coloring_diff(converted_text_String, should_be_text, {
		headers: [first_header, '解答:\t'],
		header_style: { fg: 'yellow' },
		print: true
	});

	// return true: Skip this test.
	return !skip_tests_convert_to_different_length;
}
// ------------------------------------------------------------------
// Keys of the special sub-tables stored inside a `convertion_pairs` Map, and the property name
// (`KEY_pattern`) under which a condition list keeps the {RegExp} it was registered with.
const KEY_tag_filter = Symbol('tag filter'), KEY_tag_pattern_filter = Symbol('tag pattern filter'), KEY_general_pattern_filter = Symbol('general pattern filter'), KEY_pattern = 'pattern';

/**
 * Find — or, with `options.create`, register — the condition list belonging to a word or pattern.
 *
 * Must be called with a `this` that provides `KEY_word` and `KEY_PoS_tag`.
 *
 * Tables consulted inside `convertion_pairs`:
 * - plain word, no tag: `convertion_pairs` itself, keyed by the word;
 * - plain word with `try_tag` and a tag: `convertion_pairs.get(KEY_tag_filter)[tag]` (a Map keyed by the word);
 * - pattern, no tag: `convertion_pairs.get(KEY_general_pattern_filter)`;
 * - pattern with `try_tag` and a tag: `convertion_pairs.get(KEY_tag_pattern_filter)[tag]`.
 *
 * @param {Object} options
 * @param {Object} options.word_data  `word_data[this.KEY_word]` is the {String} word or a {RegExp};
 *            `word_data[this.KEY_PoS_tag]` is the optional tag.
 * @param {Map} options.convertion_pairs  the table of one language
 * @param {Boolean} [options.create]  create missing entries instead of returning `undefined`
 * @param {Boolean} [options.try_tag]  use the tag-specific table when the word has a tag
 * @param {Boolean} [options.search_pattern]  treat the {String} word as text to be tested against the registered patterns
 * @param {Boolean} [options.get_all_matched_conditions]  return an {Array} of condition lists instead of a single list
 * @returns {Array|undefined} the condition list (or list of lists), `undefined` when nothing is registered
 */
function get_convert_to_conditions(options) {
	const { word_data, convertion_pairs }
		// In case of "Variable 'options' is null checked here, but its property is accessed without null check prior"
		= options === null ? Object.create(null) : options;
	let convertion_set, key = word_data[this.KEY_word], pattern;
	const KEY_PoS_tag = this.KEY_PoS_tag;

	// Switch `convertion_set` to the Map of the word's tag inside the table stored under `KEY`.
	// Returns `undefined` when that Map does not exist and must not be created.
	function set_tag_convertion(KEY) {
		convertion_set = convertion_pairs.get(KEY);
		if (!convertion_set[word_data[KEY_PoS_tag]]) {
			if (!options?.create)
				return;
			convertion_set[word_data[KEY_PoS_tag]] = new Map;
		}
		return convertion_set = convertion_set[word_data[KEY_PoS_tag]];
	}

	if (CeL.is_RegExp(key) || options?.search_pattern) {
		if (options?.try_tag && word_data[KEY_PoS_tag]) {
			if (!set_tag_convertion(KEY_tag_pattern_filter))
				return;
		} else {
			convertion_set = convertion_pairs.get(KEY_general_pattern_filter);
		}

		if (CeL.is_RegExp(key)) {
			pattern = key;
			// The Map key is the pattern text with the `g` flag removed.
			key = key.toString().replace(/(\w)+$/, flags => flags.replace(/[g]/, ''));
		} else {
			const all_matched_conditions = [];
			for (const convert_to_conditions of convertion_set.values()) {
				// assert {Array}convert_to_conditions
				const condition_pattern = convert_to_conditions[KEY_pattern];
				// A pattern carrying the `g` (or `y`) flag resumes `.test()` from `.lastIndex`.
				// Always start from the beginning of `key`, otherwise repeated look-ups of
				// the same word alternate between "matched" and "not matched".
				condition_pattern.lastIndex = 0;
				const is_matched = condition_pattern.test(key);
				condition_pattern.lastIndex = 0;
				if (!is_matched)
					continue;
				if (!options.get_all_matched_conditions)
					return convert_to_conditions;
				all_matched_conditions.push(convert_to_conditions);
			}
			if (all_matched_conditions.length > 0) {
				return all_matched_conditions;
			}
			// No pattern matched: fall through to the exact-key look-up below.
		}

	} else if (options?.try_tag && word_data[KEY_PoS_tag]) {
		if (!set_tag_convertion(KEY_tag_filter))
			return;
	} else {
		convertion_set = convertion_pairs;
	}

	if (!convertion_set.has(key)) {
		if (!options?.create)
			return;
		// initialization
		const new_conditions = [];
		if (pattern)
			new_conditions[KEY_pattern] = pattern;
		convertion_set.set(key, new_conditions);
	}

	const convert_to_conditions = convertion_set.get(key);
	// assert: {Array}convert_to_conditions
	return options.get_all_matched_conditions ? [convert_to_conditions] : convert_to_conditions;
}
const KEY_postfix = Symbol('postfix');

/**
 * (Re)build `this.convertion_pairs[options.language]` from a dictionary file.
 *
 * Every paragraph returned by `get_paragraphs_of_file()` is a TAB separated
 * line: the first field is the matching condition, every further field is a
 * conversion condition. Conditions flagged `.do_after_converting` are always
 * kept behind the ones without that flag.
 *
 * @param {String} file_path  path of the dictionary file
 * @param {Object} options
 * @param {String} options.language  key of the table to (re)create
 * @throws {Error} when a matching condition is parsed as `.not_match` (not yet implemented)
 */
function load_dictionary(file_path, options) {
	const dictionary_lines = get_paragraphs_of_file(file_path);

	// initialization: a fresh table holding the special sub-tables.
	const convertion_pairs = new Map;
	this.convertion_pairs[options.language] = convertion_pairs;
	convertion_pairs.set(KEY_tag_filter, Object.create(null));
	convertion_pairs.set(KEY_tag_pattern_filter, Object.create(null));
	convertion_pairs.set(KEY_general_pattern_filter, new Map);
	convertion_pairs.set(KEY_postfix, []);

	// Append `condition` to `list`, keeping every `.do_after_converting` entry at the tail.
	const add_condition = (list, condition) => {
		const appendable = condition.do_after_converting
			|| list.length === 0
			|| !list[list.length - 1].do_after_converting;
		if (appendable) {
			// TODO: turn {Array} patterns into {RegExp} patterns and use .replace(pattern, token => match_condition(token)).
			list.push(condition);
			return;
		}
		// Walk back to the start of the trailing run of `.do_after_converting`
		// entries and insert the plain condition right before it.
		let insert_at = list.length - 1;
		while (insert_at > 0 && list[insert_at - 1].do_after_converting)
			insert_at--;
		list.splice(insert_at, 0, condition);
	};

	for (const line of dictionary_lines) {
		const fields = line.split('\t');
		const matched_condition = fields[0].trim();
		if (fields.length < 2 || !matched_condition) {
			CeL.error(`${load_dictionary.name}: 未設定轉換條件: ${fields.join('\t')}`);
			continue;
		}

		const filter = parse_condition.call(this, matched_condition);
		const is_postfix = filter.filter_name === 'postfix';
		if (!is_postfix && !filter[this.KEY_word] && !filter[this.KEY_PoS_tag]) {
			// assert: !!matched_condition === true
			CeL.error(`Invalid word filter: ${matched_condition}`);
			continue;
		}
		if (filter.not_match)
			throw new Error('NYI: not_match');

		const convert_to_conditions = is_postfix
			? convertion_pairs.get(KEY_postfix)
			: get_convert_to_conditions.call(this, { word_data: filter, convertion_pairs, create: true, try_tag: true });

		for (const [field_index, raw_condition] of fields.entries()) {
			if (field_index === 0) {
				// The first field is the matching condition handled above.
				continue;
			}
			if (!raw_condition.trim()) {
				CeL.error(`${load_dictionary.name}: Empty condition[${field_index}] in ${JSON.stringify(fields)}`);
				continue;
			}
			add_condition(convert_to_conditions, parse_condition.call(this, raw_condition, { matched_condition }));
		}
	}
}
const KEY_synonym_pattern = Symbol('synonym pattern');

/**
 * Load `synonym.<language>.txt` from `this.dictionaries_directory` for every
 * language listed in `dictionary_template`, filling `this.synonyms_of_language`.
 *
 * File format (after comments are removed), one entry per line:
 * - `standard<TAB>variant<TAB>variant…`: every variant maps to `[standard]`;
 * - `<TAB>variant<TAB>variant…` (empty standard form): every variant maps to the whole variant list;
 * - a single field matching `CeL.PATTERN_RegExp_replacement`: converted with
 *   `.to_RegExp()` and appended to the `[KEY_synonym_pattern]` list of the Map.
 *
 * Languages whose file cannot be read still get an (empty) Map.
 */
function load_synonym_dictionary() {
	// this.synonyms_of_language['TW'] = {Map} { '台灣' => [ '臺灣' ] }
	if (!this.synonyms_of_language)
		this.synonyms_of_language = Object.create(null);

	for (const language in dictionary_template) {
		if (!this.synonyms_of_language[language]) {
			// initialization
			this.synonyms_of_language[language] = new Map;
			this.synonyms_of_language[language][KEY_synonym_pattern] = [];
		}
		const synonyms_Map = this.synonyms_of_language[language];

		let synonym_data = CeL.read_file(this.dictionaries_directory + `synonym.${language}.txt`);
		if (!synonym_data)
			continue;
		synonym_data = CeL.data.Convert_Pairs.remove_comments(synonym_data.toString().replace(/\r/g, ''));

		synonym_data.split('\n').forEach(line => {
			if (!line)
				return;
			const synonyms = line.split('\t');
			// The first field is the standard form; the remaining fields are its variants.
			const 正字正詞 = synonyms.shift();
			if (synonyms.length === 0) {
				if (CeL.PATTERN_RegExp_replacement.test(正字正詞)) {
					// {RegExp} pattern of interchangeable characters / synonyms
					synonyms_Map[KEY_synonym_pattern].push(正字正詞.to_RegExp({ allow_replacement: true }));
				} else {
					CeL.error(`${load_synonym_dictionary.name}: No synonym settle: ${正字正詞}`);
				}
				return;
			}

			// When a standard form is given, only converting to the standard form is
			// allowed, never to a variant.
			const allowed_synonyms = 正字正詞 ? [正字正詞] : synonyms;
			synonyms.forEach(synonym => {
				if (!synonym)
					return;
				// A repeated variant is reported, then overwritten by the later entry.
				if (synonyms_Map.has(synonym))
					CeL.error(`${load_synonym_dictionary.name}: 重複設定: ${JSON.stringify(synonym)}`);
				synonyms_Map.set(synonym, allowed_synonyms);
			});
		});
	}
}
// ----------------------------------------------------------------------------
// @inner Must be used together with generate_condition_LTP().
/**
 * Evaluate a named filter condition (`single_condition.filter_name`) against a tagged word.
 *
 * Checked in this order:
 * 1. the name is a key of `this.filters[options.convert_to_language]` → `true`;
 * 2. the name equals `word_data.relation` → the word at `word_data.parent` must match `filter_target`;
 * 3. the name is `←relation` → some word of the same query whose `.parent` is this
 *    word and whose `.relation` is `relation` must match `filter_target`;
 * 4. the name contains `role` / `parent`, optionally followed by `.property:value` →
 *    some entry of `word_data.roles` / `word_data.parents` (merged with the word
 *    it points at) must have that property value and match `filter_target`.
 *
 * @param {Object} single_condition  condition with `.filter_name` and `.filter_target`
 * @param {Object} word_data  the word being tested; uses `.id`, `.parent`, `.relation`, `.roles` / `.parents`
 * @param {Object} options  uses `.convert_to_language`, `.tagged_word_list`, `.index_of_tagged_word_list`
 * @returns {*} truthy when the condition is satisfied; `undefined` when no rule applies
 */
function condition_filter_LTP(single_condition, word_data, options) {
	if (single_condition.filter_name in this.filters[options.convert_to_language])
		return true;

	const { tagged_word_list } = options;
	// assert: word_data === tagged_word_list[options.index_of_tagged_word_list]
	// `.id` / `.parent` are relative to the query the word came from; this offset
	// converts them to indexes of `tagged_word_list`.
	const tagged_word_list_index_offset = options.index_of_tagged_word_list - word_data.id;

	if (single_condition.filter_name === word_data.relation) {
		// The condition names this word's relation: test the word it depends on.
		// e.g., ~只<ATT>b:/表/
		return match_single_condition.call(this, single_condition.filter_target, tagged_word_list[tagged_word_list_index_offset + word_data.parent], options);
	}

	let matched = single_condition.filter_name.match(/^←(.+)$/);
	if (matched) {
		const relation = matched[1];
		// Search the reverse relation: words depending on this word.
		for (let index = tagged_word_list_index_offset, latest_id = -1; index < tagged_word_list.length; index++) {
			const word_data_to_test = tagged_word_list[index];
			if (latest_id >= word_data_to_test.id) {
				// tagged_word_list may be stitched together from several queries by
				// recover_original_paragraphs(). Once the id stops increasing, we have
				// left the query of this word.
				// assert: word_data_to_test.id === 0
				return;
			}
			// assert: word_data_to_test.id === latest_id + 1
			latest_id = word_data_to_test.id;
			if (word_data_to_test.parent === word_data.id
				&& word_data_to_test.relation === relation
				&& match_single_condition.call(this, single_condition.filter_target, word_data_to_test, options)
			) {
				return true;
			}
		}
	}

	matched = single_condition.filter_name.match(/(?<property_name>(?:role|parent))(?:\.(?<sub_property_name>[^:]+):(?<sub_property_value>.+))?/);
	if (matched) {
		matched = matched.groups;
		const filter_target = single_condition.filter_target;
		// e.g., 沖<role.type:A1>/[水浴杯]/
		// Search roles / parents.
		return word_data[matched.property_name + 's'].some(token => {
			const parent_index = tagged_word_list_index_offset + token.parent;
			// Test a merged COPY instead of assigning the word's properties onto `token`.
			// Mutating `token` overwrote `token.parent` (so the next evaluation resolved a
			// different word) and created circular structures that break
			// JSON.stringify(tagged_word_list).
			const token_to_test = parent_index in tagged_word_list
				? { ...token, ...tagged_word_list[parent_index] }
				: token;
			return (!matched.sub_property_name || token_to_test[matched.sub_property_name] === matched.sub_property_value)
				&& match_single_condition.call(this, filter_target, token_to_test, options);
		});
	}
}
/**
 * Test one word against one single condition.
 *
 * Must be called with a `this` providing `KEY_word`, `KEY_PoS_tag` and,
 * for named filters, `condition_filter`.
 *
 * @param {Object} single_condition  may carry a filter name, a tag filter, a word filter, `.not_match`, `.is_target`
 * @param {Object} word_data  the word to test
 * @param {Object} options  forwarded unchanged to `this.condition_filter`
 * @returns {*} truthy when the word satisfies the condition. The word test yields
 *          the number `0` / `1` (XOR with `.not_match`); callers only rely on truthiness.
 */
function match_single_condition(single_condition, word_data, options) {
	// Named filters are delegated to the installed condition filter, if any.
	if (single_condition[KEY_filter_name]) {
		return this.condition_filter && this.condition_filter(single_condition, word_data, options);
	}

	const { not_match } = single_condition;

	// Part-of-speech tag test.
	// ICTPOS3.0 tag set https://gist.github.com/luw2007/6016931 http://ictclas.nlpir.org/
	// CKIP tag list http://ckipsvr.iis.sinica.edu.tw/cat.htm https://github.com/ckiplab/ckiptagger/wiki/POS-Tags
	// NLPIR tag set http://103.242.175.216:197/nlpir/html/readme.htm
	const tag_filter = single_condition[this.KEY_PoS_tag];
	const tag_rejected = tag_filter && !CeL.fit_filter(tag_filter, word_data[this.KEY_PoS_tag]);
	if (tag_rejected)
		return not_match;

	const word_filter = single_condition[this.KEY_word];
	if (!word_filter)
		return true;
	// For the target, [this.KEY_word] may be the string to convert to; such a
	// string is not used as a filter.
	if (single_condition.is_target && !CeL.is_RegExp(word_filter))
		return true;

	return not_match ^ CeL.fit_filter(word_filter, word_data[this.KEY_word]);
}
/**
 * Test a word, together with its neighbours, against a condition.
 *
 * A non-array `conditions` is tested against the word alone. An array is a
 * sequence of single conditions: the entry at `conditions.target_index`
 * (default 0) must match the word itself, the following entries the following
 * words and the preceding entries the preceding words. An entry flagged
 * `.is_optional` may be skipped without consuming a word.
 *
 * @param {Object} options
 * @param {Object|Array} options.conditions  single condition or sequence of conditions
 * @param {Object} options.word_data  the word at `options.index_of_tagged_word_list`
 * @param {Array} options.tagged_word_list  all words
 * @param {Number} options.index_of_tagged_word_list  index of `word_data`
 * @returns {*} the matched (target) condition, or a falsy value when not matched
 */
function match_condition(options) {
	const { conditions, word_data, tagged_word_list } = options;
	if (!Array.isArray(conditions))
		return match_single_condition.call(this, conditions, word_data, options) && conditions;

	const target_index = conditions.target_index || 0;
	const target_condition = conditions[target_index];

	// The target entry must match the current word.
	if (!match_single_condition.call(this, target_condition, word_data, options))
		return;

	// Check forwards: entries after the target against the words after this one.
	let condition_cursor = target_index + 1;
	let word_cursor = options.index_of_tagged_word_list + 1;
	while (condition_cursor < conditions.length) {
		if (word_cursor >= tagged_word_list.length)
			return;
		const condition = conditions[condition_cursor++];
		if (match_single_condition.call(this, condition, tagged_word_list[word_cursor], options)) {
			word_cursor++;
		} else if (!condition.is_optional) {
			return;
		}
		// An unmatched optional entry is skipped; the same word is tried with the next entry.
	}

	// Check backwards: entries before the target against the words before this one.
	condition_cursor = target_index - 1;
	word_cursor = options.index_of_tagged_word_list - 1;
	while (condition_cursor >= 0) {
		if (word_cursor < 0)
			return;
		const condition = conditions[condition_cursor--];
		if (match_single_condition.call(this, condition, tagged_word_list[word_cursor], options)) {
			word_cursor--;
		} else if (!condition.is_optional) {
			return;
		}
	}

	return conditions[target_index];
}
function get_matched_condition(options) {
let all_convert_to_conditions = get_convert_to_conditions.call(this, { ...options, get_all_matched_conditions: true });
//console.trace([word_data, all_convert_to_conditions]);
//console.trace(all_convert_to_conditions);
if (!all_convert_to_conditions) {
return;
}
const all_matched_conditions = [];
all_convert_to_conditions.forEach(convert_to_conditions => {
// assert: convert_to_conditions = [{ [this.KEY_word]: '詞', [this.KEY_PoS_tag]: '詞性' }, { [this.KEY_word]: '詞', [this.KEY_PoS_tag]: '詞性' }, ...]
for (let index_of_conditions = 0; index_of_conditions < convert_to_conditions.length; index_of_conditions++) {
const conditions = convert_to_conditions[index_of_conditions];
const matched_condition = match_condition.call(this, { ...options, conditions });
if (matched_condition) {
//console.trace([matched_condition, convert_to_conditions, convert_to_conditions.pattern]);
all_matched_conditions.push(matched_condition);
if (!convert_to_conditions.pattern) {
// 對於非 pattern,僅取第一個 matched 的。
return;
}
}
}
});
if (all_matched_conditions.length > 0) {
//console.trace([all_matched_conditions, all_convert_to_conditions]);
return { all_matched_conditions, all_convert_to_conditions };
}
return { all_conver