/*
 * @odict/opencc-js
 * Version: (not captured in this listing)
 * The JavaScript version of Open Chinese Convert (OpenCC)
 * 125 lines (123 loc) • 4.21 kB
 * JavaScript
 */
var __defProp = Object.defineProperty;
var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
var __getOwnPropNames = Object.getOwnPropertyNames;
var __hasOwnProp = Object.prototype.hasOwnProperty;
var __defNormalProp = (obj, key, value) => key in obj ? __defProp(obj, key, { enumerable: true, configurable: true, writable: true, value }) : obj[key] = value;
var __export = (target, all) => {
for (var name in all)
__defProp(target, name, { get: all[name], enumerable: true });
};
var __copyProps = (to, from, except, desc) => {
if (from && typeof from === "object" || typeof from === "function") {
for (let key of __getOwnPropNames(from))
if (!__hasOwnProp.call(to, key) && key !== except)
__defProp(to, key, { get: () => from[key], enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable });
}
return to;
};
var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);
var __publicField = (obj, key, value) => __defNormalProp(obj, typeof key !== "symbol" ? key + "" : key, value);
// src/trie.ts
// Namespace object that collects this module's named exports.
var trie_exports = {};
// Register `Trie` as a getter rather than a value: `var Trie` is hoisted but
// only assigned further down, so the arrow function defers the read until the
// export is actually accessed.
__export(trie_exports, {
Trie: () => Trie
});
// Publish the namespace as the CommonJS export object; __toCommonJS adds the
// `__esModule` flag and forwards each export through a getter.
module.exports = __toCommonJS(trie_exports);
/**
 * Prefix tree used for dictionary-based text replacement.
 *
 * Each level is a `Map` from a Unicode code point to the next level. A level
 * that terminates a dictionary entry carries the replacement text in an extra
 * `trie_val` property set directly on that `Map`.
 */
var Trie = class {
  constructor() {
    // Root level of the tree; starts out empty.
    __publicField(this, "map", /* @__PURE__ */ new Map());
  }

  /**
   * Register a single dictionary entry.
   * @param sourceText The text to look for
   * @param replacementText The text emitted in its place
   */
  addWord(sourceText, replacementText) {
    let level = this.map;
    // String iteration yields whole code points, so surrogate pairs stay together.
    for (const symbol of sourceText) {
      const key = symbol.codePointAt(0);
      let next = level.get(key);
      if (next == null) {
        next = /* @__PURE__ */ new Map();
        level.set(key, next);
      }
      level = next;
    }
    level.trie_val = replacementText;
  }

  /**
   * Register every entry of a JSON dictionary (Record<string, string>).
   * @param dict Object whose keys are source texts and values are replacements
   */
  loadDict(dict) {
    Object.entries(dict).forEach(([sourceText, replacementText]) => {
      this.addWord(sourceText, replacementText);
    });
  }

  /**
   * Register several JSON dictionaries, in the order given.
   * @param dicts Array of JSON dictionaries
   */
  loadDictGroup(dicts) {
    dicts.forEach((dict) => this.loadDict(dict));
  }

  /**
   * Rewrite a string by scanning left to right and, at each position,
   * substituting the longest dictionary entry that starts there. Text that
   * matches no entry is copied through unchanged.
   * @param inputString The string to convert
   */
  convert(inputString) {
    const root = this.map;
    const total = inputString.length;
    const pieces = [];
    // Offset where the current run of unmatched text began, or null if none is open.
    let pendingFrom = null;
    let position = 0;

    while (position < total) {
      let level = root;
      let matchEnd = 0; // 0 doubles as "no match": a real match always ends past offset 0
      let matchValue;
      let cursor = position;

      // Walk the tree as far as the input allows, remembering the deepest entry seen.
      while (cursor < total) {
        const key = inputString.codePointAt(cursor);
        // Code points above U+FFFF occupy two UTF-16 code units.
        cursor += key > 0xffff ? 2 : 1;
        const next = level.get(key);
        if (next === undefined) {
          break;
        }
        level = next;
        const candidate = level.trie_val;
        if (candidate !== undefined) {
          matchEnd = cursor;
          matchValue = candidate;
        }
      }

      if (matchEnd === 0) {
        // Nothing starts here: open (or extend) the unmatched run and skip one code point.
        if (pendingFrom === null) {
          pendingFrom = position;
        }
        position += inputString.codePointAt(position) > 0xffff ? 2 : 1;
        continue;
      }

      // Flush the unmatched text collected before this match, then the replacement.
      if (pendingFrom !== null) {
        pieces.push(inputString.slice(pendingFrom, position));
        pendingFrom = null;
      }
      pieces.push(matchValue);
      position = matchEnd;
    }

    if (pendingFrom !== null) {
      pieces.push(inputString.slice(pendingFrom, total));
    }
    return pieces.join("");
  }
};
// Annotate the CommonJS export names for ESM import in node:
// The `0 &&` guard short-circuits, so this assignment never executes; the
// statement exists only so the export name `Trie` appears literally in the
// source text. Keep its shape unchanged.
0 && (module.exports = {
Trie
});
;