UNPKG

@odict/opencc-js

Version:

The JavaScript version of Open Chinese Convert (OpenCC)

125 lines (123 loc) 4.21 kB
"use strict";

// Module interop helpers. They look bundler-emitted (NOTE(review): confirm the
// build tool before hand-editing; changes here may be overwritten by a rebuild).
var __defProp = Object.defineProperty;
var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
var __getOwnPropNames = Object.getOwnPropertyNames;
var __hasOwnProp = Object.prototype.hasOwnProperty;

// Sets `key` on `obj`: uses defineProperty when the key is already reachable
// (own or inherited), plain assignment otherwise.
var __defNormalProp = (obj, key, value) =>
  key in obj
    ? __defProp(obj, key, { enumerable: true, configurable: true, writable: true, value })
    : (obj[key] = value);

// Installs one enumerable getter on `target` per entry of `all`.
var __export = (target, all) => {
  for (var name in all) {
    __defProp(target, name, { get: all[name], enumerable: true });
  }
};

// Copies own properties of `from` onto `to` as live getters, skipping keys
// `to` already owns and the single key named by `except`.
var __copyProps = (to, from, except, desc) => {
  if ((from && typeof from === "object") || typeof from === "function") {
    for (let key of __getOwnPropNames(from)) {
      if (!__hasOwnProp.call(to, key) && key !== except) {
        __defProp(to, key, {
          get: () => from[key],
          enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable,
        });
      }
    }
  }
  return to;
};

// Wraps a namespace object into a fresh object flagged with `__esModule`.
var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);

// Declares a public class field; non-symbol keys are coerced to strings.
var __publicField = (obj, key, value) =>
  __defNormalProp(obj, typeof key !== "symbol" ? key + "" : key, value);

// src/trie.ts
var trie_exports = {};
__export(trie_exports, {
  // Lazy getter: `Trie` is assigned further down, after this call runs.
  Trie: () => Trie,
});
// Guarded so that evaluating this file where `module` is not defined (ES module
// scope, plain browser script) does not throw a ReferenceError. Under CommonJS
// `module` always exists, so the export happens exactly as before.
if (typeof module !== "undefined") {
  module.exports = __toCommonJS(trie_exports);
}

/**
 * Prefix tree keyed by Unicode code point, used for longest-match text
 * replacement.
 *
 * Each node is a `Map<number, Map>` from code point to child node. A node that
 * terminates a dictionary word carries the replacement text in an ad-hoc
 * `trie_val` property set directly on the Map object (not as a Map entry).
 */
var Trie = class {
  constructor() {
    __publicField(this, "map");
    // Root node of the trie.
    this.map = /* @__PURE__ */ new Map();
  }

  /**
   * Add a word to the trie.
   *
   * Adding the same `sourceText` again overwrites the earlier replacement.
   *
   * @param {string} sourceText The string to match
   * @param {string} replacementText The string to replace with
   */
  addWord(sourceText, replacementText) {
    let currentMap = this.map;
    // `for...of` on a string yields whole code points, so characters outside
    // the BMP occupy a single trie level.
    for (const character of sourceText) {
      const codePoint = character.codePointAt(0);
      const existingNode = currentMap.get(codePoint);
      if (existingNode == null) {
        const newNode = /* @__PURE__ */ new Map();
        currentMap.set(codePoint, newNode);
        currentMap = newNode;
      } else {
        currentMap = existingNode;
      }
    }
    // Mark the final node as a word end by attaching the replacement.
    currentMap.trie_val = replacementText;
  }

  /**
   * Load a JSON dictionary (Record<string, string>).
   *
   * @param {Record<string, string>} dict JSON dictionary mapping source text
   *   to replacement text
   */
  loadDict(dict) {
    for (const [key, value] of Object.entries(dict)) {
      this.addWord(key, value);
    }
  }

  /**
   * Load multiple JSON dictionaries, in array order. Because `addWord`
   * overwrites, a key present in several dictionaries keeps the value from
   * the last one.
   *
   * @param {Array<Record<string, string>>} dicts Array of JSON dictionaries
   */
  loadDictGroup(dicts) {
    dicts.forEach((dict) => {
      this.loadDict(dict);
    });
  }

  /**
   * Convert a string using the trie.
   *
   * Scans left to right. At each position the longest dictionary word starting
   * there is replaced; if none matches, one code point is copied through
   * unchanged and scanning resumes after it.
   *
   * @param {string} inputString The string to convert
   * @returns {string} The converted string
   */
  convert(inputString) {
    const rootMap = this.map;
    const inputLength = inputString.length;
    const resultParts = [];
    // Start of the current run of unmatched text, or null when no run is open.
    // Runs are flushed with a single slice instead of per-character pushes.
    let unconvertedStartIndex = null;

    for (let currentIndex = 0; currentIndex < inputLength; ) {
      let currentTrieNode = rootMap;
      // 0 doubles as "no match": any real match ends past currentIndex >= 0.
      let longestMatchEndIndex = 0;
      let longestMatchValue;

      // Walk the trie as far as the input allows, remembering the last node
      // that carried a replacement (the longest match so far).
      for (let searchIndex = currentIndex; searchIndex < inputLength; ) {
        const codePoint = inputString.codePointAt(searchIndex);
        // Code points above U+FFFF occupy two UTF-16 code units.
        searchIndex += codePoint > 65535 ? 2 : 1;
        const nextTrieNode = currentTrieNode.get(codePoint);
        if (typeof nextTrieNode === "undefined") {
          break;
        }
        currentTrieNode = nextTrieNode;
        const currentNodeValue = currentTrieNode.trie_val;
        if (typeof currentNodeValue !== "undefined") {
          longestMatchEndIndex = searchIndex;
          longestMatchValue = currentNodeValue;
        }
      }

      if (longestMatchEndIndex > 0) {
        // Flush pending unmatched text before emitting the replacement.
        if (unconvertedStartIndex !== null) {
          resultParts.push(inputString.slice(unconvertedStartIndex, currentIndex));
          unconvertedStartIndex = null;
        }
        resultParts.push(longestMatchValue);
        currentIndex = longestMatchEndIndex;
      } else {
        if (unconvertedStartIndex === null) {
          unconvertedStartIndex = currentIndex;
        }
        // No match here: skip exactly one code point.
        const codePoint = inputString.codePointAt(currentIndex);
        currentIndex += codePoint > 65535 ? 2 : 1;
      }
    }

    // Flush any trailing unmatched text.
    if (unconvertedStartIndex !== null) {
      resultParts.push(inputString.slice(unconvertedStartIndex, inputLength));
    }
    return resultParts.join("");
  }
};

// Annotate the CommonJS export names for ESM import in node:
// (never executed because of `0 &&`; present only for static export detection)
0 && (module.exports = { Trie });