UNPKG

@makecode/mecab-analyzer

Version:

A MeCab-based morphological analyzer for Node.js

104 lines (103 loc) 3.66 kB
"use strict"; var __importDefault = (this && this.__importDefault) || function (mod) { return (mod && mod.__esModule) ? mod : { "default": mod }; }; Object.defineProperty(exports, "__esModule", { value: true }); exports.analyzeWithExclusions = exports.all = exports.nouns = exports.morphs = exports.pos = exports.MECAB_TYPE = void 0; const child_process_1 = require("child_process"); const path_1 = __importDefault(require("path")); const MECAB = process.env.MECAB_PATH || path_1.default.join(process.cwd(), 'node_modules', '@makecode/mecab-analyzer', 'bin', 'mecab'); exports.MECAB_TYPE = { POS: 'pos', MORPHS: 'morphs', NOUNS: 'nouns', ALL: 'all', }; const runMecab = (text) => { try { const result = (0, child_process_1.execSync)(`${MECAB}`, { input: text, encoding: 'utf-8', }).trim(); return result; } catch (error) { console.error('형태소 분석 중 오류 발생:', error); return ''; } }; const parseMecabResult = (text, filterFunc, mapFunc) => { const result = runMecab(text); return result .split('\n') .map(line => line.split('\t')) .filter(arr => arr.length > 1 && filterFunc(arr)) .map(arr => mapFunc(arr)); }; const pos = (text) => { return parseMecabResult(text, () => true, arr => `${arr[0]},${arr[1].split(',')[0]}`); }; exports.pos = pos; const morphs = (text) => { return parseMecabResult(text, () => true, arr => arr[0]); }; exports.morphs = morphs; const nouns = (text) => { return parseMecabResult(text, arr => ['NNG', 'NNP'].includes(arr[1].split(',')[0]), arr => arr[0]); }; exports.nouns = nouns; const all = (text) => { return parseMecabResult(text, () => true, arr => [arr[0]].concat(arr[1].split(','))); }; exports.all = all; const analyzeWithExclusions = (text, { analysisType = exports.MECAB_TYPE.MORPHS, pattern = /https?:\/\/[^\s]+/g, }) => { const matches = text.match(pattern) || []; let modifiedText = text; const tokens = []; matches.forEach(match => { let token; do { token = Array.from({ length: 8 }, () => { const charCode = Math.floor(Math.random() * 52); return String.fromCharCode(charCode < 26 ? 65 + charCode : 97 + charCode - 26); }).join(''); } while (tokens.find(t => t.token === token)); modifiedText = modifiedText.replace(match, token); tokens.push({ original: match, token }); }); let mecabResult; switch (analysisType) { case exports.MECAB_TYPE.POS: mecabResult = (0, exports.pos)(modifiedText); break; case exports.MECAB_TYPE.ALL: mecabResult = (0, exports.all)(modifiedText); break; case exports.MECAB_TYPE.MORPHS: default: mecabResult = (0, exports.morphs)(modifiedText); break; } if (Array.isArray(mecabResult) && Array.isArray(mecabResult[0])) { return mecabResult.map(arr => arr.map(token => { const match = tokens.find(m => m.token === token); return match ? match.original : token; })); } else { return mecabResult.map(token => { const match = tokens.find(m => m.token === token.split(',')[0]); return match ? match.original : token; }); } }; exports.analyzeWithExclusions = analyzeWithExclusions; exports.default = { MECAB_TYPE: exports.MECAB_TYPE, pos: exports.pos, morphs: exports.morphs, nouns: exports.nouns, all: exports.all, analyzeWithExclusions: exports.analyzeWithExclusions, };