@makecode/mecab-analyzer
Version:
A MeCab-based morphological analyzer for Node.js
104 lines (103 loc) • 3.66 kB
JavaScript
;
/**
 * Interop helper for default imports.
 *
 * Reuses a helper already present on `this` when there is one; otherwise
 * returns modules flagged with `__esModule` unchanged and wraps any other
 * value as `{ default: value }`.
 */
var __importDefault = (this && this.__importDefault) || function (mod) {
    if (mod && mod.__esModule) {
        return mod;
    }
    return { "default": mod };
};
Object.defineProperty(exports, "__esModule", { value: true });
exports.analyzeWithExclusions = exports.all = exports.nouns = exports.morphs = exports.pos = exports.MECAB_TYPE = void 0;
const child_process_1 = require("child_process");
const path_1 = __importDefault(require("path"));
// Path segments, relative to the current working directory, of the fallback
// MeCab executable used when MECAB_PATH is not set.
const BUNDLED_MECAB_SEGMENTS = ['node_modules', '@makecode/mecab-analyzer', 'bin', 'mecab'];
// Command used to launch MeCab: the MECAB_PATH environment variable wins when
// it is set to a non-empty value; otherwise the fallback path is resolved
// against process.cwd() at module load time.
const MECAB = process.env.MECAB_PATH ||
    path_1.default.join(process.cwd(), ...BUNDLED_MECAB_SEGMENTS);
/**
 * Identifiers for the available analysis modes. Each value matches the name
 * of the exported function that performs that kind of analysis.
 */
const analysisTypes = {
    POS: 'pos',
    MORPHS: 'morphs',
    NOUNS: 'nouns',
    ALL: 'all',
};
exports.MECAB_TYPE = analysisTypes;
/**
 * Runs the MeCab command synchronously, feeding `text` to its standard input.
 *
 * @param {string} text - Text to analyze.
 * @returns {string} The trimmed standard output of the command, or an empty
 *   string when the command fails (the error is logged, not rethrown).
 */
const runMecab = (text) => {
    const execOptions = {
        input: text,
        encoding: 'utf-8',
    };
    try {
        const stdout = (0, child_process_1.execSync)(`${MECAB}`, execOptions);
        return stdout.trim();
    }
    catch (error) {
        // Best-effort: report the failure and let callers see "no output".
        console.error('형태소 분석 중 오류 발생:', error);
        return '';
    }
};
/**
 * Runs MeCab on `text` and converts its line-oriented output into a list.
 *
 * Each output line is split on tab characters. Lines that yield fewer than
 * two fields are skipped (presumably MeCab's end-of-sentence marker and blank
 * lines — confirm against the MeCab output format).
 *
 * @param {string} text - Text to analyze.
 * @param {(fields: string[]) => boolean} filterFunc - Keeps a line when it
 *   returns a truthy value.
 * @param {(fields: string[]) => *} mapFunc - Converts a kept line's fields
 *   into one result entry.
 * @returns {Array} One entry per kept line, in output order.
 */
const parseMecabResult = (text, filterFunc, mapFunc) => {
    const entries = [];
    for (const line of runMecab(text).split('\n')) {
        const fields = line.split('\t');
        if (fields.length > 1 && filterFunc(fields)) {
            entries.push(mapFunc(fields));
        }
    }
    return entries;
};
/**
 * Tags every morpheme of `text`.
 *
 * @param {string} text - Text to analyze.
 * @returns {string[]} One `"<first field>,<first feature>"` string per output
 *   line, where the feature is the first comma-separated value of the second
 *   tab field.
 */
const pos = (text) => {
    const keepEveryLine = () => true;
    const toTaggedSurface = ([surface, features]) => `${surface},${features.split(',')[0]}`;
    return parseMecabResult(text, keepEveryLine, toTaggedSurface);
};
exports.pos = pos;
/**
 * Splits `text` into morphemes.
 *
 * @param {string} text - Text to analyze.
 * @returns {string[]} The first tab field of every output line.
 */
const morphs = (text) => {
    const keepEveryLine = () => true;
    const toSurface = ([surface]) => surface;
    return parseMecabResult(text, keepEveryLine, toSurface);
};
exports.morphs = morphs;
/**
 * Extracts the entries of `text` whose first feature is `NNG` or `NNP`.
 *
 * @param {string} text - Text to analyze.
 * @returns {string[]} The first tab field of every matching output line.
 */
const nouns = (text) => {
    const isNounLine = ([, features]) => {
        const tag = features.split(',')[0];
        return tag === 'NNG' || tag === 'NNP';
    };
    const toSurface = ([surface]) => surface;
    return parseMecabResult(text, isNounLine, toSurface);
};
exports.nouns = nouns;
/**
 * Returns the full analysis of `text`.
 *
 * @param {string} text - Text to analyze.
 * @returns {string[][]} One array per output line: the first tab field
 *   followed by every comma-separated value of the second tab field.
 */
const all = (text) => {
    const keepEveryLine = () => true;
    const toFlatRow = ([surface, features]) => [surface, ...features.split(',')];
    return parseMecabResult(text, keepEveryLine, toFlatRow);
};
exports.all = all;
/**
 * Analyzes `text` while keeping substrings matched by `pattern` (URLs by
 * default) out of the analyzer's hands.
 *
 * Every match is swapped for a random 8-letter placeholder before analysis,
 * and placeholders found in the result are swapped back afterwards.
 *
 * @param {string} text - Text to analyze.
 * @param {object} [options] - Optional settings; may be omitted entirely.
 * @param {string} [options.analysisType=MECAB_TYPE.MORPHS] - One of the
 *   `MECAB_TYPE` values. Unknown values fall back to `morphs`.
 * @param {RegExp|string} [options.pattern] - What to protect. A global RegExp
 *   protects every match; a non-global one protects only the first match.
 *   A string is compiled with `new RegExp`, as `String.prototype.match` does.
 * @returns {string[]|string[][]} The analysis result with placeholders
 *   restored. In `pos` mode a restored entry is the original substring alone,
 *   without the `,TAG` suffix. In `nouns` mode a protected substring appears
 *   only if the analyzer tags its placeholder as a noun (not verified here).
 */
const analyzeWithExclusions = (text, { analysisType = exports.MECAB_TYPE.MORPHS, pattern = /https?:\/\/[^\s]+/g, } = {}) => {
    const matcher = pattern instanceof RegExp ? pattern : new RegExp(pattern);
    // placeholder -> protected substring
    const originals = new Map();
    const randomLetter = () => {
        const charCode = Math.floor(Math.random() * 52);
        return String.fromCharCode(charCode < 26 ? 65 + charCode : 97 + charCode - 26);
    };
    // Replacing through a callback substitutes each match at its own position
    // and never treats capture groups as extra matches.
    const modifiedText = text.replace(matcher, (match) => {
        if (match === '') {
            // A zero-length match has nothing to protect.
            return match;
        }
        let token;
        do {
            token = Array.from({ length: 8 }, randomLetter).join('');
            // Retry when the placeholder is already taken or happens to occur
            // in the input, where it would be restored by mistake.
        } while (originals.has(token) || text.includes(token));
        originals.set(token, match);
        return token;
    });
    let mecabResult;
    switch (analysisType) {
        case exports.MECAB_TYPE.POS:
            mecabResult = (0, exports.pos)(modifiedText);
            break;
        case exports.MECAB_TYPE.ALL:
            mecabResult = (0, exports.all)(modifiedText);
            break;
        case exports.MECAB_TYPE.NOUNS:
            mecabResult = (0, exports.nouns)(modifiedText);
            break;
        case exports.MECAB_TYPE.MORPHS:
        default:
            mecabResult = (0, exports.morphs)(modifiedText);
            break;
    }
    if (Array.isArray(mecabResult) && Array.isArray(mecabResult[0])) {
        // `all` mode: rows of fields; restore any field that is a placeholder.
        return mecabResult.map(row => row.map(field => (originals.has(field) ? originals.get(field) : field)));
    }
    // Flat modes: entries are either a bare surface or "surface,TAG".
    return mecabResult.map(entry => {
        const surface = entry.split(',')[0];
        return originals.has(surface) ? originals.get(surface) : entry;
    });
};
exports.analyzeWithExclusions = analyzeWithExclusions;
exports.default = {
MECAB_TYPE: exports.MECAB_TYPE,
pos: exports.pos,
morphs: exports.morphs,
nouns: exports.nouns,
all: exports.all,
analyzeWithExclusions: exports.analyzeWithExclusions,
};