eyo-kernel
Version:
Restoring the letter «ё» (yo) in russian texts
111 lines (110 loc) • 3.36 kB
JavaScript
import { Dictionary } from './dictionary.js';
// Source text of a regex character class that matches ONE punctuation/symbol
// character: braces, parentheses, angle brackets, various quotes, operators,
// U+2011 (non-breaking hyphen), hyphen-minus and square brackets.
// It is a string (not a RegExp) because it is spliced into REG_EXP below.
const PUNCTUATION = '[{}()|<>=_"\'«»„“#$^%&*+:;,?!\u2011\\-[\\]]';
// Global regex that finds Cyrillic words of two or more letters: one letter of
// either case (capture group 1) followed by one or more lowercase letters.
// The negative lookahead rejects a match end that is followed by:
//   - another lowercase Cyrillic letter;
//   - a period, then spaces/NBSP/tabs, then a lowercase letter, two uppercase
//     letters, or a PUNCTUATION character (capture group 2);
//   - a period immediately followed by a PUNCTUATION character.
// NOTE(review): the period rules presumably exist to skip abbreviations — confirm.
// Because of the two capture groups, String.prototype.replace callbacks receive
// (match, group1, group2, offset, ...).
const REG_EXP = new RegExp('([А-ЯЁа-яё])[а-яё]+(?![а-яё]|\\.[ \u00A0\t]+([а-яё]|[А-ЯЁ]{2}|' +
PUNCTUATION + ')|\\.' +
PUNCTUATION + ')', 'g');
/**
 * Finds and restores the letter «ё» in text, using a word dictionary to decide
 * which words written with «е» have a «ё» spelling.
 */
export class Eyo {
    /** Dictionary consulted through `restoreWord(word)` for every matched word. */
    dictionary = new Dictionary();

    /**
     * Finds candidate replacements of the letter «е» with «ё».
     *
     * @param {string} text - Text to inspect.
     * @param {boolean} [groupByWords=false] - When true, the result is sorted
     *   with `sort` and entries that share the same `before` word are merged
     *   into one entry listing all of their positions.
     * @returns {Array<{before: string, after: string, position: Array<{line: number, column: number, index: number}>}>}
     *   One entry per word the dictionary changed; empty for empty text or
     *   text without any «е»/«ё».
     */
    lint(text, groupByWords = false) {
        if (!text || !this.hasEYo(text)) {
            return [];
        }

        let replacements = [];
        // matchAll iterates over the matches without building a throwaway string.
        for (const match of text.matchAll(REG_EXP)) {
            const wordE = match[0];
            const wordYo = this.dictionary.restoreWord(wordE);
            if (wordYo !== wordE) {
                replacements.push({
                    before: wordE,
                    after: wordYo,
                    position: [this.getPosition(text, match.index)],
                });
            }
        }

        if (groupByWords) {
            replacements.sort(this.sort);
            replacements = this.delDuplicates(replacements);
        }

        return replacements;
    }

    /**
     * Restores the letter «ё» in the text.
     *
     * @param {string} text - Text to process.
     * @returns {string} The text with every matched word replaced by what the
     *   dictionary returns for it; `''` for empty/nullish input.
     */
    restore(text) {
        if (!text || !this.hasEYo(text)) {
            return text || '';
        }

        return text.replace(REG_EXP, (wordE) => this.dictionary.restoreWord(wordE));
    }

    /**
     * Tells whether the text contains «е» or «ё» in either case.
     *
     * @param {string} text
     * @returns {boolean}
     */
    hasEYo(text) {
        return text.search(/[ЕЁеё]/) !== -1;
    }

    /**
     * Converts a character offset into a 1-based line/column position.
     *
     * @param {string} text - Full text.
     * @param {number} index - Character offset inside `text`.
     * @returns {{line: number, column: number, index: number}}
     */
    getPosition(text, index) {
        // Lines are separated by "\n" or "\r\n"; the last piece is the current line.
        const lines = text.substring(0, index).split(/\r?\n/);

        return {
            line: lines.length,
            column: lines[lines.length - 1].length + 1,
            index,
        };
    }

    /**
     * Merges entries that share the same `before` word.
     *
     * The first entry seen for a word supplies `before`/`after`; positions of
     * all entries for that word are concatenated in input order. The input
     * array and its items are not modified.
     *
     * @param {Array<{before: string, after: string, position: Array}>} replacements
     * @returns {Array<{before: string, after: string, position: Array}>}
     */
    delDuplicates(replacements) {
        // A Map (rather than a plain object) keeps words such as "constructor"
        // or "__proto__" from colliding with Object.prototype members.
        const grouped = new Map();

        for (const { before, after, position } of replacements) {
            let entry = grouped.get(before);
            if (entry === undefined) {
                entry = { before, after, position: [] };
                grouped.set(before, entry);
            }
            for (const item of position) {
                entry.position.push(item);
            }
        }

        // Map iteration follows insertion order, i.e. first occurrence of each word.
        return [...grouped.values()];
    }

    /**
     * Comparator for replacement entries, ordering by their `before` word.
     *
     * Words are compared case-insensitively, except when their first letters
     * differ only by case: then the raw strings are compared, so the ordering
     * between such words is deterministic and never 0.
     *
     * @param {{before: string}} a
     * @param {{before: string}} b
     * @returns {number} -1, 0 or 1.
     */
    sort(a, b) {
        const aBefore = a.before;
        const bBefore = b.before;
        const aLower = aBefore.toLowerCase();
        const bLower = bBefore.toLowerCase();

        const firstLettersDifferOnlyByCase =
            aBefore[0] !== bBefore[0] && aLower[0] === bLower[0];
        if (firstLettersDifferOnlyByCase) {
            return aBefore > bBefore ? 1 : -1;
        }

        if (aLower > bLower) {
            return 1;
        }
        if (aLower < bLower) {
            return -1;
        }
        return 0;
    }
}
;