UNPKG

paknevis

Version:

Paknevis.js is a Persian text formatter tool.

79 lines (78 loc) 3.11 kB
import { defaultBadWords } from './data/badWords.js';

/**
 * Longest stretch of the input, counted in code points, that `censor`
 * examines as a single candidate match.
 */
const MAX_MATCH_SPAN = 20;

/**
 * Arabic-script marks that attach to the letter before them: harakat
 * (U+064B–U+0652), superscript alef (U+0670) and tatweel (U+0640).
 */
const TRAILING_MARK = /^[\u064b-\u0652\u0670\u0640]$/;

/**
 * Base class of Paknevis.js
 * @class
 */
export class Paknevis {
  /**
   * Normalises a string to improve detection of banned words.
   *
   * Look-alike and same-sounding letters are folded onto one representative,
   * Persian and Arabic-Indic digits become ASCII digits, and every character
   * that is not a Persian letter or an ASCII digit is removed. The mapping
   * works one character at a time, so normalising a concatenation equals
   * concatenating the normalised parts.
   *
   * @param {string} text your text
   * @returns {string} the normalised text
   */
  static normalise(text) {
    return text
      .replace(/ك/g, 'ک')
      // Persian digits U+06F0–U+06F9 -> ASCII '0'–'9'.
      .replace(/[\u06f0-\u06f9]/g, (d) => String.fromCharCode(d.charCodeAt(0) - 0x06f0 + 0x30))
      // Arabic-Indic digits U+0660–U+0669 -> ASCII '0'–'9'.
      .replace(/[\u0660-\u0669]/g, (d) => String.fromCharCode(d.charCodeAt(0) - 0x0660 + 0x30))
      .replace(/[يى]/g, 'ی')
      .replace(/[ثص]/g, 'س')
      .replace(/[ذضظ]/g, 'ز')
      .replace(/ح/g, 'ه')
      .replace(/ط/g, 'ت')
      .replace(/[عءئ]/g, 'ا')
      .replace(/ؤ/g, 'و')
      // Whitelist pass: drops whitespace, zero-width and directional marks,
      // punctuation, diacritics, tatweel, Latin letters and anything else.
      .replace(/[^آابپتثجچحخدذرزسشصضطظعغفقکگلمنوهی0-9]/g, '');
  }

  /**
   * Checks whether the text contains any filtered word. Good for a quick
   * yes/no check.
   *
   * Both the text and every word are normalised before comparison. Words
   * that normalise to an empty string are ignored, because an empty needle
   * would match every input.
   *
   * @param {string} text input text
   * @param {string[]} [extraWords] extra words to check in addition to the defaults.
   * @returns {boolean} true when at least one word occurs in the normalised text
   */
  static hasBadWords(text, extraWords = []) {
    const haystack = this.normalise(text);
    return [...defaultBadWords, ...extraWords].some((word) => {
      const needle = this.normalise(word);
      return needle !== '' && haystack.includes(needle);
    });
  }

  /**
   * Censors banned words. Note that it cannot find words in some scenarios:
   * only stretches of at most 20 code points are examined.
   *
   * A match starts and ends on a character that survives normalisation, so
   * whitespace, punctuation and other text around a banned word is kept.
   * Ignorable characters inside the match (for example separators inserted
   * to dodge the filter) are censored with it, as are diacritics or tatweel
   * directly after its last letter.
   *
   * @param {string} text input text
   * @param {string} [censorChar] replacement emitted once per censored run. Default is empty, which removes the run.
   * @param {string[]} [extraWords] extra words to censor in addition to the defaults.
   * @returns {string} the text with censored runs replaced
   */
  static censor(text, censorChar = '', extraWords = []) {
    // Normalise the word list once; skip words that normalise to nothing,
    // since they would match every run of ignorable characters.
    const banned = new Set();
    let longest = 0;
    for (const word of [...defaultBadWords, ...extraWords]) {
      const key = this.normalise(word);
      if (key === '') continue;
      banned.add(key);
      longest = Math.max(longest, key.length);
    }

    // Split by code point so surrogate pairs are never cut in half.
    const origChars = [...text];
    const len = origChars.length;
    // Each character normalises to at most one character, or to ''.
    const pieces = origChars.map((ch) => this.normalise(ch));
    const marks = new Array(len).fill(false);

    for (let start = 0; start < len; start++) {
      if (pieces[start] === '') continue; // matches begin on a significant character
      const limit = Math.min(len, start + MAX_MATCH_SPAN);
      let key = '';
      for (let end = start; end < limit; end++) {
        if (pieces[end] === '') continue; // ignorable characters never end a match
        key += pieces[end];
        if (key.length > longest) break; // already longer than every banned word
        if (!banned.has(key)) continue;
        // Take along marks that are attached to the last matched letter.
        let stop = end + 1;
        while (stop < len && TRAILING_MARK.test(origChars[stop])) stop++;
        marks.fill(true, start, stop);
      }
    }

    let result = '';
    for (let i = 0; i < len; i++) {
      if (!marks[i]) {
        result += origChars[i];
      } else if (censorChar && (i === 0 || !marks[i - 1])) {
        // One replacement per contiguous censored run.
        result += censorChar;
      }
    }
    return result;
  }
}