UNPKG

paknevis

Version:

Paknevis.js is a Persian text formatting tool.

82 lines (75 loc) 3.23 kB
import { defaultBadWords } from './data/badWords';

/**
 * Base class of Paknevis.js: static helpers that detect and censor banned
 * words in Persian text.
 * @class
 */
export class Paknevis {
  /**
   * Longest stretch of the original text (counted in code points) that a
   * single censored match may cover.
   */
  private static readonly MAX_MATCH_SPAN = 20;

  /** Arabic harakat / superscript alef: marks that attach to the preceding letter. */
  private static readonly COMBINING_MARK = /^[\u064b-\u0652\u0670]$/;

  /**
   * Folds a string into a canonical form so that spelling variants of a banned
   * word compare equal.
   *
   * Steps: Persian and Arabic-Indic digits become ASCII digits, Arabic letter
   * variants and similar-sounding letters are folded onto one representative,
   * and finally every character that is not a Persian letter or an ASCII digit
   * is dropped. That last step is what removes whitespace, zero-width and
   * bidi controls, punctuation, diacritics, tatweel and Latin letters.
   *
   * Every step works on single characters, so the normalised form of a string
   * is the concatenation of the normalised forms of its characters.
   * @param {string} text your text
   * @returns {string} the folded text (possibly empty)
   */
  private static normalise(text: string): string {
    return text
      // Extended Arabic-Indic (Persian) digits U+06F0..U+06F9 -> '0'..'9'.
      .replace(/[\u06f0-\u06f9]/g, d => String.fromCharCode(d.charCodeAt(0) - 0x06f0 + 0x30))
      // Arabic-Indic digits U+0660..U+0669 -> '0'..'9'. The offset must land on
      // ASCII '0' (0x30); subtracting only 0x0660 would yield control characters.
      .replace(/[\u0660-\u0669]/g, d => String.fromCharCode(d.charCodeAt(0) - 0x0660 + 0x30))
      .replace(/ك/g, 'ک')
      .replace(/[يى]/g, 'ی')
      .replace(/[ثص]/g, 'س')
      .replace(/[زذضظ]/g, 'ز')
      .replace(/ح/g, 'ه')
      .replace(/[تط]/g, 'ت')
      .replace(/[عءئ]/g, 'ا')
      .replace(/[ؤو]/g, 'و')
      // Allowlist: anything that is not a Persian letter or ASCII digit is removed.
      .replace(/[^آابپتثجچحخدذرزسشصضطظعغفقکگلمنوهی0-9]/g, '');
  }

  /**
   * Builds the set of banned words in normalised form.
   *
   * Words that normalise to the empty string are skipped: an empty needle
   * would match everywhere.
   * @param {string[]} extraWords caller-supplied words added to the defaults
   * @returns {Set<string>} distinct, non-empty normalised words
   */
  private static normalisedWords(extraWords: string[]): Set<string> {
    const words = new Set<string>();
    for (const word of [...defaultBadWords, ...extraWords]) {
      const folded = Paknevis.normalise(word);
      if (folded !== '') {
        words.add(folded);
      }
    }
    return words;
  }

  /**
   * Checks whether the text contains any filtered word. Use this when you only
   * need a yes/no answer.
   *
   * Both the text and the words are normalised before comparison, so the
   * check agrees with {@link Paknevis.censor}.
   * @param {string} text input text
   * @param {string[]} [extraWords] extra words to filter in addition to the defaults
   * @returns {boolean} true when at least one filtered word occurs in the text
   */
  static hasBadWords(text: string, extraWords: string[] = []): boolean {
    // Normalise the haystack once instead of once per word.
    const folded = Paknevis.normalise(text);
    for (const word of Paknevis.normalisedWords(extraWords)) {
      if (folded.includes(word)) {
        return true;
      }
    }
    return false;
  }

  /**
   * Censors filtered words in the text. Note that it cannot find words in
   * some scenarios: a match may span at most 20 code points of the original
   * text.
   *
   * A match must start and end on a character that survives normalisation, so
   * surrounding spaces, punctuation and Latin text are left intact. Ignorable
   * characters inside the match (e.g. separators used to disguise a word) and
   * combining marks directly after it are censored with it.
   * @param {string} text input text
   * @param {string} [censorChar] mask written once per censored run. Default is
   *   empty, which removes the run.
   * @param {string[]} [extraWords] extra words to filter in addition to the defaults
   * @returns {string} the text with every detected word removed or masked
   */
  static censor(text: string, censorChar: string = "", extraWords: string[] = []): string {
    const words = Paknevis.normalisedWords(extraWords);
    // Split by code point so astral characters are never cut in half.
    const origChars = [...text];
    const len = origChars.length;
    if (words.size === 0 || len === 0) {
      return text;
    }

    // Per-character normalisation, computed once and shared by every window.
    const folded = origChars.map(ch => Paknevis.normalise(ch));
    const attached = origChars.map(ch => Paknevis.COMBINING_MARK.test(ch));
    const marks: boolean[] = new Array<boolean>(len).fill(false);

    for (let start = 0; start < len; start++) {
      // A match has to begin on a significant character.
      if (folded[start] === '') {
        continue;
      }
      const limit = Math.min(len, start + Paknevis.MAX_MATCH_SPAN);
      let candidate = '';
      for (let end = start; end < limit; end++) {
        candidate += folded[end];
        // ...and it has to end on one as well.
        if (folded[end] === '' || !words.has(candidate)) {
          continue;
        }
        // Swallow combining marks that belong to the last matched letter.
        let stop = end + 1;
        while (stop < len && attached[stop]) {
          stop++;
        }
        marks.fill(true, start, stop);
      }
    }

    let result = '';
    for (let i = 0; i < len; i++) {
      if (!marks[i]) {
        result += origChars[i];
      } else if (censorChar && (i === 0 || !marks[i - 1])) {
        // Emit the mask only at the first character of each censored run.
        result += censorChar;
      }
    }
    return result;
  }
}