/*
 * bad-words-thai
 * Version: (not specified)
 * Advanced profanity filter for Thai and English with language detection, fuzzy matching, and context-aware filtering.
 * 156 lines • 6.01 kB
 * JavaScript
 */
;
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
    if (k2 === undefined) k2 = k;
    var desc = Object.getOwnPropertyDescriptor(m, k);
    if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
      desc = { enumerable: true, get: function() { return m[k]; } };
    }
    Object.defineProperty(o, k2, desc);
}) : (function(o, m, k, k2) {
    if (k2 === undefined) k2 = k;
    o[k2] = m[k];
}));
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
    Object.defineProperty(o, "default", { enumerable: true, value: v });
}) : function(o, v) {
    o["default"] = v;
});
var __importStar = (this && this.__importStar) || (function () {
    var ownKeys = function(o) {
        ownKeys = Object.getOwnPropertyNames || function (o) {
            var ar = [];
            for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
            return ar;
        };
        return ownKeys(o);
    };
    return function (mod) {
        if (mod && mod.__esModule) return mod;
        var result = {};
        if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
        __setModuleDefault(result, mod);
        return result;
    };
})();
Object.defineProperty(exports, "__esModule", { value: true });
exports.FuzzyMatcher = void 0;
const fastest_levenshtein_1 = require("fastest-levenshtein");
const stringSimilarity = __importStar(require("string-similarity"));
/**
 * Static helpers for approximate string comparison: normalised Levenshtein
 * similarity, the `string-similarity` package's scores, a hand-written
 * Jaro-Winkler implementation, and fuzzy search of patterns inside a text.
 *
 * All comparisons lower-case their inputs first unless noted otherwise.
 */
class FuzzyMatcher {
    /**
     * Levenshtein distance normalised by the longer input's length.
     * This method is case-sensitive; callers lower-case beforehand.
     *
     * @param {string} str1
     * @param {string} str2
     * @returns {number} `1 - distance / max(len1, len2)`; `1` when both strings are empty.
     */
    static calculateLevenshteinSimilarity(str1, str2) {
        const maxLength = Math.max(str1.length, str2.length);
        // Two empty strings are identical; also avoids dividing by zero.
        if (maxLength === 0)
            return 1;
        const distance = (0, fastest_levenshtein_1.distance)(str1, str2);
        return 1 - (distance / maxLength);
    }
    /**
     * Case-insensitive check that the normalised Levenshtein similarity
     * reaches `threshold`.
     *
     * @param {string} str1
     * @param {string} str2
     * @param {number} [threshold=0.8] Minimum similarity (inclusive).
     * @returns {boolean}
     */
    static isLevenshteinMatch(str1, str2, threshold = 0.8) {
        const similarity = this.calculateLevenshteinSimilarity(str1.toLowerCase(), str2.toLowerCase());
        return similarity >= threshold;
    }
    /**
     * Case-insensitive similarity as computed by
     * `stringSimilarity.compareTwoStrings`.
     *
     * @param {string} str1
     * @param {string} str2
     * @returns {number} The rating returned by the library.
     */
    static calculateDiceCoefficient(str1, str2) {
        return stringSimilarity.compareTwoStrings(str1.toLowerCase(), str2.toLowerCase());
    }
    /**
     * Checks whether `calculateDiceCoefficient` reaches `threshold`.
     *
     * @param {string} str1
     * @param {string} str2
     * @param {number} [threshold=0.7] Minimum rating (inclusive).
     * @returns {boolean}
     */
    static isDiceMatch(str1, str2, threshold = 0.7) {
        const similarity = this.calculateDiceCoefficient(str1, str2);
        return similarity >= threshold;
    }
    /**
     * Finds the entry of `targets` that rates highest against `mainString`
     * using `stringSimilarity.findBestMatch` on lower-cased inputs.
     *
     * @param {string} mainString
     * @param {string[]} targets
     * @returns {{target: string, rating: number} | null} The winning target in
     *   its original casing plus its rating, or `null` when `targets` is empty.
     */
    static findBestMatch(mainString, targets) {
        if (targets.length === 0)
            return null;
        const matches = stringSimilarity.findBestMatch(mainString.toLowerCase(), targets.map(t => t.toLowerCase()));
        return {
            // Index back into the caller's array so the original casing is returned.
            target: targets[matches.bestMatchIndex],
            rating: matches.bestMatch.rating
        };
    }
    /**
     * Case-insensitive Jaro-Winkler similarity.
     *
     * @param {string} str1
     * @param {string} str2
     * @returns {number} `1` for equal strings (after lower-casing), `0` when
     *   exactly one string is empty or no characters match, otherwise the Jaro
     *   score boosted by the length of the common prefix (at most 4 characters,
     *   scaling factor 0.1).
     */
    static calculateJaroWinklerSimilarity(str1, str2) {
        const s1 = str1.toLowerCase();
        const s2 = str2.toLowerCase();
        if (s1 === s2)
            return 1;
        const len1 = s1.length;
        const len2 = s2.length;
        if (len1 === 0 || len2 === 0)
            return 0;
        // Characters only count as matching when their indices differ by at most maxDist.
        const maxDist = Math.floor(Math.max(len1, len2) / 2) - 1;
        let matches = 0;
        const s1Matches = new Array(len1).fill(false);
        const s2Matches = new Array(len2).fill(false);
        for (let i = 0; i < len1; i++) {
            const start = Math.max(0, i - maxDist);
            const end = Math.min(i + maxDist + 1, len2);
            for (let j = start; j < end; j++) {
                // Each character of s2 may be paired with at most one character of s1.
                if (s2Matches[j] || s1[i] !== s2[j])
                    continue;
                s1Matches[i] = true;
                s2Matches[j] = true;
                matches++;
                break;
            }
        }
        if (matches === 0)
            return 0;
        // Walk both strings' matched characters in order; each out-of-order
        // pair is counted once per side, hence the division by 2 below.
        let transpositions = 0;
        let k = 0;
        for (let i = 0; i < len1; i++) {
            if (!s1Matches[i])
                continue;
            while (!s2Matches[k])
                k++;
            if (s1[i] !== s2[k])
                transpositions++;
            k++;
        }
        const jaro = (matches / len1 + matches / len2 + (matches - transpositions / 2) / matches) / 3;
        // Winkler adjustment: reward a shared prefix of up to 4 characters.
        let prefix = 0;
        for (let i = 0; i < Math.min(len1, len2, 4); i++) {
            if (s1[i] === s2[i])
                prefix++;
            else
                break;
        }
        return jaro + prefix * 0.1 * (1 - jaro);
    }
    /**
     * Reports whether `pattern` occurs in `text` exactly or approximately.
     *
     * Three case-insensitive strategies are tried in order:
     *   1. plain substring containment;
     *   2. Levenshtein match against each whitespace-separated word;
     *   3. Levenshtein match against sliding windows of up to
     *      `pattern.length + 2` characters (windows near the end of the text
     *      are shorter because `substring` clamps at the text's end).
     *
     * @param {string} text
     * @param {string} pattern
     * @param {number} [threshold=0.8] Minimum Levenshtein similarity (inclusive).
     * @returns {boolean}
     */
    static containsFuzzy(text, pattern, threshold = 0.8) {
        const textLower = text.toLowerCase();
        const patternLower = pattern.toLowerCase();
        if (textLower.includes(patternLower))
            return true;
        const words = textLower.split(/\s+/);
        for (const word of words) {
            if (this.isLevenshteinMatch(word, patternLower, threshold)) {
                return true;
            }
        }
        // NOTE(review): full-width windows are 2 characters longer than the
        // pattern, so their similarity is normally capped at
        // pattern.length / (pattern.length + 2); with the default threshold
        // only patterns of 8+ characters can match away from the end of the
        // text. Confirm whether windows of other widths were intended.
        // The original-cased strings are sliced here; isLevenshteinMatch
        // lower-cases each window itself.
        for (let i = 0; i <= text.length - pattern.length + 2; i++) {
            const substring = text.substring(i, i + pattern.length + 2);
            if (this.isLevenshteinMatch(substring, pattern, threshold)) {
                return true;
            }
        }
        return false;
    }
    /**
     * Compares every whitespace-separated word of `text` with every pattern
     * and collects the pairs whose case-insensitive Levenshtein similarity
     * reaches `threshold`.
     *
     * @param {string} text
     * @param {Iterable<string>} patterns
     * @param {number} [threshold=0.8] Minimum similarity (inclusive).
     * @returns {Array<{pattern: string, position: number, similarity: number, matchedText: string}>}
     *   One entry per matching (word, pattern) pair, in text order. `position`
     *   is the index in `text` where that occurrence of the word starts.
     */
    static findAllFuzzyMatches(text, patterns, threshold = 0.8) {
        const matches = [];
        // The capture group keeps the whitespace runs in the result, so the
        // pieces concatenate back to `text` and a running offset gives the
        // exact start of every word, including repeated ones.
        const pieces = text.split(/(\s+)/);
        let offset = 0;
        for (const piece of pieces) {
            const position = offset;
            offset += piece.length;
            // Skip separators and the empty pieces produced at the text's edges.
            if (!piece.trim())
                continue;
            const wordLower = piece.toLowerCase();
            for (const pattern of patterns) {
                const similarity = this.calculateLevenshteinSimilarity(wordLower, pattern.toLowerCase());
                if (similarity >= threshold) {
                    matches.push({
                        pattern,
                        position,
                        similarity,
                        matchedText: piece
                    });
                }
            }
        }
        return matches;
    }
}
exports.FuzzyMatcher = FuzzyMatcher;
//# sourceMappingURL=FuzzyMatcher.js.map