allprofanity
Version:
A blazing-fast, multi-language profanity filter with advanced algorithms (Aho-Corasick, Bloom Filters) delivering 664% faster performance on large texts, intelligent leet-speak detection, and pattern-based context analysis
208 lines • 6.36 kB
JavaScript
/**
 * Bloom filter: a fixed-size bit array that answers "might this string be in
 * the set?" with possible false positives but no false negatives for items
 * that were added to it.
 */
export class BloomFilter {
  /**
   * Size the filter for a target capacity and false-positive rate.
   *
   * @param {number} expectedItems - Number of items the filter is sized for.
   *   Values below 1 (including 0, e.g. an empty word list) are treated as 1.
   * @param {number} [falsePositiveRate=0.01] - Target false-positive
   *   probability, strictly between 0 and 1.
   * @throws {RangeError} If expectedItems is negative or not finite, or if
   *   falsePositiveRate is not strictly between 0 and 1.
   */
  constructor(expectedItems, falsePositiveRate = 0.01) {
    const requestedItems = Number(expectedItems);
    const targetRate = Number(falsePositiveRate);
    if (!Number.isFinite(requestedItems) || requestedItems < 0) {
      throw new RangeError("expectedItems must be a finite, non-negative number");
    }
    // Written as a negated conjunction so that NaN is rejected as well.
    if (!(targetRate > 0 && targetRate < 1)) {
      throw new RangeError("falsePositiveRate must be strictly between 0 and 1");
    }
    // Sizing for zero items would give size 0 and a NaN hash count, which
    // makes every lookup report "might contain"; size for at least one item.
    const capacity = Math.max(1, requestedItems);
    this.itemCount = 0;
    this.size = this.calculateOptimalSize(capacity, targetRate);
    this.hashCount = this.calculateOptimalHashCount(this.size, capacity);
    this.bitArray = new Uint8Array(Math.ceil(this.size / 8));
  }

  /**
   * Optimal bit count: m = ceil(-n * ln(p) / (ln 2)^2).
   *
   * @param {number} n - Expected number of items.
   * @param {number} p - Target false-positive probability.
   * @returns {number} Number of bits.
   */
  calculateOptimalSize(n, p) {
    return Math.ceil((-n * Math.log(p)) / Math.log(2) ** 2);
  }

  /**
   * Optimal number of hash functions: k = ceil((m / n) * ln 2).
   *
   * @param {number} m - Number of bits in the filter.
   * @param {number} n - Expected number of items.
   * @returns {number} Number of hash functions.
   */
  calculateOptimalHashCount(m, n) {
    return Math.ceil((m / n) * Math.log(2));
  }

  /**
   * Hash function 1 (FNV-1a variant), reduced modulo the filter size.
   *
   * @param {string} item - String to hash (UTF-16 code units are hashed).
   * @returns {number} Bit index in [0, size).
   */
  hash1(item) {
    let hash = 2166136261;
    for (let i = 0; i < item.length; i++) {
      hash ^= item.charCodeAt(i);
      // NOTE: this is a double multiplication, not a 32-bit one; the product
      // can exceed 2^53 and lose low bits. The arithmetic is kept exactly as
      // is so that bit arrays produced by toJSON() stay readable.
      hash *= 16777619;
    }
    return Math.abs(hash) % this.size;
  }

  /**
   * Hash function 2 (djb2 variant), reduced modulo the filter size.
   *
   * @param {string} item - String to hash (UTF-16 code units are hashed).
   * @returns {number} Bit index in [0, size).
   */
  hash2(item) {
    let hash = 5381;
    for (let i = 0; i < item.length; i++) {
      hash = (hash << 5) + hash + item.charCodeAt(i);
    }
    return Math.abs(hash) % this.size;
  }

  /**
   * Generate the hashCount bit indices for an item using double hashing:
   * index_i = (hash1 + i * hash2) mod size.
   *
   * @param {string} item - String to hash.
   * @returns {number[]} Bit indices, one per hash function.
   */
  getHashes(item) {
    const first = this.hash1(item);
    const second = this.hash2(item);
    const indices = [];
    for (let i = 0; i < this.hashCount; i++) {
      // Both operands are non-negative, so the remainder is too.
      indices.push((first + i * second) % this.size);
    }
    return indices;
  }

  /**
   * Set a bit in the bit array.
   *
   * @param {number} index - Bit index.
   */
  setBit(index) {
    const byteIndex = Math.floor(index / 8);
    const bitIndex = index % 8;
    this.bitArray[byteIndex] |= 1 << bitIndex;
  }

  /**
   * Read a bit from the bit array.
   *
   * @param {number} index - Bit index.
   * @returns {boolean} True when the bit is set.
   */
  getBit(index) {
    const byteIndex = Math.floor(index / 8);
    const bitIndex = index % 8;
    return (this.bitArray[byteIndex] & (1 << bitIndex)) !== 0;
  }

  /**
   * Add an item to the filter and increment itemCount.
   *
   * @param {string} item - String to add.
   */
  add(item) {
    for (const index of this.getHashes(item)) {
      this.setBit(index);
    }
    this.itemCount++;
  }

  /**
   * Add every item of an iterable to the filter.
   *
   * @param {Iterable<string>} items - Strings to add.
   */
  addAll(items) {
    for (const item of items) {
      this.add(item);
    }
  }

  /**
   * Test whether an item might be in the set.
   *
   * @param {string} item - String to look up.
   * @returns {boolean} False when at least one of the item's bits is unset
   *   (so it was never added); true when all of them are set.
   */
  mightContain(item) {
    return this.getHashes(item).every((index) => this.getBit(index));
  }

  /**
   * Test whether at least one of the given items might be in the set.
   *
   * @param {string[]} items - Strings to look up.
   * @returns {boolean} True when mightContain is true for any item.
   */
  mightContainAny(items) {
    return items.some((item) => this.mightContain(item));
  }

  /**
   * Keep only the items that might be in the set.
   *
   * @param {string[]} items - Strings to look up.
   * @returns {string[]} The items for which mightContain is true.
   */
  filter(items) {
    return items.filter((item) => this.mightContain(item));
  }

  /**
   * Reset every bit and the item counter; size and hashCount are kept.
   */
  clear() {
    this.bitArray.fill(0);
    this.itemCount = 0;
  }

  /**
   * Theoretical false-positive probability for the current item count:
   * (1 - e^(-k * n / m))^k with k = hashCount, n = itemCount, m = size.
   *
   * @returns {number} Estimated probability.
   */
  getCurrentFalsePositiveRate() {
    const ratio = this.itemCount / this.size;
    return Math.pow(1 - Math.exp(-this.hashCount * ratio), this.hashCount);
  }

  /**
   * Collect statistics by counting the bits that are actually set.
   *
   * @returns {{size: number, hashCount: number, itemCount: number,
   *   bitsSet: number, loadFactor: number,
   *   estimatedFalsePositiveRate: number}} loadFactor is bitsSet / size and
   *   estimatedFalsePositiveRate is loadFactor raised to hashCount.
   */
  getStats() {
    let bitsSet = 0;
    for (let i = 0; i < this.size; i++) {
      if (this.getBit(i)) {
        bitsSet++;
      }
    }
    const loadFactor = bitsSet / this.size;
    const estimatedFalsePositiveRate = Math.pow(loadFactor, this.hashCount);
    return {
      size: this.size,
      hashCount: this.hashCount,
      itemCount: this.itemCount,
      bitsSet,
      loadFactor,
      estimatedFalsePositiveRate,
    };
  }

  /**
   * Serialize the filter to a plain, JSON-compatible object.
   *
   * @returns {{size: number, hashCount: number, itemCount: number,
   *   bitArray: number[]}} Snapshot; bitArray holds one number per byte.
   */
  toJSON() {
    return {
      size: this.size,
      hashCount: this.hashCount,
      itemCount: this.itemCount,
      bitArray: Array.from(this.bitArray),
    };
  }

  /**
   * Rebuild a filter from the object produced by toJSON(). The stored size
   * and hashCount are used as-is, so the constructor's sizing is bypassed.
   *
   * @param {{size: number, hashCount: number, itemCount: number,
   *   bitArray: ArrayLike<number>}} data - Serialized filter.
   * @returns {BloomFilter} Filter with a fresh copy of the bit array.
   */
  static fromJSON(data) {
    const filter = Object.create(BloomFilter.prototype);
    filter.size = data.size;
    filter.hashCount = data.hashCount;
    filter.itemCount = data.itemCount;
    filter.bitArray = new Uint8Array(data.bitArray);
    return filter;
  }

  /**
   * Union with another filter: the bitwise OR of both bit arrays.
   *
   * @param {BloomFilter} other - Filter with the same size and hashCount.
   * @returns {BloomFilter} New filter; its itemCount is the sum of both
   *   counts, so items present in both inputs are counted twice.
   * @throws {Error} If size or hashCount differ.
   */
  union(other) {
    if (this.size !== other.size || this.hashCount !== other.hashCount) {
      throw new Error("Bloom filters must have same size and hash count for union operation");
    }
    // Assemble the result directly (as fromJSON does) rather than
    // constructing a throwaway filter and overwriting all of its fields.
    const result = Object.create(BloomFilter.prototype);
    result.size = this.size;
    result.hashCount = this.hashCount;
    result.itemCount = this.itemCount + other.itemCount;
    result.bitArray = this.bitArray.map((byte, i) => byte | other.bitArray[i]);
    return result;
  }

  /**
   * Intersection with another filter: the bitwise AND of both bit arrays.
   *
   * @param {BloomFilter} other - Filter with the same size and hashCount.
   * @returns {BloomFilter} New filter; its itemCount is set to the smaller
   *   of the two counts, since the true overlap is not known.
   * @throws {Error} If size or hashCount differ.
   */
  intersect(other) {
    if (this.size !== other.size || this.hashCount !== other.hashCount) {
      throw new Error("Bloom filters must have same size and hash count for intersection operation");
    }
    const result = Object.create(BloomFilter.prototype);
    result.size = this.size;
    result.hashCount = this.hashCount;
    result.itemCount = Math.min(this.itemCount, other.itemCount);
    result.bitArray = this.bitArray.map((byte, i) => byte & other.bitArray[i]);
    return result;
  }
}
//# sourceMappingURL=bloom-filter.js.map