claude-flow-novice
Version:
Claude Flow Novice - Advanced orchestration platform for multi-agent AI workflows with CFN Loop architecture. Includes Local RuVector Accelerator and all CFN skills for complete functionality.
439 lines (438 loc) • 16.4 kB
JavaScript
/**
* Message Deduplicator
*
* Provides SHA256-based message fingerprinting and deduplication for Redis queues.
* Part of Task 3.4: Redis Queue Consistency & Recovery (Integration Standardization Sprint 3)
*
* Features:
* - SHA256-based message fingerprinting
* - Deduplication window (default 1 hour)
* - Idempotency key tracking in Redis
* - Automatic cleanup of expired keys
* - Batch deduplication support
*
* Usage:
* const deduplicator = new MessageDeduplicator(redisClient);
* const isDuplicate = await deduplicator.isDuplicate(message);
* if (!isDuplicate) {
* await deduplicator.markProcessed(message);
* }
*/ import * as crypto from 'crypto';
import { createLogger } from './logging.js';
import { createError, ErrorCode, isRetryableError } from './errors.js';
import { withRetry } from './retry.js';
// Module-level logger scoped to this component.
const logger = createLogger('message-deduplicator');
/**
 * Default deduplication options.
 *
 * Every field can be overridden per-instance through the
 * MessageDeduplicator constructor's `options` argument.
 */ const DEFAULT_OPTIONS = {
    windowMs: 60 * 60 * 1000, // deduplication window: 1 hour (also the Redis key TTL)
    keyPrefix: 'dedup:', // Redis key namespace for stored fingerprints
    autoCleanup: true, // run a periodic sweep (Redis also expires keys via TTL on its own)
    cleanupIntervalMs: 5 * 60 * 1000, // sweep interval: 5 minutes
    maxRetries: 3 // max attempts for each Redis operation (passed to withRetry)
};
/**
 * Message Deduplicator
 *
 * Provides idempotent message processing using SHA256-based fingerprinting.
 * Each processed message's fingerprint is stored as a Redis key whose TTL
 * equals the deduplication window, so Redis expires entries automatically.
 *
 * NOTE(review): isDuplicate() followed by markProcessed() is check-then-act
 * and not atomic — two consumers racing on the same message can both observe
 * "not duplicate". If strict exactly-once semantics are required, an atomic
 * SET NX based check-and-mark should be added; confirm with callers.
 */ export class MessageDeduplicator {
    redis; // Redis client (node-redis v4 style API: set(key, val, { PX }), multi(), exists(), ...)
    options; // effective options: DEFAULT_OPTIONS merged with constructor overrides
    cleanupTimer = null; // setInterval handle for auto cleanup; null when stopped
    // Per-instance counters; reset via resetStats(). Not persisted to Redis.
    stats = {
        processed: 0,
        duplicates: 0,
        unique: 0
    };
    /**
     * Create a new MessageDeduplicator instance
     *
     * @param redis - Redis client instance
     * @param options - Deduplication options (see DEFAULT_OPTIONS for fields/defaults)
     */ constructor(redis, options = {}){
        this.redis = redis;
        // Shallow-merge: caller-supplied fields win over defaults.
        this.options = {
            ...DEFAULT_OPTIONS,
            ...options
        };
        // Start automatic cleanup if enabled
        if (this.options.autoCleanup) {
            this.startAutoCleanup();
        }
        logger.info('MessageDeduplicator initialized', {
            windowMs: this.options.windowMs,
            keyPrefix: this.options.keyPrefix,
            autoCleanup: this.options.autoCleanup
        });
    }
    /**
     * Create message fingerprint (SHA256 hash)
     *
     * Objects are serialized with recursively sorted keys so that two
     * messages carrying the same data with different key insertion order
     * produce the same fingerprint.
     *
     * Fix: the previous implementation passed the top-level sorted key array
     * as the JSON.stringify replacer. A replacer ARRAY filters keys at every
     * nesting level, so nested properties whose names did not also appear at
     * the top level were silently dropped — messages differing only in nested
     * values collided and were wrongly reported as duplicates. It also threw
     * a TypeError on `null` input (Object.keys(null)). Note: fingerprints of
     * nested objects produced by the old code are not comparable with the new
     * ones; at most one deduplication window of history is affected.
     *
     * @param message - Message content to fingerprint (string, object, or primitive)
     * @returns SHA256 hash as a hex string
     */ createFingerprint(message) {
        // Normalize message to a deterministic string for consistent hashing.
        let content;
        if (typeof message === 'string') {
            content = message;
        } else if (message !== null && typeof message === 'object') {
            content = this.stableStringify(message);
        } else {
            // null, undefined, numbers, booleans, bigints, symbols
            content = String(message);
        }
        // Generate SHA256 hash
        const hash = crypto.createHash('sha256').update(content).digest('hex');
        logger.debug('Created message fingerprint', {
            hash: hash.substring(0, 16) + '...',
            contentLength: content.length
        });
        return hash;
    }
    /**
     * Deterministic JSON serialization with recursively sorted object keys.
     *
     * Mirrors JSON.stringify semantics for `undefined`: object entries with
     * undefined values are dropped; unserializable array elements become null.
     *
     * @param value - Value to serialize
     * @returns Canonical JSON string
     */ stableStringify(value) {
        if (value === null || typeof value !== 'object') {
            // JSON.stringify yields undefined for undefined/functions/symbols;
            // coerce to 'null' so recursion always returns a string.
            return JSON.stringify(value) ?? 'null';
        }
        if (Array.isArray(value)) {
            // Array order is significant — preserve it.
            return `[${value.map((item)=>this.stableStringify(item)).join(',')}]`;
        }
        const parts = [];
        for (const key of Object.keys(value).sort()){
            // Skip undefined values to match JSON.stringify object semantics.
            if (value[key] !== undefined) {
                parts.push(`${JSON.stringify(key)}:${this.stableStringify(value[key])}`);
            }
        }
        return `{${parts.join(',')}}`;
    }
    /**
     * Check if message is a duplicate
     *
     * Fails open: on Redis errors the message is treated as unique so
     * processing is not blocked (at-least-once over exactly-once).
     *
     * @param message - Message content to check
     * @returns True if duplicate, false if unique
     */ async isDuplicate(message) {
        const hash = this.createFingerprint(message);
        const key = this.getRedisKey(hash);
        try {
            const exists = await withRetry(async ()=>{
                const result = await this.redis.exists(key);
                return result === 1;
            }, {
                maxAttempts: this.options.maxRetries,
                shouldRetry: isRetryableError
            });
            this.stats.processed++;
            if (exists) {
                this.stats.duplicates++;
                // Increment seen count (best-effort; see incrementSeenCount)
                await this.incrementSeenCount(hash);
                logger.debug('Duplicate message detected', {
                    hash: hash.substring(0, 16) + '...',
                    duplicateCount: this.stats.duplicates
                });
                return true;
            }
            this.stats.unique++;
            return false;
        } catch (error) {
            logger.error('Failed to check duplicate', error instanceof Error ? error : new Error(String(error)), {
                hash: hash.substring(0, 16) + '...'
            });
            // On error, assume not duplicate to allow processing
            return false;
        }
    }
    /**
     * Mark message as processed
     *
     * Stores the full fingerprint record (hash, content, timestamps,
     * seenCount, caller metadata) under a key that expires after windowMs.
     *
     * @param message - Message content to mark
     * @param metadata - Optional metadata to store (merged into the record;
     *                   may override the default fields)
     * @throws DB_QUERY_FAILED wrapped error when Redis writes keep failing
     */ async markProcessed(message, metadata) {
        const hash = this.createFingerprint(message);
        const key = this.getRedisKey(hash);
        try {
            const now = new Date();
            const expiresAt = new Date(now.getTime() + this.options.windowMs);
            const fingerprint = {
                hash,
                content: message,
                firstSeenAt: now,
                expiresAt,
                seenCount: 1,
                ...metadata
            };
            await withRetry(async ()=>{
                // Store fingerprint with TTL (PX = expiry in milliseconds)
                await this.redis.set(key, JSON.stringify(fingerprint), {
                    PX: this.options.windowMs
                });
            }, {
                maxAttempts: this.options.maxRetries,
                shouldRetry: isRetryableError
            });
            logger.debug('Marked message as processed', {
                hash: hash.substring(0, 16) + '...',
                expiresAt: expiresAt.toISOString()
            });
        } catch (error) {
            logger.error('Failed to mark message as processed', error instanceof Error ? error : new Error(String(error)), {
                hash: hash.substring(0, 16) + '...'
            });
            throw createError(ErrorCode.DB_QUERY_FAILED, 'Failed to mark message as processed', {
                hash
            }, error instanceof Error ? error : undefined);
        }
    }
    /**
     * Batch check for duplicates
     *
     * Uses a single MULTI pipeline round-trip for all EXISTS checks.
     * Unlike isDuplicate(), this does NOT increment seenCount for detected
     * duplicates, and it fails closed (throws) rather than open.
     *
     * NOTE(review): assumes multi().exec() resolves to a flat array of
     * replies (node-redis v4). ioredis returns [err, reply] pairs — verify
     * against the client actually injected.
     *
     * @param messages - Array of messages to check
     * @returns Map of message hash to duplicate status
     * @throws DB_QUERY_FAILED wrapped error when the pipeline keeps failing
     */ async batchIsDuplicate(messages) {
        const results = new Map();
        try {
            // Create fingerprints for all messages
            const fingerprints = messages.map((msg)=>({
                    message: msg,
                    hash: this.createFingerprint(msg)
                }));
            // Check existence in batch using a pipeline of EXISTS commands
            const keys = fingerprints.map((fp)=>this.getRedisKey(fp.hash));
            const existsResults = await withRetry(async ()=>{
                // Use pipeline for efficient batch operations
                const pipeline = this.redis.multi();
                keys.forEach((key)=>pipeline.exists(key));
                return await pipeline.exec();
            }, {
                maxAttempts: this.options.maxRetries,
                shouldRetry: isRetryableError
            });
            // Build results map; duplicate hashes within the batch collapse
            // onto one Map entry by construction.
            fingerprints.forEach((fp, index)=>{
                const exists = existsResults && existsResults[index] === 1;
                results.set(fp.hash, !!exists);
                this.stats.processed++;
                if (exists) {
                    this.stats.duplicates++;
                } else {
                    this.stats.unique++;
                }
            });
            logger.debug('Batch duplicate check complete', {
                totalMessages: messages.length,
                duplicates: Array.from(results.values()).filter((v)=>v).length
            });
            return results;
        } catch (error) {
            logger.error('Failed to batch check duplicates', error instanceof Error ? error : new Error(String(error)), {
                messageCount: messages.length
            });
            throw createError(ErrorCode.DB_QUERY_FAILED, 'Failed to batch check duplicates', {
                messageCount: messages.length
            }, error instanceof Error ? error : undefined);
        }
    }
    /**
     * Batch mark messages as processed
     *
     * All SETs share one pipeline round-trip; all records get the same
     * timestamps and TTL.
     *
     * @param messages - Array of messages to mark
     * @throws DB_QUERY_FAILED wrapped error when the pipeline keeps failing
     */ async batchMarkProcessed(messages) {
        try {
            const now = new Date();
            const expiresAt = new Date(now.getTime() + this.options.windowMs);
            // Use pipeline for efficient batch operations
            const pipeline = this.redis.multi();
            messages.forEach((message)=>{
                const hash = this.createFingerprint(message);
                const key = this.getRedisKey(hash);
                const fingerprint = {
                    hash,
                    content: message,
                    firstSeenAt: now,
                    expiresAt,
                    seenCount: 1
                };
                pipeline.set(key, JSON.stringify(fingerprint), {
                    PX: this.options.windowMs
                });
            });
            // NOTE(review): the pipeline is built outside withRetry, so a retry
            // re-executes the same queued commands — verify the client allows
            // re-exec of a MULTI object; rebuild inside the callback if not.
            await withRetry(async ()=>await pipeline.exec(), {
                maxAttempts: this.options.maxRetries,
                shouldRetry: isRetryableError
            });
            logger.debug('Batch marked messages as processed', {
                count: messages.length,
                expiresAt: expiresAt.toISOString()
            });
        } catch (error) {
            logger.error('Failed to batch mark messages', error instanceof Error ? error : new Error(String(error)), {
                messageCount: messages.length
            });
            throw createError(ErrorCode.DB_QUERY_FAILED, 'Failed to batch mark messages', {
                messageCount: messages.length
            }, error instanceof Error ? error : undefined);
        }
    }
    /**
     * Get message fingerprint details
     *
     * Fails soft: returns null on Redis/parse errors as well as on a miss.
     *
     * @param message - Message to get fingerprint for
     * @returns Fingerprint metadata or null if not found
     */ async getFingerprint(message) {
        const hash = this.createFingerprint(message);
        const key = this.getRedisKey(hash);
        try {
            const data = await withRetry(async ()=>await this.redis.get(key), {
                maxAttempts: this.options.maxRetries,
                shouldRetry: isRetryableError
            });
            if (!data) {
                return null;
            }
            const fingerprint = JSON.parse(data);
            // Convert date strings back to Date objects (JSON round-trip
            // serializes Dates to ISO strings)
            fingerprint.firstSeenAt = new Date(fingerprint.firstSeenAt);
            fingerprint.expiresAt = new Date(fingerprint.expiresAt);
            return fingerprint;
        } catch (error) {
            logger.error('Failed to get fingerprint', error instanceof Error ? error : new Error(String(error)), {
                hash: hash.substring(0, 16) + '...'
            });
            return null;
        }
    }
    /**
     * Remove message fingerprint
     *
     * After removal, the same message is treated as unique again.
     *
     * @param message - Message to remove fingerprint for
     * @throws DB_QUERY_FAILED wrapped error when the delete keeps failing
     */ async removeFingerprint(message) {
        const hash = this.createFingerprint(message);
        const key = this.getRedisKey(hash);
        try {
            await withRetry(async ()=>await this.redis.del(key), {
                maxAttempts: this.options.maxRetries,
                shouldRetry: isRetryableError
            });
            logger.debug('Removed fingerprint', {
                hash: hash.substring(0, 16) + '...'
            });
        } catch (error) {
            logger.error('Failed to remove fingerprint', error instanceof Error ? error : new Error(String(error)), {
                hash: hash.substring(0, 16) + '...'
            });
            throw createError(ErrorCode.DB_QUERY_FAILED, 'Failed to remove fingerprint', {
                hash
            }, error instanceof Error ? error : undefined);
        }
    }
    /**
     * Cleanup expired fingerprints
     *
     * Note: Redis automatically removes expired keys, but this can be used
     * for manual cleanup.
     *
     * NOTE(review): KEYS is O(N) and blocks Redis on large keyspaces;
     * consider SCAN/scanIterator if fingerprint volume grows — verify the
     * client's API before switching.
     *
     * @returns Number of fingerprints cleaned up
     * @throws DB_QUERY_FAILED wrapped error on Redis failure
     */ async cleanupExpired() {
        try {
            const pattern = `${this.options.keyPrefix}*`;
            const keys = await this.redis.keys(pattern);
            let cleanedCount = 0;
            for (const key of keys){
                const ttl = await this.redis.ttl(key);
                // If TTL is -1 (no expiration) or -2 (key doesn't exist), skip
                if (ttl === -1 || ttl === -2) {
                    continue;
                }
                // If TTL is 0 or negative (expired but not yet removed), delete
                if (ttl <= 0) {
                    await this.redis.del(key);
                    cleanedCount++;
                }
            }
            logger.info('Cleaned up expired fingerprints', {
                cleanedCount,
                totalKeys: keys.length
            });
            return cleanedCount;
        } catch (error) {
            logger.error('Failed to cleanup expired fingerprints', error instanceof Error ? error : new Error(String(error)));
            throw createError(ErrorCode.DB_QUERY_FAILED, 'Failed to cleanup expired fingerprints', {}, error instanceof Error ? error : undefined);
        }
    }
    /**
     * Get deduplication statistics
     *
     * Counters are in-memory and per-instance; activeFingerprints is read
     * live from Redis (0 when Redis is unreachable — the method never throws).
     *
     * @returns Current statistics
     */ async getStats() {
        try {
            const pattern = `${this.options.keyPrefix}*`;
            const keys = await this.redis.keys(pattern);
            return {
                totalProcessed: this.stats.processed,
                duplicatesDetected: this.stats.duplicates,
                uniqueMessages: this.stats.unique,
                deduplicationRate: this.stats.processed > 0 ? this.stats.duplicates / this.stats.processed : 0,
                activeFingerprints: keys.length
            };
        } catch (error) {
            logger.error('Failed to get stats', error instanceof Error ? error : new Error(String(error)));
            return {
                totalProcessed: this.stats.processed,
                duplicatesDetected: this.stats.duplicates,
                uniqueMessages: this.stats.unique,
                deduplicationRate: this.stats.processed > 0 ? this.stats.duplicates / this.stats.processed : 0,
                activeFingerprints: 0
            };
        }
    }
    /**
     * Reset statistics (in-memory counters only; Redis keys are untouched)
     */ resetStats() {
        this.stats = {
            processed: 0,
            duplicates: 0,
            unique: 0
        };
        logger.debug('Statistics reset');
    }
    /**
     * Stop automatic cleanup (no-op if it is not running)
     */ stopAutoCleanup() {
        if (this.cleanupTimer) {
            clearInterval(this.cleanupTimer);
            this.cleanupTimer = null;
            logger.debug('Auto cleanup stopped');
        }
    }
    /**
     * Shutdown deduplicator (stop auto cleanup; the Redis client itself is
     * owned by the caller and is not closed here)
     */ shutdown() {
        this.stopAutoCleanup();
        logger.info('MessageDeduplicator shutdown');
    }
    /**
     * Get Redis key for fingerprint hash
     *
     * @param hash - SHA256 fingerprint (hex)
     * @returns Namespaced Redis key
     */ getRedisKey(hash) {
        return `${this.options.keyPrefix}${hash}`;
    }
    /**
     * Increment seen count for fingerprint
     *
     * Best-effort read-modify-write (not atomic across consumers); failures
     * are logged at debug level and never propagated.
     *
     * @param hash - SHA256 fingerprint (hex)
     */ async incrementSeenCount(hash) {
        const key = this.getRedisKey(hash);
        try {
            const data = await this.redis.get(key);
            if (data) {
                const fingerprint = JSON.parse(data);
                fingerprint.seenCount++;
                await this.redis.set(key, JSON.stringify(fingerprint), {
                    KEEPTTL: true
                } // Preserve existing TTL
                );
            }
        } catch (error) {
            // Log but don't throw - incrementing seen count is not critical
            logger.debug('Failed to increment seen count', {
                hash: hash.substring(0, 16) + '...',
                error: error instanceof Error ? error.message : String(error)
            });
        }
    }
    /**
     * Start automatic cleanup timer
     *
     * Fix: the interval handle is unref'd (where supported) so a pending
     * timer does not keep the Node.js process alive if the caller forgets
     * to call shutdown().
     */ startAutoCleanup() {
        this.cleanupTimer = setInterval(async ()=>{
            try {
                await this.cleanupExpired();
            } catch (error) {
                logger.error('Auto cleanup failed', error instanceof Error ? error : new Error(String(error)));
            }
        }, this.options.cleanupIntervalMs);
        // unref() exists on Node.js timers but not on browser/test fakes.
        this.cleanupTimer.unref?.();
        logger.debug('Auto cleanup started', {
            intervalMs: this.options.cleanupIntervalMs
        });
    }
}
//# sourceMappingURL=message-deduplicator.js.map