UNPKG

pdq-wasm

Version:

WebAssembly bindings for Meta's PDQ perceptual image hashing algorithm

388 lines 12.8 kB
/**
 * Browser-specific utilities for PDQ perceptual hashing
 * These utilities provide convenient helpers for working with PDQ in browser environments
 */
/**
 * Environment detection result
 */
export interface RuntimeEnvironment {
    /** Type of environment detected */
    type: 'browser' | 'worker' | 'node' | 'unknown';
    /** Whether the environment supports generateHashFromDataUrl */
    supportsDataUrl: boolean;
    /** Whether the environment supports generateHashFromBlob */
    supportsBlob: boolean;
    /** Recommended hash generation function name */
    recommendedAPI: string;
}
/**
 * Detect the current runtime environment and recommend the appropriate API
 *
 * @returns Environment information and API recommendations
 *
 * @example
 * ```typescript
 * import {
 *   getEnvironment,
 *   generateHashFromBlob,
 *   generateHashFromDataUrl,
 * } from 'pdq-wasm/browser';
 *
 * const env = getEnvironment();
 * console.log(`Running in: ${env.type}`);
 * console.log(`Use: ${env.recommendedAPI}`);
 *
 * if (env.supportsBlob) {
 *   const hash = await generateHashFromBlob(file);
 * } else if (env.supportsDataUrl) {
 *   const hash = await generateHashFromDataUrl(dataUrl);
 * }
 * ```
 */
export declare function getEnvironment(): RuntimeEnvironment;
/**
 * Result from a hash existence lookup
 */
export interface HashLookupResult {
    /** Whether the hash exists in the storage system */
    exists: boolean;
    /**
     * Optional data associated with the existing hash
     *
     * NOTE(review): typed as `any`, so the compiler will not check how callers
     * use it - validate the shape before relying on specific fields.
     */
    existing?: any;
}
/**
 * Hash checker function with optional chainable modifiers
 */
export type HashChecker = ((hash: string) => Promise<HashLookupResult>) & {
    /**
     * Returns a new checker that gracefully handles invalid hashes
     * Invalid hashes return `{ exists: false, existing: null }` instead of throwing
     *
     * @example
     * ```typescript
     * const checker = createHashChecker(lookup).ignoreInvalid();
     *
     * // Invalid hash - returns false instead of throwing
     * const result = await checker('invalid'); // { exists: false, existing: null }
     * ```
     */
    ignoreInvalid(): HashChecker;
    /**
     * Returns a new checker with result caching (memoization)
     *
     * @param ttl - Time-to-live in milliseconds (default: Infinity - cache forever)
     * @param maxSize - Maximum number of entries to cache (default: 1000). Uses LRU eviction.
     *
     * @example
     * ```typescript
     * // Cache forever with default max size (1000 entries)
     * const checker = createHashChecker(lookup).cached();
     *
     * // Cache with 5 minute expiration and custom max size
     * const checker = createHashChecker(lookup).cached(5 * 60 * 1000, 500);
     *
     * // Cache with custom max size only (no TTL)
     * const checker = createHashChecker(lookup).cached(Infinity, 100);
     *
     * // Clear cache when needed
     * checker.clearCache?.();
     * ```
     */
    cached(ttl?: number, maxSize?: number): HashChecker;
    /**
     * Clears the cache (only available on cached checkers)
     *
     * Declared optional, so call it with optional chaining: `checker.clearCache?.()`.
     */
    clearCache?(): void;
};
/**
 * Creates a hash existence checker with a custom lookup function
 * Supports fluent API for validation and caching behavior
 *
 * @param lookup - Function that checks if a hash exists in your storage system
 * @returns A hash checker function with chainable modifiers
 *
 * @example
 * ```typescript
 * // Simple checker - throws on invalid hash
 * const checkHash = createHashChecker(async (hash) => {
 *   const { data } = await supabase.rpc('check_hash_exists', { p_hash: hash });
 *   return data;
 * });
 *
 * const result = await checkHash(myHash);
 * ```
 *
 * @example
 * ```typescript
 * // With REST API
 * const checkHash = createHashChecker(async (hash) => {
 *   const response = await fetch(`/api/hashes/${hash}`);
 *   return response.json();
 * });
 * ```
 *
 * @example
 * ```typescript
 * // Gracefully ignore invalid hashes
 * const checkHash = createHashChecker(lookup).ignoreInvalid();
 *
 * // Invalid hash returns { exists: false } instead of throwing
 * const result = await checkHash('invalid-hash');
 * ```
 *
 * @example
 * ```typescript
 * // Cached with TTL
 * const checkHash = createHashChecker(lookup).cached(5 * 60 * 1000);
 *
 * // First call hits the database
 * await checkHash(hash1);
 *
 * // Second call within 5 minutes uses cached result
 * await checkHash(hash1);
 * ```
 *
 * @example
 * ```typescript
 * // Combined - ignore invalid + cached
 * const checkHash = createHashChecker(lookup)
 *   .ignoreInvalid()
 *   .cached(60 * 60 * 1000); // 1 hour cache
 *
 * // Clear cache when needed
 * checkHash.clearCache?.();
 * ```
 */
export declare function createHashChecker(lookup: (hash: string) => Promise<HashLookupResult>): HashChecker;
/**
 * Calculate Hamming distance between two PDQ hash strings (hex format)
 * Convenience wrapper around PDQ.hammingDistance that works with hex strings
 *
 * @param hash1 - First PDQ hash (64 hex characters)
 * @param hash2 - Second PDQ hash (64 hex characters)
 * @returns Hamming distance (0-256, where 0 = identical, 256 = completely different)
 *
 * @throws Error if either hash is not 64 hex characters
 *
 * @example
 * ```typescript
 * const hash1 = 'a1b2c3d4...'; // 64 hex chars
 * const hash2 = 'e5f6a7b8...'; // 64 hex chars
 *
 * const distance = hammingDistance(hash1, hash2);
 * console.log(`Distance: ${distance} bits`);
 *
 * if (distance <= 31) {
 *   console.log('Images are likely duplicates');
 * }
 * ```
 */
export declare function hammingDistance(hash1: string, hash2: string): number;
/**
 * Image data structure for PDQ hashing
 * Note: RGBA data from canvas is automatically converted to RGB internally
 */
export interface PDQImageData {
    /**
     * Raw pixel bytes.
     * NOTE(review): presumably tightly packed with `width * height * channels`
     * bytes - confirm the expected layout against the implementation.
     */
    data: Uint8Array;
    /** Image width (presumably in pixels - confirm) */
    width: number;
    /** Image height (presumably in pixels - confirm) */
    height: number;
    /**
     * Channels per pixel; only 1 or 3 are accepted by the type.
     * NOTE(review): presumably 1 = grayscale and 3 = RGB - confirm.
     */
    channels: 1 | 3;
}
/**
 * Generate PDQ hash from a Blob or File in a worker-compatible way
 *
 * **✅ RECOMMENDED** - Works in both browser main thread and Web Workers
 *
 * Uses modern browser APIs (createImageBitmap + OffscreenCanvas) that work across contexts.
 * This is the **preferred API** for most use cases, especially if you need Web Worker support.
 *
 * **For Workers:** This is the ONLY API that works - {@link generateHashFromDataUrl} will fail.
 *
 * **Browser Support:**
 * - Chrome 69+ (full support)
 * - Firefox 105+ (OffscreenCanvas added in 105)
 * - Safari 16.4+ (OffscreenCanvas support)
 * - Edge 79+
 *
 * **For older browsers** (main thread only), use {@link generateHashFromDataUrl} as fallback.
 *
 * @param blob Image blob or file
 * @returns Hex-encoded PDQ hash (64 character hex string)
 * @throws {Error} If createImageBitmap or OffscreenCanvas unavailable
 * @throws {Error} If image fails to decode or has invalid dimensions
 * @throws {Error} If image exceeds maximum dimension limit (10,000px)
 *
 * @see {@link generateHashFromDataUrl} for legacy browser fallback (main thread only)
 * @see {@link getEnvironment} to detect runtime environment and choose the right API
 *
 * @example
 * ```typescript
 * // ✅ In a Web Worker (PREFERRED)
 * self.onmessage = async (event) => {
 *   const file = event.data.file;
 *   const hash = await generateHashFromBlob(file);
 *   self.postMessage({ hash });
 * };
 * ```
 *
 * @example
 * ```typescript
 * // ✅ In a browser main thread (also works)
 * const fileInput = document.querySelector('input[type="file"]');
 * const file = fileInput.files[0];
 * const hash = await generateHashFromBlob(file);
 * console.log('Hash:', hash);
 * ```
 *
 * @example
 * ```typescript
 * // ✅ With fetch API
 * const response = await fetch('image.jpg');
 * const blob = await response.blob();
 * const hash = await generateHashFromBlob(blob);
 * ```
 */
export declare function generateHashFromBlob(blob: Blob): Promise<string>;
/**
 * Generate PDQ perceptual hash from an image data URL or blob URL
 *
 * **⚠️ BROWSER MAIN THREAD ONLY** - Requires DOM APIs (Image, Canvas, document)
 *
 * **For Web Workers:** Use {@link generateHashFromBlob} instead, which works in both
 * browsers and workers using modern APIs (createImageBitmap + OffscreenCanvas).
 *
 * **Migration Guide:**
 * ```typescript
 * // ❌ DON'T use in workers (will throw error)
 * const hash = await generateHashFromDataUrl(dataUrl);
 *
 * // ✅ DO use in workers
 * const hash = await generateHashFromBlob(file); // file is Blob/File
 * ```
 *
 * **Auto-cleanup:** Blob URLs can be automatically revoked after processing using the
 * `autoRevoke` parameter to prevent memory leaks. Useful when you don't need the blob
 * URL for preview display. Data URLs (data:image/...) are never revoked.
 *
 * @param dataUrl - Image data URL (data:image/...) or blob URL (blob:...)
 * @param autoRevoke - Automatically revoke blob URLs after processing (default: false)
 * @returns Promise resolving to 64-character hex hash string
 *
 * @throws Error if called in non-browser main thread environment (e.g., Web Worker, Node.js)
 * @throws Error if image fails to load
 * @throws Error if canvas context cannot be obtained
 *
 * @see {@link generateHashFromBlob} for worker-compatible alternative
 * @see {@link getEnvironment} to detect runtime environment and choose the right API
 *
 * @example
 * ```typescript
 * // Auto-revoke blob URL (when you don't need it for display)
 * const file = input.files[0];
 * const blobUrl = URL.createObjectURL(file);
 * const hash = await generateHashFromDataUrl(blobUrl, true);
 * // Blob URL automatically revoked!
 * ```
 *
 * @example
 * ```typescript
 * // Keep blob URL for preview (manual revocation required)
 * const blobUrl = URL.createObjectURL(file);
 * const hash = await generateHashFromDataUrl(blobUrl, false);
 * // Display preview using blobUrl...
 * // Later: URL.revokeObjectURL(blobUrl);
 * ```
 *
 * @example
 * ```typescript
 * // From canvas (data URLs don't need revocation)
 * const canvas = document.getElementById('myCanvas');
 * const dataUrl = canvas.toDataURL('image/png');
 * const hash = await generateHashFromDataUrl(dataUrl);
 * ```
 */
export declare function generateHashFromDataUrl(dataUrl: string, autoRevoke?: boolean): Promise<string>;
/**
 * File with hash metadata
 */
export interface FileWithHash {
    /** Unique identifier for the file */
    id: string;
    /** File name */
    name: string;
    /** Preview data URL or blob URL */
    preview: string;
    /** MIME type */
    type: string;
    /** Optional metadata including hash information */
    meta?: {
        /** PDQ hash (64 hex characters) or null if hashing failed */
        hash?: string | null;
        /** Error message if hashing failed */
        hashError?: string;
        /** Whether file is selected */
        isSelected?: boolean;
        /** File location */
        location?: string;
        /** User note */
        note?: string;
    };
}
/**
 * Progress information for duplicate detection
 */
export interface DetectionProgress {
    /** Total number of files to process */
    totalFiles: number;
    /** Number of files processed so far */
    processedFiles: number;
    /** Name of file currently being processed */
    currentFile: string;
    /** Number of duplicates found so far */
    duplicatesFound: number;
}
/**
 * Callback function for progress updates
 */
export type ProgressCallback = (progress: DetectionProgress) => void;
/**
 * Detect duplicate images by comparing PDQ perceptual hashes
 * Generates hashes for all images and finds groups of similar images
 *
 * @param files - Array of files with preview URLs
 * @param threshold - Hamming distance threshold for duplicates (default: 31, PDQ recommended)
 * @param onProgress - Optional callback for progress updates
 * @returns Promise resolving to array of duplicate groups
 *
 * @example
 * ```typescript
 * const files = [
 *   { id: '1', name: 'photo1.jpg', preview: 'blob:...', type: 'image/jpeg' },
 *   { id: '2', name: 'photo2.jpg', preview: 'blob:...', type: 'image/jpeg' },
 * ];
 *
 * const duplicates = await detectDuplicatesByHash(files);
 *
 * duplicates.forEach(group => {
 *   console.log('Duplicate group:');
 *   group.forEach(file => console.log(` - ${file.name}`));
 * });
 * ```
 *
 * @example
 * ```typescript
 * // With progress callback
 * const duplicates = await detectDuplicatesByHash(
 *   files,
 *   31,
 *   (progress) => {
 *     console.log(`${progress.processedFiles}/${progress.totalFiles} processed`);
 *     console.log(`Currently processing: ${progress.currentFile}`);
 *     console.log(`Duplicates found: ${progress.duplicatesFound}`);
 *   }
 * );
 * ```
 *
 * @example
 * ```typescript
 * // Custom threshold (more strict)
 * const duplicates = await detectDuplicatesByHash(files, 15);
 * ```
 */
export declare function detectDuplicatesByHash(files: FileWithHash[], threshold?: number, onProgress?: ProgressCallback): Promise<FileWithHash[][]>;
//# sourceMappingURL=browser.d.ts.map