// pdq-wasm
// Version:
// WebAssembly bindings for Meta's PDQ perceptual image hashing algorithm
// 388 lines • 12.8 kB
// TypeScript
/**
* Browser-specific utilities for PDQ perceptual hashing
* These utilities provide convenient helpers for working with PDQ in browser environments
*/
/**
* Runtime environment description returned by {@link getEnvironment}
*
* Check the `supports*` flags (or `recommendedAPI`) before choosing a hash
* generation function, since not every function works in every environment.
*/
export interface RuntimeEnvironment {
/** Type of environment detected */
type: 'browser' | 'worker' | 'node' | 'unknown';
/** Whether the environment supports {@link generateHashFromDataUrl} (documented as browser main thread only) */
supportsDataUrl: boolean;
/** Whether the environment supports {@link generateHashFromBlob} (documented as working in the main thread and Web Workers) */
supportsBlob: boolean;
/**
* Recommended hash generation function name
* NOTE(review): presumably the name of one of the exported hash generation
* functions - confirm the exact values against the implementation
*/
recommendedAPI: string;
}
/**
* Detect the current runtime environment and recommend the appropriate API
*
* @returns Environment information and API recommendations
*
* @example
* ```typescript
* import {
*   getEnvironment,
*   generateHashFromBlob,
*   generateHashFromDataUrl,
* } from 'pdq-wasm/browser';
*
* const env = getEnvironment();
* console.log(`Running in: ${env.type}`);
* console.log(`Use: ${env.recommendedAPI}`);
*
* // `file` is a Blob/File and `dataUrl` a data/blob URL string obtained elsewhere
* let hash: string | undefined;
* if (env.supportsBlob) {
*   hash = await generateHashFromBlob(file);
* } else if (env.supportsDataUrl) {
*   hash = await generateHashFromDataUrl(dataUrl);
* }
* ```
*/
export declare function getEnvironment(): RuntimeEnvironment;
/**
* Result from a hash existence lookup
*
* This is the shape a `lookup` function passed to {@link createHashChecker}
* must resolve to, and the shape a {@link HashChecker} resolves to.
*/
export interface HashLookupResult {
/** Whether the hash exists in the storage system */
exists: boolean;
/**
* Optional data associated with the existing hash
*
* Typed as `any`, so it is not type-checked; validate or narrow it before use.
* May be `null`: checkers created with `ignoreInvalid()` are documented to
* return `{ exists: false, existing: null }` for invalid hashes.
*/
existing?: any;
}
/**
* Hash checker function with optional chainable modifiers
*
* Call it directly with a hash string to perform a lookup. Each modifier
* returns a new checker, so modifiers can be chained.
*/
export type HashChecker = ((hash: string) => Promise<HashLookupResult>) & {
/**
* Returns a new checker that gracefully handles invalid hashes
* Invalid hashes return `{ exists: false, existing: null }` instead of throwing
*
* @example
* ```typescript
* const checker = createHashChecker(lookup).ignoreInvalid();
*
* // Invalid hash - returns false instead of throwing
* const result = await checker('invalid'); // { exists: false, existing: null }
* ```
*/
ignoreInvalid(): HashChecker;
/**
* Returns a new checker with result caching (memoization)
*
* @param ttl - Time-to-live in milliseconds (default: Infinity - cache forever)
* @param maxSize - Maximum number of entries to cache (default: 1000). Uses LRU eviction.
*
* @example
* ```typescript
* // Cache forever with default max size (1000 entries)
* const foreverChecker = createHashChecker(lookup).cached();
*
* // Cache with 5 minute expiration and custom max size
* const expiringChecker = createHashChecker(lookup).cached(5 * 60 * 1000, 500);
*
* // Cache with custom max size only (no TTL)
* const boundedChecker = createHashChecker(lookup).cached(Infinity, 100);
*
* // Clear cache when needed
* foreverChecker.clearCache?.();
* ```
*/
cached(ttl?: number, maxSize?: number): HashChecker;
/**
* Clears the cache (only available on cached checkers)
*
* The member is optional because uncached checkers do not provide it;
* call it with optional chaining: `checker.clearCache?.()`.
*/
clearCache?(): void;
};
/**
* Creates a hash existence checker with a custom lookup function
* Supports fluent API for validation and caching behavior
*
* @param lookup - Function that checks if a hash exists in your storage system;
* it receives the hash string and must resolve to a {@link HashLookupResult}
* @returns A hash checker function with chainable modifiers
*
* @example
* ```typescript
* // Simple checker - throws on invalid hash
* const checkHash = createHashChecker(async (hash) => {
*   const { data } = await supabase.rpc('check_hash_exists', { p_hash: hash });
*   return data;
* });
*
* const result = await checkHash(myHash);
* ```
*
* @example
* ```typescript
* // With REST API
* const checkHash = createHashChecker(async (hash) => {
*   const response = await fetch(`/api/hashes/${hash}`);
*   return response.json();
* });
* ```
*
* @example
* ```typescript
* // Gracefully ignore invalid hashes
* const checkHash = createHashChecker(lookup).ignoreInvalid();
*
* // Invalid hash returns { exists: false, existing: null } instead of throwing
* const result = await checkHash('invalid-hash');
* ```
*
* @example
* ```typescript
* // Cached with TTL
* const checkHash = createHashChecker(lookup).cached(5 * 60 * 1000);
*
* // First call hits the database
* await checkHash(hash1);
*
* // Second call within 5 minutes uses cached result
* await checkHash(hash1);
* ```
*
* @example
* ```typescript
* // Combined - ignore invalid + cached
* const checkHash = createHashChecker(lookup)
*   .ignoreInvalid()
*   .cached(60 * 60 * 1000); // 1 hour cache
*
* // Clear cache when needed
* checkHash.clearCache?.();
* ```
*/
export declare function createHashChecker(lookup: (hash: string) => Promise<HashLookupResult>): HashChecker;
/**
* Calculate Hamming distance between two PDQ hash strings (hex format)
* Convenience wrapper around PDQ.hammingDistance that works with hex strings
*
* @param hash1 - First PDQ hash (64 hex characters)
* @param hash2 - Second PDQ hash (64 hex characters)
* @returns Hamming distance (0-256, where 0 = identical, 256 = every bit differs)
*
* @throws Error if either hash is not 64 hex characters
*
* @example
* ```typescript
* const hash1 = 'a1b2c3d4...'; // 64 hex chars
* const hash2 = 'e5f6a7b8...'; // 64 hex chars
*
* const distance = hammingDistance(hash1, hash2);
* console.log(`Distance: ${distance} bits`);
*
* if (distance <= 31) {
*   console.log('Images are likely duplicates');
* }
* ```
*/
export declare function hammingDistance(hash1: string, hash2: string): number;
/**
* Image data structure for PDQ hashing
* Note: RGBA data from canvas is automatically converted to RGB internally
*/
export interface PDQImageData {
/**
* Raw pixel bytes
* NOTE(review): presumably tightly packed, `width * height * channels` bytes
* in row-major order - confirm against the implementation
*/
data: Uint8Array;
/** Image width (presumably in pixels) */
width: number;
/** Image height (presumably in pixels) */
height: number;
/**
* Number of channels per pixel
* NOTE(review): presumably 1 = grayscale and 3 = RGB - confirm
*/
channels: 1 | 3;
}
/**
* Generate PDQ hash from a Blob or File in a worker-compatible way
*
* **✅ RECOMMENDED** - Works in both browser main thread and Web Workers
*
* Uses modern browser APIs (createImageBitmap + OffscreenCanvas) that work across contexts.
* This is the **preferred API** for most use cases, especially if you need Web Worker support.
*
* **For Workers:** This is the ONLY API that works - {@link generateHashFromDataUrl} will fail.
*
* **Browser Support:**
* - Chrome 69+ (full support)
* - Firefox 105+ (OffscreenCanvas added in 105)
* - Safari 16.4+ (OffscreenCanvas support)
* - Edge 79+
*
* **For older browsers** (main thread only), use {@link generateHashFromDataUrl} as fallback.
*
* @param blob - Image blob or file
* @returns Promise resolving to hex-encoded PDQ hash (64 character hex string)
* @throws Error if createImageBitmap or OffscreenCanvas unavailable
* @throws Error if image fails to decode or has invalid dimensions
* @throws Error if image exceeds maximum dimension limit (10,000px)
*
* @see {@link generateHashFromDataUrl} for legacy browser fallback (main thread only)
* @see {@link getEnvironment} to detect runtime environment and choose the right API
*
* @example
* ```typescript
* // ✅ In a Web Worker (PREFERRED)
* self.onmessage = async (event) => {
*   const file = event.data.file;
*   const hash = await generateHashFromBlob(file);
*   self.postMessage({ hash });
* };
* ```
*
* @example
* ```typescript
* // ✅ In a browser main thread (also works)
* const fileInput = document.querySelector<HTMLInputElement>('input[type="file"]');
* const file = fileInput?.files?.[0];
* if (file) {
*   const hash = await generateHashFromBlob(file);
*   console.log('Hash:', hash);
* }
* ```
*
* @example
* ```typescript
* // ✅ With fetch API
* const response = await fetch('image.jpg');
* const blob = await response.blob();
* const hash = await generateHashFromBlob(blob);
* ```
*/
export declare function generateHashFromBlob(blob: Blob): Promise<string>;
/**
* Generate PDQ perceptual hash from an image data URL or blob URL
*
* **⚠️ BROWSER MAIN THREAD ONLY** - Requires DOM APIs (Image, Canvas, document)
*
* **For Web Workers:** Use {@link generateHashFromBlob} instead, which works in both
* browsers and workers using modern APIs (createImageBitmap + OffscreenCanvas).
*
* **Migration Guide:**
* ```typescript
* // ❌ DON'T use in workers (will throw error)
* const hash = await generateHashFromDataUrl(dataUrl);
*
* // ✅ DO use in workers
* const hash = await generateHashFromBlob(file); // file is Blob/File
* ```
*
* **Auto-cleanup:** Blob URLs can be automatically revoked after processing using the
* `autoRevoke` parameter to prevent memory leaks. Useful when you don't need the blob
* URL for preview display. Data URLs (data:image/...) are never revoked.
*
* @param dataUrl - Image data URL (data:image/...) or blob URL (blob:...)
* @param autoRevoke - Automatically revoke blob URLs after processing (default: false)
* @returns Promise resolving to 64-character hex hash string
*
* @throws Error if called in non-browser main thread environment (e.g., Web Worker, Node.js)
* @throws Error if image fails to load
* @throws Error if canvas context cannot be obtained
*
* @see {@link generateHashFromBlob} for worker-compatible alternative
* @see {@link getEnvironment} to detect runtime environment and choose the right API
*
* @example
* ```typescript
* // Auto-revoke blob URL (when you don't need it for display)
* // `input` is an <input type="file"> element with a file selected
* const file = input.files[0];
* const blobUrl = URL.createObjectURL(file);
* const hash = await generateHashFromDataUrl(blobUrl, true);
* // Blob URL automatically revoked!
* ```
*
* @example
* ```typescript
* // Keep blob URL for preview (manual revocation required)
* // `file` is a Blob/File obtained elsewhere
* const blobUrl = URL.createObjectURL(file);
* const hash = await generateHashFromDataUrl(blobUrl, false);
* // Display preview using blobUrl...
* // Later: URL.revokeObjectURL(blobUrl);
* ```
*
* @example
* ```typescript
* // From canvas (data URLs don't need revocation)
* const canvas = document.getElementById('myCanvas') as HTMLCanvasElement;
* const dataUrl = canvas.toDataURL('image/png');
* const hash = await generateHashFromDataUrl(dataUrl);
* ```
*/
export declare function generateHashFromDataUrl(dataUrl: string, autoRevoke?: boolean): Promise<string>;
/**
* File with hash metadata
*
* Element type of the input and of the result groups of
* {@link detectDuplicatesByHash}.
*/
export interface FileWithHash {
/** Unique identifier for the file */
id: string;
/** File name */
name: string;
/**
* Preview data URL or blob URL
* NOTE(review): presumably this URL is the image that gets hashed during
* duplicate detection - confirm against the implementation
*/
preview: string;
/** MIME type */
type: string;
/** Optional metadata including hash information */
meta?: {
/** PDQ hash (64 hex characters) or null if hashing failed; may also be absent */
hash?: string | null;
/** Error message if hashing failed */
hashError?: string;
/** Whether file is selected */
isSelected?: boolean;
/** File location */
location?: string;
/** User note */
note?: string;
};
}
/**
* Progress information for duplicate detection
*
* Passed to a {@link ProgressCallback} supplied to {@link detectDuplicatesByHash}.
*/
export interface DetectionProgress {
/** Total number of files to process */
totalFiles: number;
/** Number of files processed so far */
processedFiles: number;
/** Name of file currently being processed */
currentFile: string;
/** Number of duplicates found so far */
duplicatesFound: number;
}
/**
* Callback function for progress updates
*
* Receives a {@link DetectionProgress} snapshot; its return value is ignored.
* NOTE(review): presumably invoked once per processed file - confirm the
* cadence against the implementation.
*/
export type ProgressCallback = (progress: DetectionProgress) => void;
/**
* Detect duplicate images by comparing PDQ perceptual hashes
* Generates hashes for all images and finds groups of similar images
*
* @param files - Array of files with preview URLs
* @param threshold - Hamming distance threshold for duplicates (default: 31, PDQ recommended).
* NOTE(review): presumably a distance at or below the threshold counts as a
* duplicate - confirm inclusivity against the implementation
* @param onProgress - Optional callback for progress updates
* @returns Promise resolving to array of duplicate groups; each group is an
* array of the files considered duplicates of one another
*
* @example
* ```typescript
* const files = [
*   { id: '1', name: 'photo1.jpg', preview: 'blob:...', type: 'image/jpeg' },
*   { id: '2', name: 'photo2.jpg', preview: 'blob:...', type: 'image/jpeg' },
* ];
*
* const duplicates = await detectDuplicatesByHash(files);
*
* duplicates.forEach(group => {
*   console.log('Duplicate group:');
*   group.forEach(file => console.log(` - ${file.name}`));
* });
* ```
*
* @example
* ```typescript
* // With progress callback
* const duplicates = await detectDuplicatesByHash(
*   files,
*   31,
*   (progress) => {
*     console.log(`${progress.processedFiles}/${progress.totalFiles} processed`);
*     console.log(`Currently processing: ${progress.currentFile}`);
*     console.log(`Duplicates found: ${progress.duplicatesFound}`);
*   }
* );
* ```
*
* @example
* ```typescript
* // Custom threshold (more strict)
* const duplicates = await detectDuplicatesByHash(files, 15);
* ```
*/
export declare function detectDuplicatesByHash(files: FileWithHash[], threshold?: number, onProgress?: ProgressCallback): Promise<FileWithHash[][]>;
//# sourceMappingURL=browser.d.ts.map