UNPKG

pdq-wasm

Version:

WebAssembly bindings for Meta's PDQ perceptual image hashing algorithm

668 lines (667 loc) 27.7 kB
"use strict"; /** * PDQ (Perceptual Hash) WebAssembly bindings * * Copyright (c) Meta Platforms, Inc. and affiliates (original algorithm) * Copyright (c) 2025 (WebAssembly bindings) */ var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) { if (k2 === undefined) k2 = k; var desc = Object.getOwnPropertyDescriptor(m, k); if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) { desc = { enumerable: true, get: function() { return m[k]; } }; } Object.defineProperty(o, k2, desc); }) : (function(o, m, k, k2) { if (k2 === undefined) k2 = k; o[k2] = m[k]; })); var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) { Object.defineProperty(o, "default", { enumerable: true, value: v }); }) : function(o, v) { o["default"] = v; }); var __importStar = (this && this.__importStar) || (function () { var ownKeys = function(o) { ownKeys = Object.getOwnPropertyNames || function (o) { var ar = []; for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k; return ar; }; return ownKeys(o); }; return function (mod) { if (mod && mod.__esModule) return mod; var result = {}; if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]); __setModuleDefault(result, mod); return result; }; })(); Object.defineProperty(exports, "__esModule", { value: true }); exports.PDQ = void 0; // Import the WASM module factory // This will be resolved differently in Node.js vs browser let createPDQModuleFactory = null; // Lazy loader for Node.js environment - deferred to avoid require() in ES modules function getWasmFactory() { if (createPDQModuleFactory !== null) { return createPDQModuleFactory; } // Check if we're in Node.js AND require is available (CommonJS or Node.js ESM with require) if (typeof process !== 'undefined' && process.versions?.node && typeof require !== 'undefined') { // Node.js environment with require available try { createPDQModuleFactory = require('../wasm/pdq.js'); return createPDQModuleFactory; } catch (e) { // WASM module not built yet return null; } } // Browser environment or Node.js ESM without require - will need to be provided via options return null; } /** * Detect if we're running in a Web Worker environment * Checks for worker-specific globals and absence of window */ function isWorkerEnvironment() { return (typeof self !== 'undefined' && // Don't require importScripts - ES module workers don't have it // Check for worker global scope types or absence of window ( // Check for any worker global scope type // @ts-ignore - WorkerGlobalScope types only available in workers typeof WorkerGlobalScope !== 'undefined' || // @ts-ignore - DedicatedWorkerGlobalScope only in dedicated workers typeof DedicatedWorkerGlobalScope !== 'undefined' || // @ts-ignore - SharedWorkerGlobalScope only in shared workers typeof SharedWorkerGlobalScope !== 'undefined' || // Simpler check: absence of window (most reliable for both classic and ES module workers) typeof window === 'undefined')); } /** * Detect if we're running in a browser (non-worker) environment */ function isBrowserEnvironment() { return typeof window !== 'undefined' && !isWorkerEnvironment(); } /** * PDQ WebAssembly implementation */ class PDQ { /** * Set a custom logger function to log PDQ operations * @param logger Function that receives log messages * @returns PDQ class for method chaining * * @example * PDQ.logger((msg) => console.log('[PDQ]', msg)); */ static setLogger(logger) { this.loggerFn = logger; return this; } /** * Enable console logging (convenience method) * @returns PDQ class for method chaining * * @example * PDQ.consoleLog(); */ static consoleLog() { return this.setLogger(console.log); } /** * Disable logging * @returns PDQ class for method chaining */ static disableLogging() { this.loggerFn = null; return this; } /** * Enable ignore invalid mode - log errors instead of throwing * @returns PDQ class for method chaining * * @example * PDQ.ignoreInvalid().consoleLog(); */ static ignoreInvalid() { this.ignoreInvalidFlag = true; return this; } /** * Disable ignore invalid mode - throw errors normally * @returns PDQ class for method chaining */ static throwOnInvalid() { this.ignoreInvalidFlag = false; return this; } /** * Internal logging method */ static log(message) { if (this.loggerFn) { this.loggerFn(message); } } /** * Internal error handling method * Either throws or logs based on ignoreInvalidFlag */ static handleError(errorMsg) { this.log(`ERROR: ${errorMsg}`); if (!this.ignoreInvalidFlag) { throw new Error(errorMsg); } } /** * Initialize the WASM module * Must be called before using any PDQ functions * * @param options Configuration options * @param options.wasmUrl URL to load WASM module from (browser only, defaults to CDN) * * @example * // Node.js (uses bundled WASM) * await PDQ.init(); * * @example * // Browser (automatically uses CDN) * await PDQ.init(); * * @example * // Browser with custom URL * await PDQ.init({ * wasmUrl: '/assets/pdq.wasm' * }); */ static async init(options = {}) { if (this.initPromise) { return this.initPromise; } this.initPromise = (async () => { this.log('Initializing PDQ WASM module...'); // Browser environment (non-worker) if (isBrowserEnvironment()) { // Default to CDN if no wasmUrl provided if (!options.wasmUrl) { // Use unpkg.com CDN - pins to current package version for stability // Users can also use @latest or a different CDN (jsDelivr, etc.) const { version } = require('../package.json'); options.wasmUrl = `https://unpkg.com/pdq-wasm@${version}/wasm/pdq.wasm`; this.log(`No wasmUrl provided, using CDN: ${options.wasmUrl}`); this.log('For production, consider self-hosting the WASM files for better reliability'); } this.log(`Loading WASM module from: ${options.wasmUrl}`); try { // Dynamically load the WASM module factory script const wasmJsUrl = options.wasmUrl.replace(/\.wasm$/, '.js'); // Security: Validate URL to prevent insecure content // Allow: HTTPS, localhost HTTP, relative URLs (for local development) const isHttps = /^https:\/\//.test(wasmJsUrl); const isLocalhost = /^http:\/\/localhost(:\d+)?/.test(wasmJsUrl) || /^http:\/\/127\.0\.0\.1(:\d+)?/.test(wasmJsUrl); const isRelative = /^\.\.?\//.test(wasmJsUrl) || !/:\/\//.test(wasmJsUrl); if (!isHttps && !isLocalhost && !isRelative) { throw new Error(`Insecure URL: ${wasmJsUrl}. Only HTTPS URLs are allowed for security (localhost HTTP and relative URLs are permitted for development).`); } // Use script.src for better security and CSP compliance const script = document.createElement('script'); script.src = wasmJsUrl; script.async = false; // Ensure synchronous execution order // Load script and wait for createPDQModule to be available await new Promise((resolve, reject) => { script.onload = () => { // Verify the factory function is available after load if (typeof window.createPDQModule === 'function') { resolve(); } else { reject(new Error('createPDQModule function not found after script load')); } }; script.onerror = () => reject(new Error(`Failed to load WASM module script from ${wasmJsUrl}`)); document.head.appendChild(script); }); const factory = window.createPDQModule; // Initialize with the WASM URL this.module = await factory({ locateFile: (path) => { if (path.endsWith('.wasm')) { return options.wasmUrl; } return path; } }); this.log('PDQ WASM module initialized successfully (browser)'); } catch (error) { const errorMsg = `Failed to load WASM module from ${options.wasmUrl}: ${error instanceof Error ? error.message : String(error)}`; this.log(`ERROR: ${errorMsg}`); throw new Error(errorMsg); } } else { // Node.js environment - use bundled WASM this.log('Loading WASM module (Node.js)...'); const factory = getWasmFactory(); if (!factory) { const errorMsg = 'PDQ WASM module not available. Make sure to run: npm run build:wasm'; this.log(`ERROR: ${errorMsg}`); throw new Error(errorMsg); } // In Node.js, read WASM file and provide as binary to avoid URL issues const path = require('path'); const fs = require('fs'); const wasmPath = path.join(__dirname, '..', 'wasm', 'pdq.wasm'); const wasmBinary = fs.readFileSync(wasmPath); this.module = await factory({ ...options, wasmBinary: wasmBinary }); this.log('PDQ WASM module initialized successfully (Node.js)'); } })(); return this.initPromise; } /** * Initialize the WASM module in a Web Worker environment * Must be called before using any PDQ functions in a worker * * @param options Worker initialization options * @param options.wasmUrl URL to the WASM file * @param options.wasmJsUrl URL to the WASM JavaScript glue code (optional, defaults to wasmUrl.replace('.wasm', '.js')) * * @example * // In a Web Worker * await PDQ.initWorker({ * wasmUrl: 'https://unpkg.com/pdq-wasm@0.3.3/wasm/pdq.wasm' * }); * * @example * // With custom JS glue code URL * await PDQ.initWorker({ * wasmUrl: '/wasm/pdq.wasm', * wasmJsUrl: '/wasm/pdq.js' * }); */ static async initWorker(options) { if (this.initPromise) { return this.initPromise; } if (!isWorkerEnvironment()) { throw new Error('initWorker() can only be called in a Web Worker environment. ' + 'Use PDQ.init() for browser or Node.js environments.'); } // Apply logger if provided if (options.logger) { this.setLogger(options.logger); } this.initPromise = (async () => { this.log('Initializing PDQ WASM module in Web Worker...'); // Derive JS glue code URL from WASM URL if not provided const wasmJsUrl = options.wasmJsUrl || (options.wasmUrl.endsWith('.wasm') ? options.wasmUrl.replace(/\.wasm$/, '.js') : options.wasmUrl + '.js'); this.log(`Loading WASM module from: ${options.wasmUrl}`); this.log(`Loading WASM JS glue code from: ${wasmJsUrl}`); try { // Load the WASM factory using the appropriate method const factory = await this.loadWorkerFactory(wasmJsUrl); // Initialize with the WASM URL this.module = await factory({ locateFile: (path) => { if (path.endsWith('.wasm')) { return options.wasmUrl; } return path; } }); this.log('PDQ WASM module initialized successfully (Web Worker)'); } catch (error) { const errorMsg = `Failed to load WASM module in worker: ${error instanceof Error ? error.message : String(error)}`; this.log(`ERROR: ${errorMsg}`); throw new Error(errorMsg); } })(); return this.initPromise; } /** * Try loading WASM factory using importScripts (for classic workers) */ static tryLoadViaImportScripts(wasmJsUrl) { // @ts-ignore - importScripts is only available in workers if (typeof importScripts !== 'function') { return null; } try { this.log('Loading via importScripts (classic worker)...'); // @ts-ignore - importScripts is only available in workers importScripts(wasmJsUrl); // The glue code should define createPDQModule on the global scope const factory = self.createPDQModule; // Validate that the factory function was properly loaded if (typeof factory !== 'function') { this.log(`ERROR: createPDQModule function not found on global scope after importing ${wasmJsUrl}. ` + 'This may indicate a custom build or incorrect WASM JS glue code. ' + 'Ensure that the glue code defines createPDQModule globally. ' + 'If you are using a custom build, you may need to expose the factory function as self.createPDQModule.'); throw new Error('PDQ WASM initialization failed: createPDQModule is not defined globally. ' + 'Check that your WASM JS glue code exposes createPDQModule on the global scope. ' + 'See documentation for custom builds.'); } this.log('Successfully loaded factory via importScripts'); return factory; } catch (error) { // importScripts failed - likely an ES module worker this.log(`importScripts failed (ES module worker?): ${error instanceof Error ? error.message : String(error)}`); return null; } } /** * Try loading WASM factory using dynamic import (for ES module workers) */ static async tryLoadViaDynamicImport(wasmJsUrl) { this.log('Using dynamic import for ES module worker...'); const module = await Promise.resolve(`${wasmJsUrl}`).then(s => __importStar(require(s))); this.log(`Imported module keys: ${Object.keys(module).join(', ')}`); // UMD modules export the factory on 'default' when dynamically imported let factory = module.default; // Check if we got a valid factory function if (typeof factory !== 'function') { // Try named exports as fallback factory = module.createPDQModule || module.PDQ; if (typeof factory !== 'function') { this.log(`Factory type: ${typeof factory}`); this.log(`module.default type: ${typeof module.default}`); this.log(`module.createPDQModule type: ${typeof module.createPDQModule}`); throw new Error(`ES module at ${wasmJsUrl} does not export a valid factory function. ` + `Available exports: ${Object.keys(module).join(', ')}`); } } this.log('Successfully loaded factory function from dynamic import'); return factory; } /** * Load WASM factory using appropriate method for the worker environment */ static async loadWorkerFactory(wasmJsUrl) { // Try importScripts first (most reliable for Emscripten UMD output) const importScriptsFactory = this.tryLoadViaImportScripts(wasmJsUrl); if (importScriptsFactory) { return importScriptsFactory; } // Fall back to dynamic import for ES module workers try { return await this.tryLoadViaDynamicImport(wasmJsUrl); } catch (importError) { throw new Error('Failed to load WASM module in worker. ' + 'importScripts is not available and dynamic import failed. ' + `Error: ${importError instanceof Error ? importError.message : String(importError)}`); } } /** * Ensure the module is initialized */ static ensureInit() { if (!this.module) { const errorMsg = 'PDQ module not initialized. Call PDQ.init() first.'; this.log(`ERROR: ${errorMsg}`); throw new Error(errorMsg); } return this.module; } /** * Hash image data and return PDQ hash with quality score * * @param imageData Image pixel data (RGB or grayscale) * @returns PDQ hash and quality score */ static hash(imageData) { const mod = this.ensureInit(); this.log(`Hashing image: ${imageData.width}x${imageData.height}, ${imageData.channels} channels`); // Validate input const expectedSize = imageData.width * imageData.height * imageData.channels; if (imageData.data.length !== expectedSize) { const errorMsg = `Invalid image data size. Expected ${expectedSize} bytes, got ${imageData.data.length}`; this.handleError(errorMsg); if (this.ignoreInvalidFlag) return { hash: new Uint8Array(32), quality: 0 }; } // Allocate memory in WASM const imagePtr = mod._malloc(imageData.data.length); const hashPtr = mod._malloc(32); // 256 bits = 32 bytes const qualityPtr = mod._malloc(4); // int32 try { // Copy image data to WASM memory mod.HEAPU8.set(imageData.data, imagePtr); let result; if (imageData.channels === 3) { // RGB image result = mod._pdq_hash_from_rgb(imagePtr, imageData.width, imageData.height, hashPtr, qualityPtr); } else { // Grayscale image result = mod._pdq_hash_from_gray(imagePtr, imageData.width, imageData.height, hashPtr, qualityPtr); } if (result !== 0) { const errorMsg = `PDQ hashing failed with code: ${result}`; this.log(`ERROR: ${errorMsg}`); throw new Error(errorMsg); } // Read results from WASM memory const hash = new Uint8Array(32); hash.set(mod.HEAPU8.subarray(hashPtr, hashPtr + 32)); const quality = mod.HEAP32[qualityPtr >> 2]; this.log(`Hash generated successfully. Quality: ${quality}`); return { hash, quality }; } finally { // Free WASM memory mod._free(imagePtr); mod._free(hashPtr); mod._free(qualityPtr); } } /** * Calculate Hamming distance between two PDQ hashes * Returns a value from 0 (identical) to 256 (completely different) * * @param hash1 First PDQ hash * @param hash2 Second PDQ hash * @returns Hamming distance (0-256) */ static hammingDistance(hash1, hash2) { const mod = this.ensureInit(); if (hash1.length !== 32 || hash2.length !== 32) { const errorMsg = `Invalid hash length. PDQ hashes must be 32 bytes (got ${hash1.length} and ${hash2.length})`; this.handleError(errorMsg); if (this.ignoreInvalidFlag) return 256; // Return maximum distance for invalid hashes } const hash1Ptr = mod._malloc(32); const hash2Ptr = mod._malloc(32); try { mod.HEAPU8.set(hash1, hash1Ptr); mod.HEAPU8.set(hash2, hash2Ptr); const distance = mod._pdq_hamming_distance(hash1Ptr, hash2Ptr); if (distance < 0) { const errorMsg = 'Failed to calculate Hamming distance'; this.log(`ERROR: ${errorMsg}`); throw new Error(errorMsg); } this.log(`Hamming distance calculated: ${distance}`); return distance; } finally { mod._free(hash1Ptr); mod._free(hash2Ptr); } } /** * Convert a PDQ hash to hexadecimal string representation * * @param hash PDQ hash bytes * @returns Hexadecimal string (64 characters) */ static toHex(hash) { const mod = this.ensureInit(); if (hash.length !== 32) { const errorMsg = `Invalid hash length. PDQ hashes must be 32 bytes (got ${hash.length})`; this.handleError(errorMsg); if (this.ignoreInvalidFlag) return '0'.repeat(64); // Return zero hash } const hashPtr = mod._malloc(32); const hexPtr = mod._malloc(65); // 64 chars + null terminator try { mod.HEAPU8.set(hash, hashPtr); mod._pdq_hash_to_hex(hashPtr, hexPtr); // Read hex string from WASM memory let hex = ''; for (let i = 0; i < 64; i++) { hex += String.fromCharCode(mod.HEAPU8[hexPtr + i]); } this.log(`Hash converted to hex: ${hex}`); return hex; } finally { mod._free(hashPtr); mod._free(hexPtr); } } /** * Convert a hexadecimal string to PDQ hash bytes * * @param hex Hexadecimal string (64 characters) * @returns PDQ hash bytes */ static fromHex(hex) { const mod = this.ensureInit(); this.log(`Converting hex to hash: ${hex}`); if (hex.length !== 64) { const errorMsg = `Invalid hex string length. Must be 64 characters (got ${hex.length})`; this.handleError(errorMsg); if (this.ignoreInvalidFlag) return new Uint8Array(32); // Return zero hash } const hexPtr = mod._malloc(65); const hashPtr = mod._malloc(32); try { // Copy hex string to WASM memory for (let i = 0; i < 64; i++) { mod.HEAPU8[hexPtr + i] = hex.charCodeAt(i); } mod.HEAPU8[hexPtr + 64] = 0; // null terminator const result = mod._pdq_hex_to_hash(hexPtr, hashPtr); if (result !== 0) { const errorMsg = 'Invalid hex string format'; this.handleError(errorMsg); if (this.ignoreInvalidFlag) { return new Uint8Array(32); // Return zero hash } } const hash = new Uint8Array(32); hash.set(mod.HEAPU8.subarray(hashPtr, hashPtr + 32)); this.log('Hex converted to hash successfully'); return hash; } finally { mod._free(hexPtr); mod._free(hashPtr); } } /** * Check if two hashes are similar based on a threshold * * @param hash1 First PDQ hash * @param hash2 Second PDQ hash * @param threshold Maximum Hamming distance to consider similar (default: 31) * @returns True if hashes are similar */ static areSimilar(hash1, hash2, threshold = 31) { const distance = this.hammingDistance(hash1, hash2); return distance <= threshold; } /** * Get similarity percentage between two hashes (0-100) * * @param hash1 First PDQ hash * @param hash2 Second PDQ hash * @returns Similarity percentage (0 = completely different, 100 = identical) */ static similarity(hash1, hash2) { const distance = this.hammingDistance(hash1, hash2); const maxBits = hash1.length * 8; // Total bits in hash return ((maxBits - distance) / maxBits) * 100; } /** * Order an array of hashes by similarity to a reference hash * Returns hashes sorted from most similar to least similar * * @param referenceHash The reference hash to compare against * @param hashes Array of hashes to order * @param includeIndex Whether to include original array index (default: false) * @returns Array of SimilarityMatch objects ordered by distance (ascending) */ static orderBySimilarity(referenceHash, hashes, includeIndex = false) { this.log(`Ordering ${hashes.length} hashes by similarity to reference`); // Validate reference hash length (PDQ hashes are always 32 bytes) if (referenceHash.length !== 32) { const errorMsg = `Invalid reference hash length. PDQ hashes must be 32 bytes (got ${referenceHash.length})`; this.handleError(errorMsg); if (this.ignoreInvalidFlag) return []; // Return empty array } const expectedLength = referenceHash.length; // Calculate distance and similarity for each hash const matches = hashes .map((hash, index) => { if (hash.length !== expectedLength) { const errorMsg = `Invalid hash length at index ${index}. Expected ${expectedLength} bytes, got ${hash.length}`; this.handleError(errorMsg); if (this.ignoreInvalidFlag) return null; // Skip invalid hash } const distance = this.hammingDistance(referenceHash, hash); const similarity = this.similarity(referenceHash, hash); const match = { hash, distance, similarity, }; if (includeIndex) { match.index = index; } return match; }) .filter((match) => match !== null); // Sort by distance (ascending - most similar first) // Use secondary sort by original index for stable ordering matches.sort((a, b) => { if (a.distance !== b.distance) { return a.distance - b.distance; } // When distances are equal, maintain original order const indexA = includeIndex ? (a.index ?? 0) : hashes.indexOf(a.hash); const indexB = includeIndex ? (b.index ?? 0) : hashes.indexOf(b.hash); return indexA - indexB; }); this.log(`Ordered ${matches.length} hashes. Best match has distance ${matches[0]?.distance ?? 'N/A'}`); return matches; } } exports.PDQ = PDQ; PDQ.module = null; PDQ.initPromise = null; PDQ.loggerFn = null; PDQ.ignoreInvalidFlag = false; // Default export exports.default = PDQ;