UNPKG

matchengine-tools

Version:

Tools for interacting with TinEye's MatchEngine.

114 lines (90 loc) 3.3 kB
/**
 * Command-line tool that downloads image similarity data from MatchEngine.
 *
 * Flow:
 *   1. Parse the command-line arguments and load the MatchEngine config.
 *   2. List every file known to MatchEngine (ME.list), optionally narrowed
 *      by --filter (literal prefix) and --reject (regular expression
 *      anchored at the start of the name).
 *   3. Query the similar images for each remaining file (ME.similar), a
 *      limited number at a time, and stream the matches to the output file
 *      as one JSON object keyed by file name.
 *
 * The process exit code is set to 1 when listing or querying fails.
 */
var fs = require("fs");
var path = require("path");

var async = require("async");
var JSONStream = require("JSONStream");
var ArgumentParser = require("argparse").ArgumentParser;

// Bounds for --threads. The upper bound is the maximum documented in the
// option's help text; the lower bound guarantees that work is performed.
var MIN_THREADS = 1;
var MAX_THREADS = 4;

// How many times a single image is queried before giving up on it.
var MAX_ATTEMPTS = 3;

var argparser = new ArgumentParser({
    description: "Downloads image similarity data from MatchEngine. " +
        "Outputs JSON results to the specified file. Results will be " +
        "written out as one large object, the keys of which will be " +
        "the IDs of the files on the MatchEngine service. The value will " +
        "be an array of objects holding the match data. Files with no " +
        "matches will not be written out."
});

argparser.addArgument(["outFile"], {
    help: "File to which the JSON output should be written."
});

argparser.addArgument(["--conf"], {
    defaultValue: __dirname + "/me.conf.json",
    help: "The JSON config file containing MatchEngine auth details " +
        "(default: me.conf.json)."
});

argparser.addArgument(["--threads"], {
    defaultValue: 2,
    type: "int",
    help: "Number of simulataneous requests to make (default: 2, max: 4)."
});

argparser.addArgument(["--filter"], {
    help: "Filter for which MatchEngine files to download the results of. " +
        "For example: 'camping' will only download matches that are " +
        "prefixed with 'camping'."
});

argparser.addArgument(["--reject"], {
    help: "Reject certain matches from being downloaded."
});

/**
 * Loads the JSON config file named on the command line.
 *
 * A path given on the command line is normally meant relative to the
 * current working directory, whereas require() resolves relative paths
 * against this script's directory and treats a bare name such as
 * "me.conf.json" as a package lookup. The working-directory interpretation
 * is therefore tried first; when no such file exists the path is handed to
 * require() untouched, which preserves the previous lookup behaviour.
 *
 * @param {string} confPath Path to the config file (value of --conf).
 * @returns {Object} The parsed configuration.
 */
function loadConf(confPath) {
    var resolved = path.resolve(confPath);
    return require(fs.existsSync(resolved) ? resolved : confPath);
}

var args = argparser.parseArgs();
var conf = loadConf(args.conf);
var ME = require("matchengine")(conf);

// Keep the number of simultaneous requests within the documented range.
var threads = Math.min(MAX_THREADS, Math.max(MIN_THREADS, args.threads));

// Progress counters shared with queryImage().
var numDownloaded = 0;
var totalImages = 0;

// JSON object stream feeding the output file; created once the image list
// has been loaded successfully.
var fileStream;

console.log("Loading Match Engine data...");

ME.list(function(err, results) {
    // Without a usable list there is nothing to query; report the failure
    // instead of crashing on the filtering below.
    if (err || !Array.isArray(results)) {
        console.error("Failed to load Match Engine data:",
            err || "no image list was returned");
        process.exitCode = 1;
        return;
    }

    fileStream = JSONStream.stringifyObject();
    fileStream.pipe(fs.createWriteStream(args.outFile));

    if (args.filter) {
        // Keep only the names that start with the given prefix.
        results = results.filter(function(image) {
            return image.indexOf(args.filter) === 0;
        });
    }

    if (args.reject) {
        // The user-supplied pattern is anchored at the start of the name.
        var reject = new RegExp("^(?:" + args.reject + ")");

        results = results.filter(function(image) {
            return !reject.test(image);
        });
    }

    totalImages = results.length;

    async.eachLimit(results, threads, queryImage, function(err) {
        if (err) {
            console.error("Querying Match Engine failed:", err);
            process.exitCode = 1;
        } else {
            console.log("Done Querying Match Engine.");
        }

        // End the stream in either case so that the entries already
        // written are flushed and the output is finalized.
        fileStream.end();
    });
});

/**
 * Queries MatchEngine for the images similar to `image` and writes the
 * matches to the output stream as an [image, matches] entry.
 *
 * The query is attempted up to MAX_ATTEMPTS times. If the last attempt
 * fails with an error, that error is passed to `callback`. If the service
 * reported no error but also returned no results, the image is skipped
 * with a warning and the run continues.
 *
 * @param {string} image Name of the file on the MatchEngine service.
 * @param {function(*=)} callback Called once when the image has been
 *     handled; receives the error when the query ultimately failed.
 */
function queryImage(image, callback) {
    numDownloaded += 1;

    console.log("[" + numDownloaded + "/" + totalImages + "] Querying " +
        image + "...");

    var attempts = 0;

    var attempt = function() {
        ME.similar(image, function(err, results) {
            attempts += 1;

            if (err || !results) {
                if (attempts < MAX_ATTEMPTS) {
                    console.log("Re-attempting:", image);
                    return attempt();
                }

                if (err) {
                    return callback(err);
                }

                // No error and no results: there is nothing to record, but
                // say so rather than skipping the image silently.
                console.error("No results returned for " + image +
                    " after " + attempts + " attempts; skipping.");
                return callback();
            }

            // Filter out results that are just matching the same image
            results = results.filter(function(item) {
                return item.filepath !== image;
            });

            // Only log results where there is at least one match
            if (results.length > 0) {
                fileStream.write([image, results]);
            }

            callback();
        });
    };

    attempt();
}