UNPKG

multipass-torrent

Version:

Collects torrents from various sources (dump, RSS, HTML pages) and associates the video files within with IMDB ID

78 lines (63 loc) 2.64 kB
/* Collect info hashes to queue for indexing */ var needle = require("needle"); var gunzip = require("gunzip-maybe"); var url = require("url"); var _ = require("lodash"); var importers = { dump: require("../importers/dump"), json: require("../importers/json"), xmlRss: require("../importers/xml-rss"), generic: require("../importers/generic") }; function collect(source, callback, onHash) { var status = { found: 0, start: Date.now() }; getStream(source, function(err, stream, detectedType) { if (err) return callback(err); var type = status.type = importers[source.type] ? source.type : detectedType; // Pass on to the importer stream = importers[type](stream, source); // Collection results var unique = { }; stream.on("infoHash", function(hash, extra) { hash = hash.toLowerCase(); if (unique[hash]) return; unique[hash] = true; status.found++; if (onHash) onHash(hash.toLowerCase(), extra); }); stream.on("error", callback); stream.on("end", function() { stream.removeAllListeners(); status.end = Date.now(); callback(null, status) }); }); }; function getStream(source, callback) { var stream, response, callback = _.once(callback); stream = response = needle.get(source.url, { follow_max: 4, open_timeout: 5000, headers: { "user-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/45.0.2454.99 Safari/537.36" } }).on("headers", function(headers) { var filename = headers["content-disposition"] || url.parse(source.url).pathname; var detectedType = "generic"; if (headers["content-type"] && headers["content-type"].match("xml")) detectedType = "xmlRss"; if (headers["content-type"] && headers["content-type"].match("json")) detectedType = "json"; if (filename.match(".txt.gz$")) detectedType = "dump"; if (detectedType !="json") stream = stream.pipe(gunzip()).pipe(gunzip()); // Some sources can be gunzipped twice (one for request, another for being a .txt.gz) stream.on("end", function() { response.end() }); // make sure response is closed callback(null, stream, detectedType); }).on("error", function(e) { callback(e) }) .on("end", function() { // TODO: we can check statusCode / etc? callback(new Error("empty response / couldn't detect type")); }) }; module.exports = { collect: collect, getStream: getStream };