UNPKG

crawler-ts

Version:

Lightweight crawler written in TypeScript using ES6 generators.

128 lines (127 loc) 4.51 kB
"use strict"; Object.defineProperty(exports, "__esModule", { value: true }); exports.cache = exports.ignoreDoubles = exports.ignoreRegex = exports.allowRegex = exports.allowExtensions = exports.chain = exports.toString = void 0; var toString = function (value) { return "" + value; }; exports.toString = toString; /** * Chain multiple filter functions together. */ function chain() { var fns = []; for (var _i = 0; _i < arguments.length; _i++) { fns[_i] = arguments[_i]; } return function () { for (var _i = 0, fns_1 = fns; _i < fns_1.length; _i++) { var fn = fns_1[_i]; if (!fn.apply(this, arguments)) { return false; } } return true; }; } exports.chain = chain; /** * Create a filter that allows specific extensions. */ var allowExtensions = function (strFn) { if (strFn === void 0) { strFn = exports.toString; } return function (allowedExtensions, logger) { return function (_a) { var location = _a.location; var converted = strFn(location); var lastSlashIndex = Math.max(0, converted.lastIndexOf('/')); var lastSlashPart = converted.substr(lastSlashIndex); var lastDotIndex = lastSlashPart.lastIndexOf('.'); if (lastDotIndex !== -1) { var extension = lastSlashPart.substr(lastDotIndex + 1); if (allowedExtensions.indexOf(extension) === -1) { logger === null || logger === void 0 ? void 0 : logger.info("Extension not allowed " + converted); return false; } } return true; }; }; }; exports.allowExtensions = allowExtensions; /** * Create a filter that allows values matching the given regexes. */ var allowRegex = function (strFn) { if (strFn === void 0) { strFn = exports.toString; } return function (allowUrls, logger) { return function (_a) { var location = _a.location; for (var _i = 0, allowUrls_1 = allowUrls; _i < allowUrls_1.length; _i++) { var allowUrl = allowUrls_1[_i]; var converted = strFn(location); if (allowUrl.test(converted)) { logger === null || logger === void 0 ? void 0 : logger.info("Allowing " + converted); return true; } } return false; }; }; }; exports.allowRegex = allowRegex; /** * Create a filter that ignores values matching the given regexes. */ var ignoreRegex = function (strFn) { if (strFn === void 0) { strFn = exports.toString; } return function (ignoredUrls, logger) { return function (_a) { var location = _a.location; for (var _i = 0, ignoredUrls_1 = ignoredUrls; _i < ignoredUrls_1.length; _i++) { var ignoredUrl = ignoredUrls_1[_i]; var converted = strFn(location); if (ignoredUrl.test(converted)) { logger === null || logger === void 0 ? void 0 : logger.info("Ignoring " + converted); return false; } } return true; }; }; }; exports.ignoreRegex = ignoreRegex; /** * Create a filter that ignores doubles. */ var ignoreDoubles = function (strFn) { if (strFn === void 0) { strFn = exports.toString; } return function (logger) { var seen = []; return function (_a) { var location = _a.location; var key = strFn(location); if (!key || seen.includes(key)) { logger === null || logger === void 0 ? void 0 : logger.info("Skipping visited \"" + location + "\""); return false; } seen.push(key); return true; }; }; }; exports.ignoreDoubles = ignoreDoubles; var cache = function (strFn) { if (strFn === void 0) { strFn = exports.toString; } return function (fn) { var shouldFollowCache = {}; return function cachedShouldFollow(_a) { var location = _a.location; var string = strFn(location); if (shouldFollowCache.hasOwnProperty(string)) { return shouldFollowCache[string]; } var shouldFollow = fn({ location: location }); shouldFollowCache[string] = shouldFollow; return shouldFollow; }; }; }; exports.cache = cache;