UNPKG

vuln-regex-detector

Version:

Detect vulnerable regexes by querying a service hosted at Virginia Tech.

517 lines (439 loc) 12.6 kB
'use strict'; /********** * Dependencies. **********/ /* I/O. */ const https = require('https'); const syncRequest = require('sync-request'); /* Persistent cache. */ const path = require('path'); const fs = require('fs'); const crypto = require('crypto'); /* Misc. */ const os = require('os'); /********** * Globals. **********/ const REQUEST_LOOKUP_ONLY = 'LOOKUP_ONLY'; // Will only make a lookup, won't be submitting an UPDATE later. const RESPONSE_VULNERABLE = 'VULNERABLE'; const RESPONSE_SAFE = 'SAFE'; const RESPONSE_UNKNOWN = 'UNKNOWN'; const RESPONSE_INVALID = 'INVALID'; /* Logging. */ const LOGGING = false; /* Cache. */ const CACHE_TYPES = { persistent: 'persistent', memory: 'memory', none: 'none' }; /* Cache: memory. */ /* Default config. */ const defaultServerConfig = { hostname: 'toybox.cs.vt.edu', port: 8000 }; const defaultCacheConfig = { type: CACHE_TYPES.persistent, persistentDir: path.join(os.tmpdir(), 'vuln-regex-detector-client-persistentCache') }; /********** * Functions. **********/ /** * @param regex: RegExp or string (e.g. /re/ or 're') * @param [config]: provide a config object like this: * { * server: { * hostname: 'toybox.cs.vt.edu', * port: 8000 * }, * cache: { * type: cacheTypes.persistent, * [persistentDir]: '/tmp/vuln-regex-detector-client-persistentCache' * } * } * * Config defaults if not provided: * server: indicated in the example. This is a research server at Virginia Tech. * cache: 'persistent' with persistentDir in a subdir of os.tmpdir(). * * @returns Promise fulfilled with responses.X or rejected with responses.invalid. */ function checkRegex (_regex, _config) { let pattern; let config; /* Handle args. */ try { [pattern, config] = handleArgs(_regex, _config); } catch (e) { return Promise.reject(RESPONSE_INVALID); } log(`Input OK. pattern /${pattern}/ config ${JSON.stringify(config)}`); let postObject = generatePostObject(pattern); let postBuffer = JSON.stringify(postObject); let postHeaders = generatePostHeaders(config, Buffer.byteLength(postBuffer)); // Wrapper so we can return a Promise. function promiseResult (options, data) { log(`promiseResult: data ${data}`); return new Promise((resolve, reject) => { /* Check cache to avoid I/O. */ const cacheHit = checkCache(config, pattern); if (cacheHit !== RESPONSE_UNKNOWN) { log(`Cache hit: ${cacheHit}`); return resolve(cacheHit); } const req = https.request(options, (res) => { res.setEncoding('utf8'); let response = ''; res.on('data', (chunk) => { log(`Got data`); response += chunk; }); res.on('end', () => { log(`end: I got ${JSON.stringify(response)}`); const result = serverResponseToRESPONSE(response); log(`end: result ${result}`); updateCache(config, postObject.pattern, result); if (result === RESPONSE_INVALID) { return reject(result); } else { return resolve(result); } }); }); req.on('error', (e) => { log(`Error: ${e}`); return reject(RESPONSE_INVALID); }); // Write data to request body. log(`Writing to req:\n${data}`); req.write(data); req.end(); }); } return promiseResult(postHeaders, postBuffer); } /** * @param regex: see checkRegex API * @param [config]: see checkRegex API * * @returns synchronous result: RESPONSE_X * * Since this makes a synchronous HTTP query it will be slow. */ function checkRegexSync (_regex, _config) { let pattern; let config; /* Handle args. */ try { [pattern, config] = handleArgs(_regex, _config); } catch (e) { log(e); log(`Invalid input: _regex ${JSON.stringify(_regex)} _config ${JSON.stringify(_config)}`); return RESPONSE_INVALID; } log(`Input OK. pattern /${pattern}/ config ${JSON.stringify(config)}`); /* Check cache to avoid I/O. */ const cacheHit = checkCache(config, pattern); if (cacheHit !== RESPONSE_UNKNOWN) { log(`Cache hit: ${cacheHit}`); return cacheHit; } let postObject = generatePostObject(pattern); let postBuffer = JSON.stringify(postObject); let postHeaders = generatePostHeaders(config, Buffer.byteLength(postBuffer)); let url = `https://${postHeaders.hostname}:${postHeaders.port}${postHeaders.path}`; try { log(`sending syncRequest: method ${postHeaders.method} url ${url} headers ${JSON.stringify(postHeaders.headers)} body ${postBuffer}`); /* Send request. */ const response = syncRequest(postHeaders.method, url, { headers: postHeaders.headers, body: postBuffer }); /* Extract body as JSON. */ let responseBody; try { responseBody = response.getBody('utf8'); } catch (e) { log(`checkRegexSync: Unparseable response ${JSON.stringify(response)}`); return RESPONSE_INVALID; } log(`checkRegexSync: I got ${responseBody}`); /* Convert to a RESPONSE_X value. */ const result = serverResponseToRESPONSE(responseBody); updateCache(config, postObject.pattern, result); return result; } catch (e) { log(`syncRequest threw: ${JSON.stringify(e)}`); return RESPONSE_INVALID; } } /********** * Helpers. **********/ /** * @param regex: Input to checkRegex, etc. * @param config: Input to checkRegex, etc. * * @returns: [pattern, config] or throws exception */ function handleArgs (_regex, _config) { /* Identify regex pattern. */ let pattern; if (_regex) { if (typeof _regex === 'string') { pattern = _regex; } else { try { pattern = _regex.source; } catch (e) { log(`Invalid regex:`); log(_regex); } } } else { log(`Invalid regex: none provided`); } if (!pattern) { let errObj = { msg: 'Invalid args' }; throw errObj; } /* Identify config. Accept a variety of flavors and fall back to defaults as needed. */ let config = {}; if (!_config) { config.server = defaultServerConfig; config.cache = defaultCacheConfig; } else { config.server = handleServerConfig(_config.server); config.cache = handleCacheConfig(_config.cache); } return [pattern, config]; } /* Helper for handleArgs: config.server. */ function handleServerConfig (serverConfig) { if (!serverConfig) { return defaultServerConfig; } else if (!serverConfig.hasOwnProperty('hostname') || !serverConfig.hasOwnProperty('port')) { return defaultServerConfig; } return serverConfig; } /* Helper for handleArgs: config.cache. */ function handleCacheConfig (cacheConfig) { if (!cacheConfig) { return defaultCacheConfig; } // Must have valid type. if (!cacheConfig.hasOwnProperty('type') || !CACHE_TYPES.hasOwnProperty(cacheConfig.type)) { cacheConfig.type = CACHE_TYPES.persistent; } // If type is persistent, need persistentDir. if (cacheConfig.type === CACHE_TYPES.persistent && !cacheConfig.hasOwnProperty('persistentDir')) { cacheConfig.persistentDir = defaultCacheConfig.persistentDir; } return cacheConfig; } /* Return object to be sent over the wire as JSON. */ function generatePostObject (pattern) { const postObject = { pattern: pattern, language: 'javascript', requestType: REQUEST_LOOKUP_ONLY }; return postObject; } /* Return headers for the POST request. */ function generatePostHeaders (config, payloadSize) { const postHeaders = { hostname: config.server.hostname, port: config.server.port, path: '/api/lookup', method: 'POST', headers: { 'Content-Type': 'application/json', 'Content-Length': payloadSize } }; return postHeaders; } /* response: raw response from server */ function serverResponseToRESPONSE (response) { try { const obj = JSON.parse(response); if (obj.result === RESPONSE_UNKNOWN) { return RESPONSE_UNKNOWN; } else { return obj.result.result; } } catch (e) { return RESPONSE_INVALID; } } /********** * Cache. * * The cache in use is controlled by CACHE_TYPE. * If CACHE_TYPE is 'none' then APIs behave appropriately. * The cache is implemented using a key-value interface. * * Cache accesses are synchronous. * If CACHE_TYPE is 'memory' that's fine. * If CACHE_TYPE is 'persistent' then there are some performance concerns. * TODO Address this with sync and async versions of the APIs. **********/ function useCache (config) { return config.cache.type !== CACHE_TYPES.none; } function updateCache (config, pattern, response) { if (!useCache(config)) { return; } return kvPut(config, pattern, response); } /* Returns RESPONSE_{VULNERABLE|SAFE} on hit, else RESPONSE_UNKNOWN on miss or disabled. */ function checkCache (config, pattern) { if (!useCache(config)) { return RESPONSE_UNKNOWN; } return kvGet(config, pattern); } function kvPut (config, key, value) { /* Only cache VULNERABLE|SAFE responses. */ if (value !== RESPONSE_VULNERABLE && value !== RESPONSE_SAFE) { return; } /* Put in the appropriate cache. */ switch (config.cache.type) { case CACHE_TYPES.persistent: return kvPutPersistent(config, key, value); case CACHE_TYPES.memory: return kvPutMemory(key, value); default: return RESPONSE_UNKNOWN; } } function kvGet (config, key) { /* Get from the appropriate cache. */ switch (config.cache.type) { case CACHE_TYPES.persistent: return kvGetPersistent(config, key); case CACHE_TYPES.memory: return kvGetMemory(key); default: return RESPONSE_UNKNOWN; } } /* Persistent KV. */ /* Returns true if initialized, false on initialization failure. */ function initializeKVPersistent (config) { /* NB Makes FS syscalls each time in case config changes during lifetime. * Could cache the set of initialized dirs if this is a performance issue. */ /* First try a mkdir. Dir might exist already. */ try { fs.mkdirSync(config.cache.persistentDir); } catch (e) { } /* If we have a dir now, we're happy. * This also works if persistentDir is a symlink. */ try { const stats = fs.lstatSync(config.cache.persistentDir); if (stats.isDirectory()) { return true; } else { return false; } } catch (e) { /* Hmm. */ return false; } } function kvPersistentFname (config, key) { /* Need something we can safely use as a file name. * Keys are patterns and might contain /'s or \'s. * * Using a hash might give us false reports on collisions, but this is * exceedingly unlikely in typical use cases (a few hundred regexes tops). */ const hash = crypto.createHash('md5').update(key).digest('hex'); const fname = path.join(config.cache.persistentDir, `${hash}.json`); return fname; } function kvPutPersistent (config, key, value) { if (!initializeKVPersistent(config)) { log(`kvPutPersistent: could not initialize`); return; } try { /* This must be atomic in case of concurrent put and get from different processes. * Hence the use of a tmp file and rename. */ const fname = kvPersistentFname(config, key); const tmpFname = `${fname}-${process.pid}-tmp`; log(`kvPutPersistent: putting result in ${fname}`); fs.writeFileSync(tmpFname, JSON.stringify({key: key, value: value})); fs.renameSync(tmpFname, fname); } catch (e) { /* Ignore failures. */ } } function kvGetPersistent (config, key) { if (!initializeKVPersistent(config)) { return RESPONSE_UNKNOWN; } try { const fname = kvPersistentFname(config, key); log(`kvGetPersistent: getting result from ${fname}`); const cont = JSON.parse(fs.readFileSync(fname)); return cont.value; } catch (e) { return RESPONSE_UNKNOWN; } } /* Memory (volatile) KV. */ /* Map pattern to RESPONSE_VULNERABLE or RESPONSE_SAFE in case of duplicate queries. * We do not cache RESPONSE_UNKNOWN or RESPONSE_INVALID responses since these might change. */ let pattern2response = {}; function kvPutMemory (key, value) { if (!pattern2response.hasOwnProperty(key)) { pattern2response[key] = value; } } function kvGetMemory (key) { const hit = pattern2response[key]; if (hit) { log(`kvGetMemory: hit: ${key} -> ${hit}`); return hit; } else { return RESPONSE_UNKNOWN; } } /********** * Utilities. **********/ function log (msg) { if (LOGGING) { console.error(msg); } } /********** * Exports. **********/ module.exports = { /* Core APIs. */ test: checkRegex, testSync: checkRegexSync, /* Config. */ defaultServerConfig: defaultServerConfig, // makes testing easier defaultCacheConfig: defaultCacheConfig, // makes testing easier cacheTypes: { persistent: CACHE_TYPES.persistent, memory: CACHE_TYPES.memory, none: CACHE_TYPES.none }, /* Interpreting API responses. */ responses: { vulnerable: RESPONSE_VULNERABLE, safe: RESPONSE_SAFE, unknown: RESPONSE_UNKNOWN, invalid: RESPONSE_INVALID } };