UNPKG

@netlify/build

Version:

Netlify build module

github.com/netlify/build

498 lines (497 loc) • 21.4 kB

JavaScript

import { createReadStream, promises as fs, existsSync } from 'node:fs'; import path from 'node:path'; import { fdir } from 'fdir'; import { minimatch } from 'minimatch'; import { LIKELY_SECRET_PREFIXES, SAFE_LISTED_VALUES } from './secret_prefixes.js'; /** * Determine if the user disabled scanning via env var * @param env current envars * @returns */ export function isSecretsScanningEnabled(env) { if (env.SECRETS_SCAN_ENABLED === false || env.SECRETS_SCAN_ENABLED === 'false') { return false; } return true; } /** * Determine if the user disabled enhanced scanning via env var * @param env current envars * @returns */ export function isEnhancedSecretsScanningEnabled(env) { if (env.SECRETS_SCAN_SMART_DETECTION_ENABLED === false || env.SECRETS_SCAN_SMART_DETECTION_ENABLED === 'false') { return false; } return true; } export function getStringArrayFromEnvValue(env, envVarName) { if (typeof env[envVarName] !== 'string') { return []; } const omitKeys = env[envVarName] .split(',') .map((s) => s.trim()) .filter(Boolean); return omitKeys; } export function getOmitValuesFromEnhancedScanForEnhancedScanFromEnv(env) { return getStringArrayFromEnvValue(env, 'SECRETS_SCAN_SMART_DETECTION_OMIT_VALUES'); } function filterOmittedKeys(env, envKeys = []) { const omitKeys = getStringArrayFromEnvValue(env, 'SECRETS_SCAN_OMIT_KEYS'); return envKeys.filter((key) => !omitKeys.includes(key)); } /** * Trivial values are values that are: * - empty or short strings * - string forms of booleans * - booleans * - numbers or objects with fewer than 4 chars */ function isValueTrivial(val) { if (typeof val === 'string') { // string forms of booleans if (val === 'true' || val === 'false') { return true; } // trivial values are empty or short strings return val.trim().length < 4; } if (typeof val === 'boolean') { // booleans are always considered trivial return true; } if (typeof val === 'number' || typeof val === 'object') { return JSON.stringify(val).length < 4; } return !val; } /** * given the explicit secret keys and env vars, return the list of secret keys which have non-empty or non-trivial values. This * will also filter out keys passed in the SECRETS_SCAN_OMIT_KEYS env var. * * non-trivial values are values that are: * - >4 characters/digits * - not booleans * * @param env env vars list * @param secretKeys * @returns string[] */ export function getSecretKeysToScanFor(env, secretKeys) { const filteredSecretKeys = filterOmittedKeys(env, secretKeys); return filteredSecretKeys.filter((key) => !isValueTrivial(env[key])); } const getShannonEntropy = (str) => { const len = str.length; if (len === 0) return 0; const freqMap = {}; for (const char of str) { freqMap[char] = (freqMap[char] || 0) + 1; } let entropy = 0; for (const char in freqMap) { const p = freqMap[char] / len; entropy -= p * Math.log2(p); } return entropy; }; const HIGH_ENTROPY_THRESHOLD = 4.5; const doesEntropyMeetThresholdForSecret = (str) => { const entropy = getShannonEntropy(str); return entropy >= HIGH_ENTROPY_THRESHOLD; }; // Most prefixes are 4-5 chars, so requiring 12 chars after ensures a reasonable secret length const MIN_CHARS_AFTER_PREFIX = 12; // Escape special regex characters (like $, *, +, etc) in prefixes so they're treated as literal characters const prefixMatchingRegex = LIKELY_SECRET_PREFIXES.map((p) => p.replace(/[$*+?.()|[\]{}]/g, '\\$&')).join('|'); // Build regex pattern for matching secrets with various delimiters and quotes: // (?:["'\`]|[=]) - match either quotes, or = at the start // Named capturing groups: // - <token>: captures the entire secret value including its prefix // - <prefix>: captures just the prefix part (e.g. 'aws_', 'github_pat_') // (?:${prefixMatchingRegex}) - non-capturing group containing our escaped prefixes (e.g. aws_|github_pat_|etc) // [a-zA-Z0-9-]{${MIN_CHARS_AFTER_PREFIX}} - match exactly MIN_CHARS_AFTER_PREFIX chars (alphanumeric or dash) after the prefix // [a-zA-Z0-9-]*? - lazily match any additional chars (alphanumeric or dash) // (?:["'\`]|$) - end with either quotes or end of line // gi - global and case insensitive flags // Note: Using the global flag (g) means this regex object maintains state between executions. // We would need to reset lastIndex to 0 if we wanted to reuse it on the same string multiple times. const likelySecretRegex = new RegExp(`(?:["'\`]|[=]) *(?<token>(?<prefix>${prefixMatchingRegex})[a-zA-Z0-9-]{${MIN_CHARS_AFTER_PREFIX}}[a-zA-Z0-9-]*?)(?:["'\`]|$)`, 'gi'); /** * Checks a chunk of text for likely secrets based on known prefixes and patterns. * The function works by: * 1. Splitting the chunk into tokens using quotes, whitespace, equals signs, colons, and commas as delimiters * 2. For each token, checking if it matches our secret pattern: * - Must start (^) with one of our known prefixes (e.g. aws_, github_pat_, etc) * - Must be followed by at least MIN_CHARS_AFTER_PREFIX non-whitespace characters * - Must extend to the end ($) of the token * * For example, given the chunk: secretKey='aws_123456789012345678' * 1. It's split into tokens: ['secretKey', 'aws_123456789012345678'] * 2. Each token is checked against the regex pattern: * - 'secretKey' doesn't match (doesn't start with a known prefix) * - 'aws_123456789012345678' matches (starts with 'aws_' and has sufficient length) * */ export function findLikelySecrets({ text, omitValuesFromEnhancedScan = [], }) { if (!text) return []; const matches = []; let match; const allOmittedValues = [...omitValuesFromEnhancedScan, ...SAFE_LISTED_VALUES]; while ((match = likelySecretRegex.exec(text)) !== null) { const token = match.groups?.token; const prefix = match.groups?.prefix; if (!token || !prefix || allOmittedValues.includes(token)) { continue; } // Despite the prefix, the string does not look random enough to be convinced it's a secret if (!doesEntropyMeetThresholdForSecret(token)) { continue; } matches.push({ prefix, index: match.index, }); } return matches; } /** * Given the env and base directory, find all file paths to scan. It will look at the * env vars to decide if it should omit certain paths. * * @param options * @returns string[] of relative paths from base of files that should be searched */ export async function getFilePathsToScan({ env, base }) { const omitPathsAlways = ['.git/', '.cache/']; // node modules is dense and is only useful to scan if the repo itself commits these // files. As a simple check to understand if the repo would commit these files, we expect // that they would not ignore them from their git settings. So if gitignore includes // node_modules anywhere we will omit looking in those folders - this will allow repos // that do commit node_modules to still scan them. let ignoreNodeModules = false; const gitignorePath = path.resolve(base, '.gitignore'); const gitignoreContents = existsSync(gitignorePath) ? await fs.readFile(gitignorePath, 'utf-8') : ''; if (gitignoreContents?.includes('node_modules')) { ignoreNodeModules = true; } let files = await new fdir() .withRelativePaths() .filter((path) => { if (ignoreNodeModules && path.includes('node_modules')) { return false; } return true; }) .crawl(base) .withPromise(); // normalize the path separators to all use the forward slash // this is needed for windows machines and snapshot tests consistency. files = files.map((f) => f.split(path.sep).join('/')); let omitPaths = []; if (typeof env.SECRETS_SCAN_OMIT_PATHS === 'string') { omitPaths = env.SECRETS_SCAN_OMIT_PATHS.split(',') .map((s) => s.trim()) .filter(Boolean); } omitPaths = omitPaths.concat(omitPathsAlways); if (omitPaths.length > 0) { files = files.filter((relativePath) => !omitPathMatches(relativePath, omitPaths)); } return files; } // omit paths are relative path substrings. const omitPathMatches = (relativePath, omitPaths) => { return omitPaths.some((oPath) => { // check if the substring matches or glob pattern return relativePath.startsWith(oPath) || minimatch(relativePath, oPath, { dot: true }); }); }; /** * Given the env vars, the current keys, paths, etc. Look across the provided files to find the values * of the secrets based on the keys provided. It will process files separately in different read streams. * The values that it looks for will be a unique set of plaintext, base64 encoded, and uri encoded permutations * of each value - to catch common permutations that occur post build. * * @param scanArgs {ScanArgs} scan options * @returns promise with all of the scan results, if any */ export async function scanFilesForKeyValues({ env, keys, filePaths, base, enhancedScanning, omitValuesFromEnhancedScan = [], }) { const scanResults = { matches: [], scannedFilesCount: 0, }; const keyValues = keys.reduce((kvs, key) => { let val = env[key]; if (typeof val === 'number' || typeof val === 'object') { val = JSON.stringify(val); } if (typeof val === 'string') { // to detect the secrets effectively // normalize the value so that we remove leading and // ending whitespace and newline characters const normalizedVal = val.replace(/^\s*/, '').replace(/\s*$/, ''); kvs[key] = Array.from(new Set([normalizedVal, Buffer.from(normalizedVal).toString('base64'), encodeURIComponent(normalizedVal)])); } return kvs; }, {}); scanResults.scannedFilesCount = filePaths.length; let settledPromises = []; // process the scanning in batches to not run into memory issues by // processing all files at the same time. while (filePaths.length > 0) { const chunkSize = 200; const batch = filePaths.splice(0, chunkSize); settledPromises = settledPromises.concat(await Promise.allSettled(batch.map((file) => { return searchStreamMinimalChunks({ basePath: base, file, keyValues, enhancedScanning, omitValuesFromEnhancedScan, }); }))); } settledPromises.forEach((result) => { if (result.status === 'fulfilled' && result.value?.length > 0) { scanResults.matches = scanResults.matches.concat(result.value); } }); return scanResults; } /** * Search stream implementation using just read stream that allows to buffer less content */ const searchStreamMinimalChunks = ({ basePath, file, keyValues, enhancedScanning, omitValuesFromEnhancedScan = [], }) => { return new Promise((resolve, reject) => { const matches = []; const keyVals = [].concat(...Object.values(keyValues)); // determine longest value that we will search for - needed to determine minimal size of rolling buffer const maxValLength = Math.max(0, // explicit secrets ...keyVals.map((v) => v.length), ...(enhancedScanning ? [ // omitted likely secrets (after finding likely secret we check if it should be omitted, so we need to capture at least size of omitted values) ...omitValuesFromEnhancedScan.map((v) => (typeof v === 'string' ? v.length : 0)), // minimum length needed to find likely secret ...LIKELY_SECRET_PREFIXES.map((v) => v.length + MIN_CHARS_AFTER_PREFIX), ] : [])); if (maxValLength === 0) { // no non-empty values to scan for resolve(matches); return; } const filePath = path.resolve(basePath, file); const inStream = createReadStream(filePath); function getKeyForValue(val) { let key = ''; for (const [secretKeyName, valuePermutations] of Object.entries(keyValues)) { if (valuePermutations.includes(val)) { key = secretKeyName; } } return key; } let buffer = ''; let newLinesIndexesInCurrentBuffer = null; function getCurrentBufferNewLineIndexes() { if (newLinesIndexesInCurrentBuffer === null) { newLinesIndexesInCurrentBuffer = []; let newLineIndex = -1; while ((newLineIndex = buffer.indexOf('\n', newLineIndex + 1)) !== -1) { newLinesIndexesInCurrentBuffer.push(newLineIndex); } } return newLinesIndexesInCurrentBuffer; } /** * Amount of characters that were fully processed. Used to determine absolute position of current rolling buffer * in the file. */ let processedCharacters = 0; /** * Amount of lines that were fully processed. Used to determine absolute line number of matches in current rolling buffer. */ let processedLines = 0; /** * Map keeping track of found secrets in current file. Used to prevent reporting same secret+position multiple times. * Needed because rolling buffer might retain same secret in multiple passes. */ const foundIndexes = new Map(); /** * We report given secret at most once per line, so we keep track lines we already reported for given secret. */ const foundLines = new Map(); /** * Calculate absolute line number in a file for given match in the current rolling buffer. */ function getLineNumberForMatchInTheBuffer({ indexInBuffer, key }) { const absolutePositionInFile = processedCharacters + indexInBuffer; // check if we already handled match for given key in this position let foundIndexesForKey = foundIndexes.get(key); if (!foundIndexesForKey?.has(absolutePositionInFile)) { // ensure we track match for this key and position to not report it again in future passes if (!foundIndexesForKey) { foundIndexesForKey = new Set(); foundIndexes.set(key, foundIndexesForKey); } foundIndexesForKey.add(absolutePositionInFile); // calculate line number based on amount of fully processed lines and position of line breaks in current buffer let lineNumber = processedLines + 1; for (const newLineIndex of getCurrentBufferNewLineIndexes()) { if (indexInBuffer > newLineIndex) { lineNumber++; } else { break; } } // check if we already handled match for given key in this line let foundLinesForKey = foundLines.get(key); if (!foundLinesForKey?.has(lineNumber)) { if (!foundLinesForKey) { foundLinesForKey = new Set(); foundLines.set(key, foundLinesForKey); } foundLinesForKey.add(lineNumber); // only report line number if we didn't report it yet for this key return lineNumber; } } } function processBuffer() { for (const valVariant of keyVals) { let indexInBuffer = -1; while ((indexInBuffer = buffer.indexOf(valVariant, indexInBuffer + 1)) !== -1) { const key = getKeyForValue(valVariant); const lineNumber = getLineNumberForMatchInTheBuffer({ indexInBuffer, key, }); if (typeof lineNumber === 'number') { matches.push({ file, lineNumber, key, enhancedMatch: false, }); } } } if (enhancedScanning) { const likelySecrets = findLikelySecrets({ text: buffer, omitValuesFromEnhancedScan }); for (const { index, prefix } of likelySecrets) { const lineNumber = getLineNumberForMatchInTheBuffer({ indexInBuffer: index, key: prefix, }); if (typeof lineNumber === 'number') { matches.push({ file, lineNumber, key: prefix, enhancedMatch: true, }); } } } } inStream.on('data', function (chunk) { buffer += chunk.toString(); // reset new line positions in current buffer newLinesIndexesInCurrentBuffer = null; if (buffer.length > maxValLength) { // only process if buffer is large enough to contain longest secret, if final chunk isn't large enough // it will be processed in `close` event handler processBuffer(); // we will keep maxValLength characters in the buffer, surplus of characters at this point is fully processed const charactersInBufferThatWereFullyProcessed = buffer.length - maxValLength; processedCharacters += charactersInBufferThatWereFullyProcessed; // advance processed lines for (const newLineIndex of getCurrentBufferNewLineIndexes()) { if (newLineIndex < charactersInBufferThatWereFullyProcessed) { processedLines++; } else { break; } } // Keep the last part of the buffer to handle split values across chunks buffer = buffer.slice(charactersInBufferThatWereFullyProcessed); } }); inStream.on('error', function (error) { if (error?.code === 'EISDIR') { // file path is a directory - do nothing resolve(matches); } else { reject(error); } }); inStream.on('close', function () { // process any remaining buffer content processBuffer(); resolve(matches); }); }); }; /** * ScanResults are all of the finds for all keys and their disparate locations. Scanning is * async in streams so order can change a lot. Some matches are the result of an env var explictly being marked as secret, * while others are part of the enhanced secret scan. * * This function groups the results into an object where the results are separate into the secretMatches and enhancedSecretMatches, * their value being an object where the keys are the env var keys and the values are all match results for that key. * * @param scanResults * @returns */ export function groupScanResultsByKeyAndScanType(scanResults) { const secretMatchesByKeys = {}; const enhancedSecretMatchesByKeys = {}; scanResults.matches.forEach((matchResult) => { if (matchResult.enhancedMatch) { if (!enhancedSecretMatchesByKeys[matchResult.key]) { enhancedSecretMatchesByKeys[matchResult.key] = []; } enhancedSecretMatchesByKeys[matchResult.key].push(matchResult); } else { if (!secretMatchesByKeys[matchResult.key]) { secretMatchesByKeys[matchResult.key] = []; } secretMatchesByKeys[matchResult.key].push(matchResult); } }); // sort results to get a consistent output and logically ordered match results const sortMatches = (matchesByKeys) => { Object.keys(matchesByKeys).forEach((key) => { matchesByKeys[key].sort((a, b) => { // sort by file name first if (a.file > b.file) { return 1; } // sort by line number second if (a.file === b.file) { if (a.lineNumber > b.lineNumber) { return 1; } if (a.lineNumber === b.lineNumber) { return 0; } return -1; } return -1; }); }); }; sortMatches(secretMatchesByKeys); sortMatches(enhancedSecretMatchesByKeys); return { secretMatches: secretMatchesByKeys, enhancedSecretMatches: enhancedSecretMatchesByKeys }; }