UNPKG

snyk-docker-plugin

Version:
377 lines 16.4 kB
"use strict"; Object.defineProperty(exports, "__esModule", { value: true }); exports.extractArchive = void 0; const Debug = require("debug"); const fs_1 = require("fs"); const path_1 = require("path"); const tar_stream_1 = require("tar-stream"); const __1 = require(".."); const stream_utils_1 = require("../../stream-utils"); const decompress_maybe_1 = require("../decompress-maybe"); const layer_1 = require("../layer"); const debug = Debug("snyk"); const MEDIATYPE_DOCKER_MANIFEST_V2 = "application/vnd.docker.distribution.manifest.v2+json"; const MEDIATYPE_DOCKER_MANIFEST_LIST_V2 = "application/vnd.docker.distribution.manifest.list.v2+json"; const MEDIATYPE_OCI_MANIFEST_V1 = "application/vnd.oci.image.manifest.v1+json"; const MEDIATYPE_OCI_MANIFEST_LIST_V1 = "application/vnd.oci.image.index.v1+json"; // Maximum size for JSON metadata files. Matches the limit in streamToJson. // Files larger than this are layer blobs, not JSON metadata. const MAX_JSON_SIZE_BYTES = 2 * 1024 * 1024; /** * Retrieve the products of files content from the specified oci-archive. * * Uses a two-pass approach: * 1. First pass: Parse JSON metadata (manifests, configs, indexes) to determine * which layers are needed for the target platform. * 2. Second pass: Extract only the required layer blobs. * * This avoids memory issues from buffering large layer blobs unnecessarily. * * @param ociArchiveFilesystemPath Path to image file saved in oci-archive format. * @param extractActions Array of pattern-callbacks pairs. * @param options PluginOptions * @returns Array of extracted files products sorted by the reverse order of the layers from last to first. 
*/ async function extractArchive(ociArchiveFilesystemPath, extractActions, options) { // Pass 1: Extract JSON metadata const metadata = await extractMetadata(ociArchiveFilesystemPath); // Determine which manifest and layers we need const { manifest, imageConfig } = resolveManifestAndConfig(metadata, options); // Get the list of layer digests we need to extract const requiredLayerDigests = new Set(manifest.layers.map((layer) => layer.digest)); // Pass 2: Extract the required layers const { layers, failedDigests } = await extractLayers(ociArchiveFilesystemPath, requiredLayerDigests, extractActions); // Report any layer extraction failures if (failedDigests.size > 0) { const failures = Array.from(failedDigests.entries()) .map(([digest, error]) => `${digest}: ${error}`) .join("; "); debug(`Failed to extract ${failedDigests.size} layer(s): ${failures}`); } // Build the result const filteredLayers = manifest.layers .filter((layer) => layers[layer.digest]) .map((layer) => layers[layer.digest]) .reverse(); if (filteredLayers.length === 0) { // Provide more context about why extraction failed if (failedDigests.size > 0) { const failedList = Array.from(failedDigests.keys()).join(", "); throw new __1.InvalidArchiveError(`Failed to extract any layers from the image. ` + `${failedDigests.size} layer(s) failed: ${failedList}`); } throw new __1.InvalidArchiveError("We found no layers in the provided image. " + "The archive may be corrupted or in an unsupported format."); } // Warn if some but not all layers failed (partial extraction) const missingLayers = manifest.layers.filter((layer) => !layers[layer.digest]); if (missingLayers.length > 0) { debug(`Warning: ${missingLayers.length} layer(s) from manifest were not extracted: ` + missingLayers.map((l) => l.digest).join(", ")); } return { layers: filteredLayers, manifest, imageConfig, }; } exports.extractArchive = extractArchive; /** * Pass 1: Extract only JSON metadata from the archive. 
* * Skips large files (> MAX_JSON_SIZE_BYTES) since they're layer blobs, not JSON. * For small files, attempts JSON parse; binary data fails fast on the first byte check. */ async function extractMetadata(ociArchiveFilesystemPath) { return new Promise((resolve, reject) => { const tarExtractor = (0, tar_stream_1.extract)(); const manifests = {}; const configs = []; let mainIndexFile; const indexFiles = {}; tarExtractor.on("entry", async (header, stream, next) => { try { if (header.type === "file") { const normalizedHeaderName = (0, path_1.normalize)(header.name); if (isMainIndexFile(normalizedHeaderName)) { mainIndexFile = await (0, stream_utils_1.streamToJson)(stream); } else if (isBlobPath(normalizedHeaderName) && (header.size === undefined || header.size <= MAX_JSON_SIZE_BYTES)) { // Small blob file - try to parse as JSON metadata // Large files and non-blob files (oci-layout, etc.) are skipped const jsonContent = await tryParseJsonMetadata(stream); if (jsonContent !== undefined) { const digest = getDigestFromPath(normalizedHeaderName); if (isArchiveManifest(jsonContent)) { manifests[digest] = jsonContent; } else if (isImageIndexFile(jsonContent)) { indexFiles[digest] = jsonContent; } else if (isImageConfigFile(jsonContent)) { configs.push(jsonContent); } } } // All other files (non-blob, large blobs) are drained below } } catch (err) { debug(`Error processing OCI archive entry ${header.name}: ${err.message}`); } stream.resume(); // Drain the stream next(); }); tarExtractor.on("finish", () => { resolve({ mainIndexFile, manifests, indexFiles, configs }); }); tarExtractor.on("error", (error) => { reject(error); }); (0, fs_1.createReadStream)(ociArchiveFilesystemPath) .pipe((0, decompress_maybe_1.decompressMaybe)()) .pipe(tarExtractor); }); } /** * Attempts to parse a stream as JSON metadata. * Returns undefined if the stream doesn't contain valid JSON (e.g., it's a layer blob). * * Uses a fast-fail check: if the first byte isn't '{' or '[', it's not JSON. 
* Note: This doesn't handle JSON with leading whitespace, which is technically valid * but never produced by standard OCI tooling. */ async function tryParseJsonMetadata(stream) { return new Promise((resolve) => { let firstChunk = true; const chunks = []; let bytes = 0; let resolved = false; const cleanup = () => { stream.removeAllListeners("data"); stream.removeAllListeners("end"); // Keep a no-op error handler to prevent unhandled error events // when the stream is drained after fast-fail stream.removeAllListeners("error"); // tslint:disable-next-line:no-empty stream.on("error", () => { }); }; stream.on("data", (chunk) => { if (firstChunk) { firstChunk = false; // Fast-fail: JSON must start with { or [ const firstByte = chunk[0]; if (firstByte !== 0x7b && firstByte !== 0x5b) { // 0x7b = '{', 0x5b = '[' resolved = true; cleanup(); resolve(undefined); return; } } bytes += chunk.length; if (bytes <= MAX_JSON_SIZE_BYTES) { chunks.push(chunk.toString("utf8")); } }); stream.on("end", () => { if (resolved) { return; } if (chunks.length === 0) { resolve(undefined); return; } try { resolve(JSON.parse(chunks.join(""))); } catch (_a) { resolve(undefined); } }); stream.on("error", () => { if (!resolved) { resolve(undefined); } }); }); } /** * Pass 2: Extract only the specified layer blobs. * * Tracks extraction failures so the caller can report which layers failed * rather than silently returning incomplete results. 
*/ async function extractLayers(ociArchiveFilesystemPath, requiredDigests, extractActions) { return new Promise((resolve, reject) => { const tarExtractor = (0, tar_stream_1.extract)(); const layers = {}; const failedDigests = new Map(); tarExtractor.on("entry", async (header, stream, next) => { try { if (header.type === "file") { const normalizedHeaderName = (0, path_1.normalize)(header.name); if (!isMainIndexFile(normalizedHeaderName) && isBlobPath(normalizedHeaderName)) { const digest = getDigestFromPath(normalizedHeaderName); if (requiredDigests.has(digest)) { // This is a layer we need - extract it try { const layer = await (0, layer_1.extractImageLayer)(stream, extractActions); layers[digest] = layer; } catch (error) { const errorMessage = error instanceof Error ? error.message : String(error); debug(`Failed to extract layer ${digest}: ${errorMessage}`); failedDigests.set(digest, errorMessage); } } } } } catch (err) { debug(`Error processing archive entry ${header.name}: ${err.message}`); } stream.resume(); next(); }); tarExtractor.on("finish", () => { resolve({ layers, failedDigests }); }); tarExtractor.on("error", (error) => { reject(error); }); (0, fs_1.createReadStream)(ociArchiveFilesystemPath) .pipe((0, decompress_maybe_1.decompressMaybe)()) .pipe(tarExtractor); }); } /** * Checks if a path is in the blobs directory (blobs/<algo>/<hash>). * Non-blob files like oci-layout should be skipped. */ function isBlobPath(normalizedPath) { const parts = normalizedPath.split(path_1.sep).filter(Boolean); return parts[0] === "blobs" && parts.length >= 3; } /** * Extracts digest from a blob path in the format blobs/<algo>/<hash>. * Returns the digest as <algo>:<hash> to match manifest digest format. * * Caller should verify isBlobPath() first. 
/**
 * Builds the `<algo>:<hash>` digest for a blob path of the form
 * blobs/<algo>/<hash>: the second path segment is the algorithm and the last
 * segment is the hash.
 *
 * @param normalizedPath Entry name already passed through path.normalize().
 * @returns Digest string `<algo>:<hash>`.
 */
function getDigestFromPath(normalizedPath) {
    const segments = normalizedPath.split(path_1.sep).filter(Boolean);
    const algorithm = segments[1];
    const hash = segments[segments.length - 1];
    return `${algorithm}:${hash}`;
}
/**
 * Picks the manifest and image config to use for the requested platform.
 *
 * Platform precedence: `options.platform`, else the platform of the only
 * config that is not "unknown"/"unknown", else "linux/amd64".
 *
 * @param metadata `{ mainIndexFile, manifests, indexFiles, configs }`.
 * @param options Optional; only `options.platform` ("os/arch[/variant]") is read.
 * @returns `{ manifest, imageConfig }`.
 * @throws InvalidArchiveError when no manifest or no image config matches.
 */
function resolveManifestAndConfig(metadata, options) {
    // A config is dropped only when BOTH os and architecture are "unknown".
    const filteredConfigs = metadata.configs.filter((config) => {
        return (config == null ||
            config.os !== "unknown" ||
            config.architecture !== "unknown");
    });
    const requestedPlatform = options == null ? undefined : options.platform;
    const platform = requestedPlatform ||
        (filteredConfigs.length === 1
            ? (0, __1.getPlatformFromConfig)(filteredConfigs[0])
            : "linux/amd64");
    const platformInfo = getOciPlatformInfoFromOptionString(platform);
    const manifest = getManifest(metadata.mainIndexFile, metadata.manifests, metadata.indexFiles, platformInfo);
    if (!manifest) {
        throw new __1.InvalidArchiveError(`Could not find manifest for platform ${platformInfo.os}/${platformInfo.architecture} in archive`);
    }
    const imageConfig = getImageConfig(metadata.configs, platformInfo);
    if (imageConfig === undefined) {
        throw new __1.InvalidArchiveError("Could not find the image config in the provided image");
    }
    return { manifest, imageConfig };
}
/**
 * Looks up the manifest for a platform.
 *
 * Without a main index the first collected manifest is returned. Otherwise
 * the index (including nested indexes) is searched for a platform match and
 * the manifest with the matching digest is returned.
 *
 * @param imageIndex Parsed index.json, or undefined.
 * @param manifestCollection Manifests keyed by digest.
 * @param indexFiles Nested image indexes keyed by digest.
 * @param platformInfo `{ os, architecture, variant }`.
 * @returns The manifest, or undefined when the digest is not in the collection.
 * @throws InvalidArchiveError when the index has no entry for the platform.
 */
function getManifest(imageIndex, manifestCollection, indexFiles, platformInfo) {
    if (!imageIndex) {
        return manifestCollection[Object.keys(manifestCollection)[0]];
    }
    const allManifests = getAllManifestsIndexItems(imageIndex, indexFiles);
    const manifestInfo = getImageManifestInfo(allManifests, platformInfo);
    if (manifestInfo === undefined) {
        throw new __1.InvalidArchiveError("Image does not support the requested CPU architecture or operating system");
    }
    return manifestCollection[manifestInfo.digest];
}
/**
 * Flattens an image index into the list of its image-manifest descriptors,
 * descending into nested indexes found in `indexFiles`.
 *
 * An index without a `manifests` array contributes nothing, and a nested
 * index that is already being expanded further up the recursion is skipped
 * so that a self-referencing archive cannot recurse without bound.
 *
 * @param imageIndex Parsed image index.
 * @param indexFiles Nested image indexes keyed by digest.
 * @param ancestors Internal: digests currently being expanded.
 * @returns Array of manifest descriptors, in index order.
 */
function getAllManifestsIndexItems(imageIndex, indexFiles, ancestors = new Set()) {
    const allManifestsInfo = [];
    const entries = imageIndex != null && Array.isArray(imageIndex.manifests)
        ? imageIndex.manifests
        : [];
    for (const manifest of entries) {
        if (manifest == null) {
            continue;
        }
        if (manifest.mediaType === MEDIATYPE_OCI_MANIFEST_V1 ||
            manifest.mediaType === MEDIATYPE_DOCKER_MANIFEST_V2) {
            // an archive manifest file
            allManifestsInfo.push(manifest);
        }
        else if (manifest.mediaType === MEDIATYPE_OCI_MANIFEST_LIST_V1 ||
            manifest.mediaType === MEDIATYPE_DOCKER_MANIFEST_LIST_V2) {
            // nested index
            const index = indexFiles[manifest.digest];
            if (index && !ancestors.has(manifest.digest)) {
                ancestors.add(manifest.digest);
                allManifestsInfo.push(...getAllManifestsIndexItems(index, indexFiles, ancestors));
                ancestors.delete(manifest.digest);
            }
        }
    }
    return allManifestsInfo;
}
/**
 * @param manifest Any parsed JSON value.
 * @returns true when the value has an array-valued `layers` property.
 */
function isArchiveManifest(manifest) {
    return manifest != null && Array.isArray(manifest.layers);
}
/**
 * @param json Any parsed JSON value.
 * @returns true when the value has truthy `architecture` and `rootfs` properties.
 */
function isImageConfigFile(json) {
    return json != null && Boolean(json.architecture) && Boolean(json.rootfs);
}
/**
 * @param json Any parsed JSON value.
 * @returns true when `mediaType` is one of the two manifest-list/index media
 *          types and `manifests` is an array.
 */
function isImageIndexFile(json) {
    if (json == null) {
        return false;
    }
    const isIndexMediaType = json.mediaType === MEDIATYPE_OCI_MANIFEST_LIST_V1 ||
        json.mediaType === MEDIATYPE_DOCKER_MANIFEST_LIST_V2;
    return isIndexMediaType && Array.isArray(json.manifests);
}
/**
 * @param name Normalized entry name.
 * @returns true when the entry is the archive's top-level index.json.
 */
function isMainIndexFile(name) {
    return name === "index.json";
}
/**
 * Splits an "os/architecture[/variant]" string on "/".
 *
 * @param platform Platform option string.
 * @returns `{ os, architecture, variant }`; missing parts are undefined.
 */
function getOciPlatformInfoFromOptionString(platform) {
    const [os, architecture, variant] = platform.split("/");
    return {
        os,
        architecture,
        variant,
    };
}
/**
 * Selects the manifest descriptor for a platform.
 *
 * @param manifests Manifest descriptors from the image index.
 * @param platformInfo `{ os, architecture, variant }`.
 * @returns The selected descriptor, or undefined.
 */
function getImageManifestInfo(manifests, platformInfo) {
    // manifests do not always have a platform, this is the case for OCI
    // images built with Docker when no platform is specified
    if (manifests.length === 1 && !manifests[0].platform) {
        return manifests[0];
    }
    return getBestMatchForPlatform(manifests, platformInfo, (target) => {
        const platform = target.platform;
        const hasPlatform = platform !== null && platform !== undefined;
        return {
            os: hasPlatform ? platform.os : undefined,
            architecture: hasPlatform ? platform.architecture : undefined,
            variant: hasPlatform ? platform.variant : undefined,
        };
    });
}
/**
 * Selects the image config whose top-level `os` and `architecture` match the
 * platform. No variant is read from the config.
 *
 * @param manifests Image config objects.
 * @param platformInfo `{ os, architecture, variant }`.
 * @returns The selected config, or undefined.
 */
function getImageConfig(manifests, platformInfo) {
    return getBestMatchForPlatform(manifests, platformInfo, (target) => {
        return {
            os: target.os,
            architecture: target.architecture,
        };
    });
}
/**
 * Filters `manifests` by os and architecture. With more than one match, the
 * first one whose variant strictly equals `platformInfo.variant` is returned
 * (undefined if none); otherwise the single match or undefined.
 *
 * @param manifests Candidate items.
 * @param platformInfo `{ os, architecture, variant }` to match.
 * @param extractPlatformInfoFromManifest Maps an item to `{ os, architecture, variant }`.
 * @returns The best match, or undefined.
 */
function getBestMatchForPlatform(manifests, platformInfo, extractPlatformInfoFromManifest) {
    const matches = manifests.filter((item) => {
        const { os, architecture } = extractPlatformInfoFromManifest(item);
        return os === platformInfo.os && architecture === platformInfo.architecture;
    });
    if (matches.length > 1) {
        return matches.find((item) => {
            const { variant } = extractPlatformInfoFromManifest(item);
            return variant === platformInfo.variant;
        });
    }
    return matches[0] || undefined;
}
//# sourceMappingURL=layer.js.map