snyk-docker-plugin
Version:
Snyk CLI docker plugin
541 lines (475 loc) • 16.4 kB
text/typescript
import * as Debug from "debug";
import { createReadStream } from "fs";
import { normalize as normalizePath, sep as pathSeparator } from "path";
import { Readable } from "stream";
import { extract, Extract } from "tar-stream";
import { getPlatformFromConfig, InvalidArchiveError } from "..";
import { streamToJson } from "../../stream-utils";
import { PluginOptions } from "../../types";
import { decompressMaybe } from "../decompress-maybe";
import { extractImageLayer } from "../layer";
import {
ExtractAction,
ExtractedLayers,
ExtractedLayersAndManifest,
ImageConfig,
OciArchiveManifest,
OciImageIndex,
OciManifestInfo,
OciPlatformInfo,
} from "../types";
// Debug logger for this module, created under the "snyk" namespace.
const debug = Debug("snyk");
// Media types that identify a single-image manifest inside an image index
// (Docker v2 schema and OCI v1). Index entries with these types are treated
// as archive manifests.
const MEDIATYPE_DOCKER_MANIFEST_V2 =
"application/vnd.docker.distribution.manifest.v2+json";
// Media types that identify a manifest list / image index (Docker v2 schema
// and OCI v1). Index entries with these types are treated as nested indexes.
const MEDIATYPE_DOCKER_MANIFEST_LIST_V2 =
"application/vnd.docker.distribution.manifest.list.v2+json";
const MEDIATYPE_OCI_MANIFEST_V1 = "application/vnd.oci.image.manifest.v1+json";
const MEDIATYPE_OCI_MANIFEST_LIST_V1 =
"application/vnd.oci.image.index.v1+json";
// Maximum size for JSON metadata files. Matches the limit in streamToJson.
// Files larger than this are layer blobs, not JSON metadata.
// The value is 2 MiB (2 * 1024 * 1024 bytes).
const MAX_JSON_SIZE_BYTES = 2 * 1024 * 1024;
/**
 * Extracts the file products of an image stored in oci-archive format.
 *
 * The archive is read twice:
 * 1. The first read collects the JSON metadata (index, manifests, configs)
 *    and picks the manifest that matches the requested platform.
 * 2. The second read extracts only the layer blobs listed by that manifest.
 *
 * Reading the metadata first means layer blobs that are not needed never have
 * to be processed.
 *
 * @param ociArchiveFilesystemPath Path to image file saved in oci-archive format.
 * @param extractActions Array of pattern-callbacks pairs.
 * @param options PluginOptions
 * @returns The selected manifest, its image config and the extracted layers,
 * ordered from the manifest's last layer to its first.
 * @throws InvalidArchiveError when none of the manifest's layers was extracted.
 */
export async function extractArchive(
  ociArchiveFilesystemPath: string,
  extractActions: ExtractAction[],
  options: PluginOptions,
): Promise<ExtractedLayersAndManifest> {
  // Pass 1: read the JSON metadata and choose the manifest/config to use.
  const archiveMetadata = await extractMetadata(ociArchiveFilesystemPath);
  const { manifest, imageConfig } = resolveManifestAndConfig(
    archiveMetadata,
    options,
  );

  // Pass 2: extract only the blobs referenced by the chosen manifest.
  const wantedDigests = new Set(manifest.layers.map(({ digest }) => digest));
  const extraction = await extractLayers(
    ociArchiveFilesystemPath,
    wantedDigests,
    extractActions,
  );
  const extractedByDigest = extraction.layers;
  const failedDigests = extraction.failedDigests;

  // Log every layer whose extraction threw, together with its error message.
  if (failedDigests.size > 0) {
    const details: string[] = [];
    failedDigests.forEach((error, digest) => {
      details.push(`${digest}: ${error}`);
    });
    debug(
      `Failed to extract ${failedDigests.size} layer(s): ${details.join("; ")}`,
    );
  }

  // Walk the manifest once, separating extracted layers from missing ones.
  const extractedInManifestOrder: ExtractedLayers[] = [];
  const missingDigests: string[] = [];
  for (const { digest } of manifest.layers) {
    const extracted = extractedByDigest[digest];
    if (extracted) {
      extractedInManifestOrder.push(extracted);
    } else {
      missingDigests.push(digest);
    }
  }
  // Callers receive the layers from last to first.
  const filteredLayers = extractedInManifestOrder.reverse();

  if (filteredLayers.length === 0) {
    if (failedDigests.size === 0) {
      throw new InvalidArchiveError(
        "We found no layers in the provided image. " +
          "The archive may be corrupted or in an unsupported format.",
      );
    }
    // Extraction was attempted but every attempt failed: name the digests.
    const failedList = Array.from(failedDigests.keys()).join(", ");
    throw new InvalidArchiveError(
      `Failed to extract any layers from the image. ` +
        `${failedDigests.size} layer(s) failed: ${failedList}`,
    );
  }

  // Partial extraction is tolerated but logged.
  if (missingDigests.length > 0) {
    debug(
      `Warning: ${missingDigests.length} layer(s) from manifest were not extracted: ` +
        missingDigests.join(", "),
    );
  }

  return { layers: filteredLayers, manifest, imageConfig };
}
/**
 * JSON metadata collected from an oci-archive by extractMetadata and consumed
 * by resolveManifestAndConfig.
 */
interface ArchiveMetadata {
// Parsed content of the archive's "index.json" entry, if one was found.
mainIndexFile?: OciImageIndex;
// Image manifests found under blobs/, keyed by digest ("<algo>:<hash>").
manifests: Record<string, OciArchiveManifest>;
// Nested image indexes found under blobs/, keyed by digest ("<algo>:<hash>").
indexFiles: Record<string, OciImageIndex>;
// Image configs found under blobs/, in the order they were encountered.
configs: ImageConfig[];
}
/**
 * Pass 1: Extract only JSON metadata from the archive.
 *
 * The top-level "index.json" entry is parsed with streamToJson. Entries under
 * blobs/ are parsed with tryParseJsonMetadata when their header size is
 * unknown or at most MAX_JSON_SIZE_BYTES, and are then classified as manifest,
 * nested index or image config. Everything else is drained without parsing.
 *
 * A failure while processing a single entry is logged and the entry skipped.
 * A failure of any stream in the read pipeline (file read, decompression or
 * tar parsing) rejects the returned promise.
 *
 * @param ociArchiveFilesystemPath Path to image file saved in oci-archive format.
 * @returns The metadata collected from the archive.
 */
async function extractMetadata(
  ociArchiveFilesystemPath: string,
): Promise<ArchiveMetadata> {
  return new Promise((resolve, reject) => {
    const tarExtractor: Extract = extract();
    const manifests: Record<string, OciArchiveManifest> = {};
    const configs: ImageConfig[] = [];
    let mainIndexFile: OciImageIndex | undefined;
    const indexFiles: Record<string, OciImageIndex> = {};

    tarExtractor.on("entry", async (header, stream, next) => {
      try {
        if (header.type === "file") {
          const normalizedHeaderName = normalizePath(header.name);
          if (isMainIndexFile(normalizedHeaderName)) {
            mainIndexFile = await streamToJson<OciImageIndex>(stream);
          } else if (
            isBlobPath(normalizedHeaderName) &&
            (header.size === undefined || header.size <= MAX_JSON_SIZE_BYTES)
          ) {
            // Small blob file - try to parse as JSON metadata
            // Large files and non-blob files (oci-layout, etc.) are skipped
            const jsonContent = await tryParseJsonMetadata(stream);
            if (jsonContent !== undefined) {
              const digest = getDigestFromPath(normalizedHeaderName);
              if (isArchiveManifest(jsonContent)) {
                manifests[digest] = jsonContent;
              } else if (isImageIndexFile(jsonContent)) {
                indexFiles[digest] = jsonContent as OciImageIndex;
              } else if (isImageConfigFile(jsonContent)) {
                configs.push(jsonContent as ImageConfig);
              }
            }
          }
          // All other files (non-blob, large blobs) are drained below
        }
      } catch (err) {
        // The thrown value is not guaranteed to be an Error instance.
        const message = err instanceof Error ? err.message : String(err);
        debug(`Error processing OCI archive entry ${header.name}: ${message}`);
      }
      stream.resume(); // Drain the stream
      next();
    });

    tarExtractor.on("finish", () => {
      resolve({ mainIndexFile, manifests, indexFiles, configs });
    });

    const archiveStream = createReadStream(ociArchiveFilesystemPath);
    const decompressor = decompressMaybe();

    // pipe() does not forward errors downstream, so every stage needs its own
    // handler; otherwise a read or decompression failure would surface as an
    // unhandled "error" event and this promise would never settle.
    const fail = (error: Error): void => {
      archiveStream.destroy(); // release the file descriptor
      reject(error);
    };
    archiveStream.on("error", fail);
    decompressor.on("error", fail);
    tarExtractor.on("error", fail);

    archiveStream.pipe(decompressor).pipe(tarExtractor);
  });
}
/**
 * Attempts to parse a stream as JSON metadata.
 * Returns undefined if the stream doesn't contain valid JSON (e.g., it's a layer blob).
 *
 * Uses a fast-fail check: if the first byte isn't '{' or '[', it's not JSON.
 * Note: This doesn't handle JSON with leading whitespace, which is technically valid
 * but never produced by standard OCI tooling.
 *
 * Raw bytes are buffered and decoded once at the end, so a multi-byte UTF-8
 * character that is split across two chunks is decoded correctly.
 *
 * After a fast-fail the stream is left for the caller to drain.
 *
 * @param stream Stream with the content of one archive entry.
 * @param maxBytes Maximum number of bytes accepted as JSON; larger content
 * yields undefined. Defaults to MAX_JSON_SIZE_BYTES.
 * @returns The parsed JSON value, or undefined when the content is empty,
 * too large, not JSON, or the stream errored.
 */
async function tryParseJsonMetadata(
  stream: Readable,
  maxBytes: number = MAX_JSON_SIZE_BYTES,
): Promise<unknown> {
  return new Promise((resolve) => {
    const chunks: Buffer[] = [];
    let sawFirstChunk = false;
    let totalBytes = 0;
    let settled = false;

    const settle = (value: unknown): void => {
      if (!settled) {
        settled = true;
        resolve(value);
      }
    };

    const detach = (): void => {
      stream.removeAllListeners("data");
      stream.removeAllListeners("end");
      // Keep a no-op error handler to prevent unhandled error events
      // when the stream is drained after fast-fail
      stream.removeAllListeners("error");
      // tslint:disable-next-line:no-empty
      stream.on("error", () => {});
    };

    stream.on("data", (chunk: Buffer | string) => {
      // A stream with an encoding set delivers strings; normalise to bytes.
      const bytes = Buffer.isBuffer(chunk) ? chunk : Buffer.from(chunk);
      if (bytes.length === 0) {
        return; // nothing to inspect yet
      }
      if (!sawFirstChunk) {
        sawFirstChunk = true;
        // Fast-fail: JSON must start with { or [
        const firstByte = bytes[0];
        if (firstByte !== 0x7b && firstByte !== 0x5b) {
          // 0x7b = '{', 0x5b = '['
          detach();
          settle(undefined);
          return;
        }
      }
      totalBytes += bytes.length;
      if (totalBytes > maxBytes) {
        // Too large to be metadata: stop buffering instead of keeping a
        // truncated document around until the end of the stream.
        chunks.length = 0;
        detach();
        settle(undefined);
        return;
      }
      chunks.push(bytes);
    });

    stream.on("end", () => {
      if (chunks.length === 0) {
        settle(undefined);
        return;
      }
      try {
        // Decode once over the full byte sequence.
        settle(JSON.parse(Buffer.concat(chunks).toString("utf8")));
      } catch {
        settle(undefined);
      }
    });

    stream.on("error", () => {
      settle(undefined);
    });
  });
}
/**
 * Outcome of the layer extraction pass (extractLayers).
 */
interface LayerExtractionResult {
// Successfully extracted layers, keyed by digest ("<algo>:<hash>").
layers: Record<string, ExtractedLayers>;
// Layers whose extraction threw: digest -> error message.
failedDigests: Map<string, string>;
}
/**
 * Pass 2: Extract only the specified layer blobs.
 *
 * Tracks extraction failures so the caller can report which layers failed
 * rather than silently returning incomplete results.
 *
 * A failure of any stream in the read pipeline (file read, decompression or
 * tar parsing) rejects the returned promise.
 *
 * @param ociArchiveFilesystemPath Path to image file saved in oci-archive format.
 * @param requiredDigests Digests ("<algo>:<hash>") of the blobs to extract.
 * @param extractActions Array of pattern-callbacks pairs passed to extractImageLayer.
 * @returns Extracted layers keyed by digest, plus the digests that failed
 * together with their error messages.
 */
async function extractLayers(
  ociArchiveFilesystemPath: string,
  requiredDigests: Set<string>,
  extractActions: ExtractAction[],
): Promise<LayerExtractionResult> {
  return new Promise((resolve, reject) => {
    const tarExtractor: Extract = extract();
    const layers: Record<string, ExtractedLayers> = {};
    const failedDigests: Map<string, string> = new Map();

    tarExtractor.on("entry", async (header, stream, next) => {
      try {
        if (header.type === "file") {
          const normalizedHeaderName = normalizePath(header.name);
          if (
            !isMainIndexFile(normalizedHeaderName) &&
            isBlobPath(normalizedHeaderName)
          ) {
            const digest = getDigestFromPath(normalizedHeaderName);
            if (requiredDigests.has(digest)) {
              // This is a layer we need - extract it
              try {
                const layer = await extractImageLayer(stream, extractActions);
                layers[digest] = layer;
              } catch (error) {
                const errorMessage =
                  error instanceof Error ? error.message : String(error);
                debug(`Failed to extract layer ${digest}: ${errorMessage}`);
                failedDigests.set(digest, errorMessage);
              }
            }
          }
        }
      } catch (err) {
        // The thrown value is not guaranteed to be an Error instance.
        const message = err instanceof Error ? err.message : String(err);
        debug(`Error processing archive entry ${header.name}: ${message}`);
      }
      stream.resume(); // Drain whatever was not consumed
      next();
    });

    tarExtractor.on("finish", () => {
      resolve({ layers, failedDigests });
    });

    const archiveStream = createReadStream(ociArchiveFilesystemPath);
    const decompressor = decompressMaybe();

    // pipe() does not forward errors downstream, so every stage needs its own
    // handler; otherwise a read or decompression failure would surface as an
    // unhandled "error" event and this promise would never settle.
    const fail = (error: Error): void => {
      archiveStream.destroy(); // release the file descriptor
      reject(error);
    };
    archiveStream.on("error", fail);
    decompressor.on("error", fail);
    tarExtractor.on("error", fail);

    archiveStream.pipe(decompressor).pipe(tarExtractor);
  });
}
/**
 * Tells whether a normalized archive path points inside the blobs directory,
 * i.e. has the shape blobs/<algo>/<hash>.
 * Other entries, such as oci-layout, yield false.
 */
function isBlobPath(normalizedPath: string): boolean {
  // Drop empty segments produced by leading, trailing or doubled separators.
  const segments: string[] = [];
  for (const segment of normalizedPath.split(pathSeparator)) {
    if (segment) {
      segments.push(segment);
    }
  }
  if (segments.length < 3) {
    return false;
  }
  return segments[0] === "blobs";
}
/**
 * Builds the digest (<algo>:<hash>) for a blob path of the form
 * blobs/<algo>/<hash>, which is the format manifests use for digests.
 *
 * The algorithm is the second path segment and the hash is the last one.
 * Callers are expected to have checked the path with isBlobPath() first.
 */
function getDigestFromPath(normalizedPath: string): string {
  const segments = normalizedPath
    .split(pathSeparator)
    .filter((segment) => Boolean(segment));
  const algorithm = segments[1];
  const hash = segments.pop();
  return `${algorithm}:${hash}`;
}
/**
 * Chooses the manifest and image config to use from the collected metadata.
 *
 * The target platform is options.platform when given. Otherwise it is derived
 * from the only config that is not "unknown"/"unknown", and falls back to
 * "linux/amd64" when there is not exactly one such config.
 *
 * @param metadata Metadata collected from the archive.
 * @param options Plugin options; only `platform` is read.
 * @returns The manifest and image config matching the target platform.
 * @throws InvalidArchiveError when no manifest or no image config is found.
 */
function resolveManifestAndConfig(
  metadata: ArchiveMetadata,
  options: Partial<PluginOptions>,
): {
  manifest: OciArchiveManifest;
  imageConfig: ImageConfig;
} {
  // Ignore configs whose os AND architecture are both "unknown".
  const knownPlatformConfigs = metadata.configs.filter(
    (config) =>
      !(config?.os === "unknown" && config?.architecture === "unknown"),
  );

  // Only evaluated when no platform was requested explicitly.
  const inferPlatform = () =>
    knownPlatformConfigs.length === 1
      ? getPlatformFromConfig(knownPlatformConfigs[0])
      : "linux/amd64";

  const platform = options?.platform || inferPlatform();
  const platformInfo = getOciPlatformInfoFromOptionString(platform as string);

  const { mainIndexFile, manifests, indexFiles, configs } = metadata;
  const manifest = getManifest(
    mainIndexFile,
    manifests,
    indexFiles,
    platformInfo,
  );
  if (!manifest) {
    throw new InvalidArchiveError(
      `Could not find manifest for platform ${platformInfo.os}/${platformInfo.architecture} in archive`,
    );
  }

  const imageConfig = getImageConfig(configs, platformInfo);
  if (imageConfig === undefined) {
    throw new InvalidArchiveError(
      "Could not find the image config in the provided image",
    );
  }

  return { manifest, imageConfig };
}
/**
 * Picks the manifest to use for the given platform.
 *
 * Without a main index, the first manifest in the collection is returned.
 * With one, the index (including nested indexes) is searched for an entry
 * matching the platform and the manifest with that entry's digest is returned.
 *
 * @param imageIndex Parsed main index file, if the archive had one.
 * @param manifestCollection Manifests keyed by digest.
 * @param indexFiles Nested indexes keyed by digest.
 * @param platformInfo Target platform.
 * @returns The matching manifest, or undefined when it is not in the collection.
 * @throws InvalidArchiveError when the index has no entry for the platform.
 */
function getManifest(
  imageIndex: OciImageIndex | undefined,
  manifestCollection: Record<string, OciArchiveManifest>,
  indexFiles: Record<string, OciImageIndex>,
  platformInfo: OciPlatformInfo,
): OciArchiveManifest | undefined {
  if (imageIndex) {
    const candidates = getAllManifestsIndexItems(imageIndex, indexFiles);
    const selected = getImageManifestInfo(candidates, platformInfo);
    if (selected === undefined) {
      throw new InvalidArchiveError(
        "Image does not support the requested CPU architecture or operating system",
      );
    }
    return manifestCollection[selected.digest];
  }

  // No index available: fall back to the first manifest that was collected.
  const [firstDigest] = Object.keys(manifestCollection);
  return manifestCollection[firstDigest];
}
/**
 * Flattens an image index into the list of image-manifest entries it refers
 * to, following nested indexes recursively.
 *
 * Entries whose media type is an image manifest are returned as-is. Entries
 * whose media type is a manifest list / image index are looked up in
 * `indexFiles` and expanded; entries with any other media type are ignored.
 *
 * Each nested index is expanded at most once. Digests are taken from blob
 * paths and are not verified, so a crafted archive can make an index refer
 * to itself; without this guard the recursion would never terminate.
 *
 * @param imageIndex Index to flatten.
 * @param indexFiles Nested indexes keyed by digest.
 * @param visitedDigests Digests of nested indexes already expanded. Callers
 * normally omit it; it is threaded through the recursion.
 * @returns The image-manifest entries reachable from `imageIndex`.
 */
function getAllManifestsIndexItems(
  imageIndex: OciImageIndex,
  indexFiles: Record<string, OciImageIndex>,
  visitedDigests: Set<string> = new Set<string>(),
): OciManifestInfo[] {
  const allManifestsInfo: OciManifestInfo[] = [];
  // The index comes from unvalidated JSON, so `manifests` may be absent.
  for (const manifest of imageIndex.manifests ?? []) {
    if (
      manifest.mediaType === MEDIATYPE_OCI_MANIFEST_V1 ||
      manifest.mediaType === MEDIATYPE_DOCKER_MANIFEST_V2
    ) {
      // an archive manifest file
      allManifestsInfo.push(manifest);
    } else if (
      manifest.mediaType === MEDIATYPE_OCI_MANIFEST_LIST_V1 ||
      manifest.mediaType === MEDIATYPE_DOCKER_MANIFEST_LIST_V2
    ) {
      // nested index
      if (visitedDigests.has(manifest.digest)) {
        continue; // already expanded: cycle or repeated reference
      }
      visitedDigests.add(manifest.digest);
      const index = indexFiles[manifest.digest];
      if (index) {
        allManifestsInfo.push(
          ...getAllManifestsIndexItems(index, indexFiles, visitedDigests),
        );
      }
    }
  }
  return allManifestsInfo;
}
/**
 * Type guard: tells whether a parsed JSON value looks like an image manifest,
 * i.e. it has a `layers` array.
 *
 * Always returns a boolean and accepts null/undefined and non-object values.
 */
function isArchiveManifest(manifest: any): manifest is OciArchiveManifest {
  return Array.isArray(manifest?.layers);
}
/**
 * Type guard: tells whether a parsed JSON value looks like an image config,
 * i.e. it has truthy `architecture` and `rootfs` properties.
 *
 * Always returns a boolean and accepts null/undefined and non-object values.
 */
function isImageConfigFile(json: any): json is ImageConfig {
  return Boolean(json?.architecture && json?.rootfs);
}
/**
 * Tells whether a parsed JSON value looks like an image index: its media type
 * is one of the manifest-list / image-index types and it has a `manifests`
 * array.
 */
function isImageIndexFile(json: any): boolean {
  const mediaType = json?.mediaType;
  const hasIndexMediaType =
    mediaType === MEDIATYPE_OCI_MANIFEST_LIST_V1 ||
    mediaType === MEDIATYPE_DOCKER_MANIFEST_LIST_V2;
  return hasIndexMediaType && Array.isArray(json?.manifests);
}
/**
 * Tells whether an archive entry name is exactly "index.json".
 */
function isMainIndexFile(name: string): boolean {
  const mainIndexFileName = "index.json";
  return mainIndexFileName === name;
}
/**
 * Splits a platform string of the form "<os>/<architecture>[/<variant>]" into
 * its parts. Parts missing from the string are undefined in the result.
 */
function getOciPlatformInfoFromOptionString(platform: string): OciPlatformInfo {
  const segments = platform.split("/");
  return {
    os: segments[0],
    architecture: segments[1],
    variant: segments[2],
  };
}
/**
 * Selects the index entry to use for the given platform.
 *
 * @param manifests Image-manifest entries taken from the image index.
 * @param platformInfo Target platform.
 * @returns The selected entry, or undefined when none matches.
 */
function getImageManifestInfo(
  manifests: OciManifestInfo[],
  platformInfo: OciPlatformInfo,
): OciManifestInfo | undefined {
  // Manifests do not always have a platform; this is the case for OCI images
  // built with Docker when no platform is specified. A lone entry without a
  // platform is therefore accepted as-is.
  const [onlyManifest] = manifests;
  if (manifests.length === 1 && !onlyManifest.platform) {
    return onlyManifest;
  }

  const platformOf = (target: OciManifestInfo): OciPlatformInfo => {
    const declared = target.platform;
    return {
      os: declared?.os,
      architecture: declared?.architecture,
      variant: declared?.variant,
    };
  };
  return getBestMatchForPlatform(manifests, platformInfo, platformOf);
}
/**
 * Selects the image config whose os and architecture match the platform.
 * Configs are matched without a variant.
 *
 * @param manifests Image configs to choose from (despite the parameter name).
 * @param platformInfo Target platform.
 * @returns The selected config, or undefined when none matches.
 */
function getImageConfig(
  manifests: ImageConfig[],
  platformInfo: OciPlatformInfo,
): ImageConfig | undefined {
  const platformOf = (target: ImageConfig): OciPlatformInfo => {
    const { os, architecture } = target;
    return { os, architecture };
  };
  return getBestMatchForPlatform(manifests, platformInfo, platformOf);
}
/**
 * Finds the item that best matches a platform.
 *
 * Items are first narrowed to those with the same os and architecture. When
 * zero or one item remains it is the result. When several remain, the first
 * one whose variant equals the requested variant is returned, and undefined
 * is returned if none does.
 *
 * @param manifests Items to choose from.
 * @param platformInfo Target platform.
 * @param extractPlatformInfoFromManifest Reads the platform of an item.
 * @returns The best match, or undefined when there is none.
 */
function getBestMatchForPlatform<T>(
  manifests: T[],
  platformInfo: OciPlatformInfo,
  extractPlatformInfoFromManifest: (target: T) => OciPlatformInfo,
): T | undefined {
  const candidates: T[] = [];
  for (const item of manifests) {
    const itemPlatform = extractPlatformInfoFromManifest(item);
    if (
      itemPlatform.os === platformInfo.os &&
      itemPlatform.architecture === platformInfo.architecture
    ) {
      candidates.push(item);
    }
  }

  if (candidates.length <= 1) {
    return candidates[0] || undefined;
  }

  // Several items share os/architecture: the variant decides.
  for (const item of candidates) {
    if (extractPlatformInfoFromManifest(item).variant === platformInfo.variant) {
      return item;
    }
  }
  return undefined;
}