UNPKG

@cyclonedx/cdxgen

Version:

Creates CycloneDX Software Bill of Materials (SBOM) from source or container image

493 lines (472 loc) 14.6 kB
import { closeSync, openSync, readSync, statSync } from "node:fs";
import { basename } from "node:path";

// Numeric identifiers of the GGUF metadata value types as they appear in the header.
const GGUF_METADATA_TYPES = {
  UINT8: 0,
  INT8: 1,
  UINT16: 2,
  INT16: 3,
  UINT32: 4,
  INT32: 5,
  FLOAT32: 6,
  BOOL: 7,
  STRING: 8,
  ARRAY: 9,
  UINT64: 10,
  INT64: 11,
  FLOAT64: 12,
};

// `general.file_type` value -> encoding label (see ggufFileTypeName).
const GGUF_FILE_TYPE_NAMES = new Map([
  [0, "F32"],
  [1, "F16"],
  [2, "Q4_0"],
  [3, "Q4_1"],
  [7, "Q8_0"],
  [8, "Q5_0"],
  [9, "Q5_1"],
  [10, "Q2_K"],
  [11, "Q3_K_S"],
  [12, "Q3_K_M"],
  [13, "Q3_K_L"],
  [14, "Q4_K_S"],
  [15, "Q4_K_M"],
  [16, "Q5_K_S"],
  [17, "Q5_K_M"],
  [18, "Q6_K"],
  [19, "IQ2_XXS"],
  [20, "IQ2_XS"],
  [21, "Q2_K_S"],
  [22, "IQ3_XS"],
  [23, "IQ3_XXS"],
  [24, "IQ1_S"],
  [25, "IQ4_NL"],
  [26, "IQ3_S"],
  [27, "IQ3_M"],
  [28, "IQ2_S"],
  [29, "IQ2_M"],
  [30, "IQ4_XS"],
  [31, "IQ1_M"],
  [32, "BF16"],
  [36, "TQ1_0"],
  [37, "TQ2_0"],
  [38, "MXFP4_MOE"],
  [39, "NVFP4"],
  [40, "Q1_0"],
]);

// Leading filename tokens that are reported as `sidecar`.
const GGUF_SIDECAR_NAMES = new Set(["mmproj", "mtp"]);
// Trailing filename tokens that are reported as `type`.
const GGUF_TYPE_NAMES = new Set(["LoRA", "vocab"]);
const GGUF_TEXT_TOKEN_REGEX = /^[A-Za-z0-9\s]+$/u;
const GGUF_ENCODING_TOKEN_REGEX = /^[A-Za-z0-9_]+$/u;
const GGUF_SIZE_LABEL_REGEX = /^(?:\d+x)?(?:\d+\.)?\d+[A-Za-z]$/u;
const GGUF_SIZE_LABEL_SUFFIX_REGEX = /^[A-Za-z]+(?:\d+\.)?\d+[A-Za-z]+$/u;

// Read window used by readGgufMetadata: start small, double up to the cap.
const GGUF_INITIAL_READ_BYTES = 64 * 1024;
const GGUF_MAX_HEADER_READ_BYTES = 8 * 1024 * 1024;
// Upper bounds enforced while decoding length-prefixed header fields.
const GGUF_MAX_STRING_LENGTH = 1024 * 1024;
const GGUF_MAX_ARRAY_LENGTH = 64 * 1024;
const GGUF_MAX_METADATA_COUNT = 16 * 1024;
const GGUF_TEXT_DECODER = new TextDecoder();
const MAX_SAFE_BIGINT = BigInt(Number.MAX_SAFE_INTEGER);
const MIN_SAFE_BIGINT = BigInt(Number.MIN_SAFE_INTEGER);
const GGUF_MAX_METADATA_TYPE = GGUF_METADATA_TYPES.FLOAT64;

/** True when `value` is a non-empty string of ASCII digits. */
const isDigits = (value) => value.length > 0 && /^[0-9]+$/u.test(value);

/** True when `value` is exactly five ASCII digits (shard index/count tokens). */
const isFiveDigitToken = (value) => value.length === 5 && isDigits(value);

/** True when `value` is non-empty and matches GGUF_TEXT_TOKEN_REGEX. */
const isValidGgufTextToken = (value) =>
  value.length > 0 && GGUF_TEXT_TOKEN_REGEX.test(value);

/** True for tokens such as `v1` or `v1.0`: a `v` followed by dot-separated digit groups. */
const isVersionToken = (value) => {
  if (!value?.startsWith("v")) {
    return false;
  }
  return value
    .slice(1)
    .split(".")
    .every((segment) => isDigits(segment));
};

/**
 * Strip a trailing `<index>-of-<count>` shard suffix from the token list.
 *
 * @param {string[]} tokens filename tokens (already split on `-`)
 * @returns {Object} `{ tokens }` when no shard suffix is present, otherwise the
 * remaining tokens together with `shard`, `shardCount` and `shardIndex`
 */
const parseShardSuffix = (tokens) => {
  if (tokens.length < 3) {
    return {
      tokens,
    };
  }
  const shardCountToken = tokens.at(-1);
  const ofToken = tokens.at(-2);
  const shardIndexToken = tokens.at(-3);
  if (
    ofToken !== "of" ||
    !isFiveDigitToken(shardIndexToken) ||
    !isFiveDigitToken(shardCountToken)
  ) {
    return {
      tokens,
    };
  }
  return {
    shard: `${shardIndexToken}-of-${shardCountToken}`,
    shardCount: Number.parseInt(shardCountToken, 10),
    shardIndex: Number.parseInt(shardIndexToken, 10),
    tokens: tokens.slice(0, -3),
  };
};

/**
 * Interpret tokens as `<sizeLabel>[-<sizeSuffix>][-<fineTune>...]`.
 *
 * @param {string[]} tokens tokens that follow the base name
 * @returns {Object|undefined} `{ sizeLabel, fineTune }`, or `undefined` when the
 * first token is not a size label or a fine-tune token is not a valid text token
 */
const parseGgufSizeAndFineTune = (tokens) => {
  if (!tokens.length || !GGUF_SIZE_LABEL_REGEX.test(tokens[0])) {
    return undefined;
  }
  let sizeLabel = tokens[0];
  let fineTuneTokens = tokens.slice(1);
  if (
    fineTuneTokens.length > 0 &&
    GGUF_SIZE_LABEL_SUFFIX_REGEX.test(fineTuneTokens[0])
  ) {
    sizeLabel = `${sizeLabel}-${fineTuneTokens[0]}`;
    fineTuneTokens = fineTuneTokens.slice(1);
  }
  if (fineTuneTokens.some((token) => !isValidGgufTextToken(token))) {
    return undefined;
  }
  return {
    fineTune: fineTuneTokens.length ? fineTuneTokens.join("-") : undefined,
    sizeLabel,
  };
};

/**
 * Split the tokens that precede the version into base name, size label and
 * fine-tune. The longest possible base name is tried first.
 *
 * @param {string[]} tokens tokens before the version token
 * @returns {Object|undefined} `{ baseName, sizeLabel, fineTune }` or `undefined`
 */
const parseGgufPrefixTokens = (tokens) => {
  for (let baseLength = tokens.length - 1; baseLength >= 1; baseLength--) {
    const baseTokens = tokens.slice(0, baseLength);
    if (baseTokens.some((token) => !isValidGgufTextToken(token))) {
      continue;
    }
    const sizeAndFineTune = parseGgufSizeAndFineTune(tokens.slice(baseLength));
    if (!sizeAndFineTune) {
      continue;
    }
    return {
      baseName: baseTokens.join("-"),
      fineTune: sizeAndFineTune.fineTune,
      sizeLabel: sizeAndFineTune.sizeLabel,
    };
  }
  return undefined;
};

/**
 * Throw a RangeError when fewer than `byteLength` bytes remain at `state.offset`.
 * readGgufMetadata relies on the RangeError type to decide whether to retry
 * with a larger read window.
 */
const ensureReadableBytes = (dataView, state, byteLength, label) => {
  if (state.offset + byteLength > dataView.byteLength) {
    throw new RangeError(
      `Truncated GGUF header while reading ${label} at byte ${state.offset}`,
    );
  }
};

/**
 * Read a little-endian uint64 length/count field and advance `state.offset`.
 *
 * @throws {RangeError} when the header is truncated, the value is not a safe
 * integer, or it exceeds `maxValue`
 */
const readLengthValue = (
  dataView,
  state,
  label,
  maxValue = Number.MAX_SAFE_INTEGER,
) => {
  ensureReadableBytes(dataView, state, 8, label);
  const value = dataView.getBigUint64(state.offset, true);
  state.offset += 8;
  if (value > MAX_SAFE_BIGINT) {
    throw new RangeError(
      `GGUF ${label} ${value.toString()} exceeds supported JavaScript limits`,
    );
  }
  const numericValue = Number(value);
  if (numericValue > maxValue) {
    throw new RangeError(`GGUF ${label} ${numericValue} exceeds allowed limit`);
  }
  return numericValue;
};

/** Return a Number when `value` is a safe integer, otherwise its decimal string. */
const bigIntToMetadataNumber = (value) => {
  if (value <= MAX_SAFE_BIGINT && value >= MIN_SAFE_BIGINT) {
    return Number(value);
  }
  return value.toString();
};

/**
 * Decode one metadata value of the given type at `state.offset` (little-endian)
 * and advance the offset past it. Arrays are decoded recursively.
 *
 * @throws {RangeError} on truncated input or over-limit lengths
 * @throws {Error} on unsupported types or invalid boolean bytes
 */
const readGgufValue = (dataView, state, type) => {
  switch (type) {
    case GGUF_METADATA_TYPES.UINT8:
      ensureReadableBytes(dataView, state, 1, "uint8 metadata value");
      return dataView.getUint8(state.offset++);
    case GGUF_METADATA_TYPES.INT8:
      ensureReadableBytes(dataView, state, 1, "int8 metadata value");
      return dataView.getInt8(state.offset++);
    case GGUF_METADATA_TYPES.UINT16: {
      ensureReadableBytes(dataView, state, 2, "uint16 metadata value");
      const value = dataView.getUint16(state.offset, true);
      state.offset += 2;
      return value;
    }
    case GGUF_METADATA_TYPES.INT16: {
      ensureReadableBytes(dataView, state, 2, "int16 metadata value");
      const value = dataView.getInt16(state.offset, true);
      state.offset += 2;
      return value;
    }
    case GGUF_METADATA_TYPES.UINT32: {
      ensureReadableBytes(dataView, state, 4, "uint32 metadata value");
      const value = dataView.getUint32(state.offset, true);
      state.offset += 4;
      return value;
    }
    case GGUF_METADATA_TYPES.INT32: {
      ensureReadableBytes(dataView, state, 4, "int32 metadata value");
      const value = dataView.getInt32(state.offset, true);
      state.offset += 4;
      return value;
    }
    case GGUF_METADATA_TYPES.FLOAT32: {
      ensureReadableBytes(dataView, state, 4, "float32 metadata value");
      const value = dataView.getFloat32(state.offset, true);
      state.offset += 4;
      return value;
    }
    case GGUF_METADATA_TYPES.BOOL: {
      ensureReadableBytes(dataView, state, 1, "bool metadata value");
      const value = dataView.getUint8(state.offset);
      state.offset += 1;
      if (value !== 0 && value !== 1) {
        throw new Error(`Invalid GGUF boolean metadata value ${value}`);
      }
      return value === 1;
    }
    case GGUF_METADATA_TYPES.STRING: {
      const length = readLengthValue(
        dataView,
        state,
        "string length",
        GGUF_MAX_STRING_LENGTH,
      );
      ensureReadableBytes(dataView, state, length, "string bytes");
      // View over the underlying buffer; no copy is made before decoding.
      const bytes = new Uint8Array(
        dataView.buffer,
        dataView.byteOffset + state.offset,
        length,
      );
      state.offset += length;
      return GGUF_TEXT_DECODER.decode(bytes);
    }
    case GGUF_METADATA_TYPES.ARRAY: {
      ensureReadableBytes(dataView, state, 4, "array item type");
      const itemType = dataView.getUint32(state.offset, true);
      state.offset += 4;
      if (itemType > GGUF_MAX_METADATA_TYPE) {
        throw new Error(`Unsupported GGUF metadata type ${itemType}`);
      }
      const length = readLengthValue(
        dataView,
        state,
        "array length",
        GGUF_MAX_ARRAY_LENGTH,
      );
      const values = [];
      for (let index = 0; index < length; index++) {
        values.push(readGgufValue(dataView, state, itemType));
      }
      return values;
    }
    case GGUF_METADATA_TYPES.UINT64: {
      ensureReadableBytes(dataView, state, 8, "uint64 metadata value");
      const value = dataView.getBigUint64(state.offset, true);
      state.offset += 8;
      return bigIntToMetadataNumber(value);
    }
    case GGUF_METADATA_TYPES.INT64: {
      ensureReadableBytes(dataView, state, 8, "int64 metadata value");
      const value = dataView.getBigInt64(state.offset, true);
      state.offset += 8;
      return bigIntToMetadataNumber(value);
    }
    case GGUF_METADATA_TYPES.FLOAT64: {
      ensureReadableBytes(dataView, state, 8, "float64 metadata value");
      const value = dataView.getFloat64(state.offset, true);
      state.offset += 8;
      return value;
    }
    default:
      throw new Error(`Unsupported GGUF metadata type ${type}`);
  }
};

/**
 * Read up to `readLength` bytes from the start of `filePath`.
 *
 * @returns {Buffer} the bytes actually read (may be shorter than requested)
 */
const readGgufPrefix = (filePath, readLength) => {
  const fileDescriptor = openSync(filePath, "r");
  try {
    const prefix = Buffer.alloc(readLength);
    const bytesRead = readSync(fileDescriptor, prefix, 0, readLength, 0);
    return prefix.subarray(0, bytesRead);
  } finally {
    closeSync(fileDescriptor);
  }
};

/**
 * Convert a GGUF `general.file_type` enumeration value to a stable encoding label.
 *
 * The mapping follows the current `llama_ftype` enumeration used by GGUF writers.
 * Unknown values intentionally return `undefined` so callers can fall back to
 * filename-derived or executor-specific hints.
 *
 * Only numbers, bigints and non-blank strings are considered. `null`, `""`,
 * whitespace-only strings and booleans would otherwise coerce to `0` via
 * `Number()` and be misreported as `F32`.
 *
 * @param {number|string|undefined} fileType numeric GGUF file type value
 * @returns {string|undefined} encoding label such as `Q5_K_M` or `BF16`
 */
export function ggufFileTypeName(fileType) {
  const isNumeric = typeof fileType === "number" || typeof fileType === "bigint";
  const isNonBlankString =
    typeof fileType === "string" && fileType.trim().length > 0;
  if (!isNumeric && !isNonBlankString) {
    return undefined;
  }
  const normalizedFileType = Number(fileType);
  if (!Number.isInteger(normalizedFileType)) {
    return undefined;
  }
  return GGUF_FILE_TYPE_NAMES.get(normalizedFileType);
}

/**
 * Parse a GGUF filename using the upstream naming convention documented by the
 * GGUF specification.
 *
 * The convention is intentionally strict and will return `undefined` for files
 * that do not follow the recommended layout. Callers that need to support older
 * or community-specific names can use this as a first pass and then fall back to
 * project-specific heuristics.
 *
 * @param {string} filePathOrName absolute path or bare filename
 * @returns {Object|undefined} parsed filename details when recognized
 */
export function parseGgufFilename(filePathOrName) {
  const fileName = basename(String(filePathOrName || "").trim());
  if (!fileName.endsWith(".gguf")) {
    return undefined;
  }
  let tokens = basename(fileName, ".gguf").split("-");
  const parsed = {
    fileName,
  };
  // Empty tokens come from leading, trailing or doubled dashes.
  if (!tokens.length || tokens.some((token) => token.length === 0)) {
    return undefined;
  }
  if (GGUF_SIDECAR_NAMES.has(tokens[0])) {
    parsed.sidecar = tokens[0];
    tokens = tokens.slice(1);
  }
  const shardDetails = parseShardSuffix(tokens);
  tokens = shardDetails.tokens;
  if (shardDetails.shard) {
    parsed.shard = shardDetails.shard;
    parsed.shardCount = shardDetails.shardCount;
    parsed.shardIndex = shardDetails.shardIndex;
  }
  if (tokens.length && GGUF_TYPE_NAMES.has(tokens.at(-1))) {
    parsed.type = tokens.at(-1);
    tokens = tokens.slice(0, -1);
  }
  if (!tokens.length) {
    return undefined;
  }
  // The version must be preceded by at least one token (the base name).
  const versionIndex = tokens.findLastIndex((token) => isVersionToken(token));
  if (versionIndex < 1) {
    return undefined;
  }
  parsed.version = tokens[versionIndex];
  // At most one token (the encoding) may follow the version.
  const suffixTokens = tokens.slice(versionIndex + 1);
  if (suffixTokens.length > 1) {
    return undefined;
  }
  if (suffixTokens.length === 1) {
    const encodingToken = suffixTokens[0];
    if (
      !GGUF_ENCODING_TOKEN_REGEX.test(encodingToken) ||
      GGUF_TYPE_NAMES.has(encodingToken)
    ) {
      return undefined;
    }
    parsed.encoding = encodingToken;
  }
  const prefixDetails = parseGgufPrefixTokens(tokens.slice(0, versionIndex));
  if (!prefixDetails) {
    return undefined;
  }
  parsed.baseName = prefixDetails.baseName;
  parsed.sizeLabel = prefixDetails.sizeLabel;
  if (prefixDetails.fineTune) {
    parsed.fineTune = prefixDetails.fineTune;
  }
  return parsed;
}

/**
 * Parse GGUF metadata from an in-memory header buffer.
 *
 * The result always contains `gguf.version`, `gguf.tensorCount` and
 * `gguf.metadataCount`, followed by every key/value pair found in the header.
 *
 * @param {Uint8Array|Buffer} buffer GGUF header buffer
 * @returns {Object} parsed metadata map
 * @throws {RangeError} when the buffer is truncated or a length exceeds limits
 * @throws {Error} when the magic, a value type or a boolean value is invalid
 */
export function parseGgufMetadataBuffer(buffer) {
  if (!buffer?.byteLength || buffer.byteLength < 24) {
    throw new RangeError("Truncated GGUF header: need at least 24 bytes");
  }
  const dataView = new DataView(
    buffer.buffer,
    buffer.byteOffset,
    buffer.byteLength,
  );
  if (
    String.fromCharCode(
      dataView.getUint8(0),
      dataView.getUint8(1),
      dataView.getUint8(2),
      dataView.getUint8(3),
    ) !== "GGUF"
  ) {
    throw new Error("Invalid GGUF magic");
  }
  const state = { offset: 4 };
  ensureReadableBytes(dataView, state, 4, "format version");
  const version = dataView.getUint32(state.offset, true);
  state.offset += 4;
  const tensorCount = readLengthValue(dataView, state, "tensor count");
  const metadataCount = readLengthValue(
    dataView,
    state,
    "metadata count",
    GGUF_MAX_METADATA_COUNT,
  );
  const metadata = {
    "gguf.version": version,
    "gguf.tensorCount": tensorCount,
    "gguf.metadataCount": metadataCount,
  };
  for (let index = 0; index < metadataCount; index++) {
    const key = readGgufValue(dataView, state, GGUF_METADATA_TYPES.STRING);
    ensureReadableBytes(dataView, state, 4, "metadata value type");
    const valueType = dataView.getUint32(state.offset, true);
    state.offset += 4;
    if (valueType > GGUF_MAX_METADATA_TYPE) {
      throw new Error(`Unsupported GGUF metadata type ${valueType}`);
    }
    // Keys come from the file. A plain `metadata[key] = value` with the key
    // `__proto__` would invoke the prototype setter instead of storing the
    // entry, so always define an own data property.
    Object.defineProperty(metadata, key, {
      configurable: true,
      enumerable: true,
      value: readGgufValue(dataView, state, valueType),
      writable: true,
    });
  }
  return metadata;
}

/**
 * Read GGUF header metadata from a model artifact without loading the whole file.
 *
 * Starts with a GGUF_INITIAL_READ_BYTES prefix and doubles the read window each
 * time parsing fails with a RangeError (truncated header), up to the smaller of
 * the file size and GGUF_MAX_HEADER_READ_BYTES.
 *
 * @param {string} filePath GGUF file path
 * @returns {Object} parsed GGUF metadata
 * @throws {RangeError} when the header cannot be parsed within the read limit
 * @throws {Error} when the header is invalid or the file cannot be read
 */
export function readGgufMetadata(filePath) {
  const fileSize = statSync(filePath).size;
  let readLength = Math.min(
    Math.max(GGUF_INITIAL_READ_BYTES, 24),
    fileSize || GGUF_INITIAL_READ_BYTES,
  );
  let lastError;
  while (
    readLength > 0 &&
    readLength <= Math.min(fileSize, GGUF_MAX_HEADER_READ_BYTES)
  ) {
    try {
      return parseGgufMetadataBuffer(readGgufPrefix(filePath, readLength));
    } catch (error) {
      lastError = error;
      // Only truncation is retryable, and only while the window can still grow.
      if (
        !(error instanceof RangeError) ||
        readLength >= fileSize ||
        readLength >= GGUF_MAX_HEADER_READ_BYTES
      ) {
        throw error;
      }
      readLength = Math.min(
        readLength * 2,
        fileSize,
        GGUF_MAX_HEADER_READ_BYTES,
      );
    }
  }
  throw lastError || new RangeError("Unable to read GGUF metadata header");
}