UNPKG

zip-iterator

Version:

Extract the contents of zip archives through an iterator API, reading from streams or paths. Use the stream interface and pipe transforms to add decompression algorithms.

279 lines (278 loc) 9.33 kB
/**
 * ZIP Extra Field Parsing
 *
 * Parsers for individual extra field types:
 * - ZIP64 Extended Information (0x0001)
 * - Info-ZIP Unix Extra Field (0x5855 / 0x7875)
 * - Extended Timestamp (0x5455)
 * - ASi Unix Extra Field (0x756e)
 */
import { crc32, readUInt64LE } from 'extract-base-iterator';
import * as C from './constants.js';

// =============================================================================
// ZIP64 Extended Information Extra Field (0x0001)
// =============================================================================

/**
 * Decode a ZIP64 Extended Information extra field.
 *
 * Payload layout (a value is only stored when the matching header field
 * held the 0xFFFFFFFF marker):
 *   Original Size:          8 bytes
 *   Compressed Size:        8 bytes
 *   Relative Header Offset: 8 bytes (Central Directory only)
 *   Disk Start Number:      4 bytes (Central Directory only)
 *
 * A Local File Header typically carries only the two sizes.
 *
 * @param field - Extra field record; its `id` and `data` properties are read
 * @param needUncompressed - True when the header's uncompressed size was 0xFFFFFFFF
 * @param needCompressed - True when the header's compressed size was 0xFFFFFFFF
 * @returns An object with `uncompressedSize` and `compressedSize` (0 when not
 *   requested) plus `headerOffset` / `diskStart` when enough bytes remain, or
 *   null when the id is not ZIP64 or a requested size does not fit in the data
 */
export function parseZip64ExtraField(field, needUncompressed, needCompressed) {
  if (field.id !== C.EXTID_ZIP64) return null;

  const bytes = field.data;
  const total = bytes.length;
  const parsed = { uncompressedSize: 0, compressedSize: 0 };
  let cursor = 0;

  // Sizes are stored in this fixed order, each one only when it was requested
  if (needUncompressed) {
    if (total - cursor < 8) return null;
    parsed.uncompressedSize = readUInt64LE(bytes, cursor);
    cursor += 8;
  }

  if (needCompressed) {
    if (total - cursor < 8) return null;
    parsed.compressedSize = readUInt64LE(bytes, cursor);
    cursor += 8;
  }

  // Whatever is left over is read as the Central Directory extras:
  // first the header offset, then the disk start number
  if (total - cursor >= 8) {
    parsed.headerOffset = readUInt64LE(bytes, cursor);
    cursor += 8;
  }

  if (total - cursor >= 4) {
    parsed.diskStart = bytes.readUInt32LE(cursor);
  }

  return parsed;
}
// =============================================================================
// Info-ZIP Unix Extra Field (Old) - 0x5855
// =============================================================================

/**
 * Decode the old-format Info-ZIP Unix extra field.
 *
 * Payload layout:
 *   Access Time:       4 bytes (Unix timestamp)
 *   Modification Time: 4 bytes (Unix timestamp)
 *   UID:               2 bytes (optional, in Central Directory)
 *   GID:               2 bytes (optional, in Central Directory)
 *
 * @param field - Extra field record; its `id` and `data` properties are read
 * @returns `{ atime, mtime }`, extended with `uid` and `gid` when the payload
 *   is at least 12 bytes long, or null when the id does not match or fewer
 *   than 8 bytes are available
 */
export function parseUnixExtraFieldOld(field) {
  if (field.id !== C.EXTID_UNIX_OLD) return null;

  const bytes = field.data;
  if (bytes.length < 8) return null;

  const info = {
    atime: bytes.readUInt32LE(0),
    mtime: bytes.readUInt32LE(4),
  };

  // Without the trailing 4 bytes there is no UID/GID pair to report
  if (bytes.length < 12) return info;

  info.uid = bytes.readUInt16LE(8);
  info.gid = bytes.readUInt16LE(10);
  return info;
}

// =============================================================================
// Info-ZIP New Unix Extra Field - 0x7875
// =============================================================================

/**
 * Decode the new-format Info-ZIP Unix extra field, which stores UID and GID
 * as variable-length values.
 *
 * Payload layout:
 *   Version: 1 byte (currently 1)
 *   UIDSize: 1 byte
 *   UID:     UIDSize bytes
 *   GIDSize: 1 byte
 *   GID:     GIDSize bytes
 *
 * @param field - Extra field record; its `id` and `data` properties are read
 * @returns `{ uid }` or `{ uid, gid }`; null when the id does not match, the
 *   payload is shorter than 3 bytes, the version is not 1, or a declared
 *   size runs past the end of the payload
 */
export function parseUnixExtraFieldNew(field) {
  if (field.id !== C.EXTID_UNIX_NEW) return null;

  const bytes = field.data;
  if (bytes.length < 3) return null;

  // Only version 1 of the layout is understood
  if (bytes[0] !== 1) return null;

  const ids = {};

  // UID: size byte at index 1, value immediately after it
  const uidSize = bytes[1];
  const uidStart = 2;
  const uidEnd = uidStart + uidSize;
  if (uidEnd > bytes.length) return null;
  ids.uid = readVariableInt(bytes, uidStart, uidSize);

  // Nothing after the UID: report the UID on its own
  if (uidEnd >= bytes.length) return ids;

  // GID: size byte directly after the UID, value immediately after it
  const gidSize = bytes[uidEnd];
  const gidStart = uidEnd + 1;
  if (gidStart + gidSize > bytes.length) return null;
  ids.gid = readVariableInt(bytes, gidStart, gidSize);

  return ids;
}

// =============================================================================
// Extended Timestamp Extra Field - 0x5455
// =============================================================================

/**
 * Decode the Extended Timestamp extra field.
 *
 * Payload layout:
 *   Flags: 1 byte (bit 0: mtime, bit 1: atime, bit 2: ctime)
 *   mtime: 4 bytes (if flag bit 0 set)
 *   atime: 4 bytes (if flag bit 1 set) - Local header only
 *   ctime: 4 bytes (if flag bit 2 set) - Local header only
 *
 * A flagged timestamp that does not fit in the remaining bytes is skipped
 * rather than treated as an error.
 *
 * @param field - Extra field record; its `id` and `data` properties are read
 * @returns An object holding whichever of `mtime`, `atime`, `ctime` could be
 *   read (possibly none), or null when the id does not match or the payload
 *   is empty
 */
export function parseExtendedTimestamp(field) {
  if (field.id !== C.EXTID_EXTENDED_TIMESTAMP) return null;

  const bytes = field.data;
  if (bytes.length < 1) return null;

  // Flag bit -> result key, in the order the values are stored
  const slots = [
    [0x01, 'mtime'],
    [0x02, 'atime'],
    [0x04, 'ctime'],
  ];

  const flags = bytes[0];
  const times = {};
  let cursor = 1;

  for (let s = 0; s < slots.length; s++) {
    const bit = slots[s][0];
    const key = slots[s][1];
    if ((flags & bit) === 0) continue;
    if (cursor + 4 > bytes.length) continue;
    times[key] = bytes.readUInt32LE(cursor);
    cursor += 4;
  }

  return times;
}
// =============================================================================
// ASi Unix Extra Field - 0x756e
// =============================================================================

/**
 * Decode the ASi Unix extra field.
 *
 * Some archivers emit this field; it carries the Unix file mode, which can
 * be used for symlink detection in streaming mode.
 *
 * Payload layout (BIG ENDIAN - unusual for ZIP):
 *   CRC:    4 bytes (CRC32 of remaining data)
 *   Mode:   2 bytes (Unix file mode including type bits)
 *   SizDev: 4 bytes (symlink size or device numbers)
 *   UID:    2 bytes (user ID)
 *   GID:    2 bytes (group ID)
 *   Link:   variable (symlink target path, if symlink)
 *
 * @param field - Extra field record; its `id` and `data` properties are read
 * @returns `{ mode, uid, gid }`, plus `linkPath` for a symlink whose target
 *   fits in the payload; null when the id does not match, the payload is
 *   shorter than 14 bytes, or the stored CRC differs from the computed one
 */
export function parseAsiExtraField(field) {
  if (field.id !== C.EXTID_ASI) return null;

  const bytes = field.data;

  // Fixed part: CRC(4) + Mode(2) + SizDev(4) + UID(2) + GID(2)
  const fixedLength = 14;
  if (bytes.length < fixedLength) return null;

  // The stored CRC covers everything that follows the CRC field itself;
  // a mismatch means a corrupt field or a different format
  if (bytes.readUInt32BE(0) !== crc32(bytes.slice(4))) return null;

  const mode = bytes.readUInt16BE(4);
  const linkLength = bytes.readUInt32BE(6);
  const info = {
    mode,
    uid: bytes.readUInt16BE(10),
    gid: bytes.readUInt16BE(12),
  };

  // S_IFLNK = 0o120000 = 0xA000 in the file type bits marks a symlink
  const isSymlink = (mode & 0xf000) === 0xa000;
  if (isSymlink && linkLength > 0 && bytes.length >= fixedLength + linkLength) {
    info.linkPath = bytes.toString('utf8', fixedLength, fixedLength + linkLength);
  }

  return info;
}

/**
 * Locate the first ASi Unix extra field that parses successfully.
 *
 * @param fields - List of extra field records to scan in order
 * @returns The parsed ASi info (including the file mode, usable for symlink
 *   detection), or null when no field yields a result
 */
export function findAsiInfo(fields) {
  for (let index = 0; index < fields.length; index++) {
    const candidate = fields[index];
    if (candidate.id !== C.EXTID_ASI) continue;

    const parsed = parseAsiExtraField(candidate);
    if (parsed) return parsed;
  }
  return null;
}
// =============================================================================
// Helpers
// =============================================================================

/**
 * Read a variable-length little-endian unsigned integer.
 * Used for UID/GID in the new Unix extra field format.
 *
 * The value is accumulated with multiplication instead of `<<`: JavaScript
 * shift operators work on signed 32-bit integers and take the shift count
 * modulo 32, so a 4-byte value with the top bit set would come out negative
 * and any byte beyond the fourth would be folded back onto the low bits.
 *
 * Results are exact up to Number.MAX_SAFE_INTEGER (2^53 - 1); wider values
 * lose precision.
 *
 * @param buf - Byte buffer to read from
 * @param offset - Index of the least significant byte
 * @param size - Number of bytes to read (0 yields 0)
 * @returns The decoded non-negative integer
 */
function readVariableInt(buf, offset, size) {
  let value = 0;
  let scale = 1;
  for (let i = 0; i < size; i++) {
    // `& 0xff` keeps the previous tolerance for an out-of-range index
    // (undefined contributes 0 instead of turning the result into NaN)
    value += (buf[offset + i] & 0xff) * scale;
    scale *= 256;
  }
  return value;
}

/**
 * Find and parse Unix info from extra fields.
 * Tries the new format (0x7875) first and falls back to the old format
 * (0x5855) only when no new-format field parses successfully.
 *
 * @param fields - List of extra field records to scan in order
 * @returns The first successfully parsed Unix info, or null when none is found
 */
export function findUnixInfo(fields) {
  // Try new format first (more common in modern archives)
  for (let i = 0; i < fields.length; i++) {
    if (fields[i].id === C.EXTID_UNIX_NEW) {
      const info = parseUnixExtraFieldNew(fields[i]);
      if (info) return info;
    }
  }

  // Fall back to old format
  for (let i = 0; i < fields.length; i++) {
    if (fields[i].id === C.EXTID_UNIX_OLD) {
      const info = parseUnixExtraFieldOld(fields[i]);
      if (info) return info;
    }
  }

  return null;
}

/**
 * Find and parse the extended timestamp from extra fields.
 *
 * @param fields - List of extra field records to scan in order
 * @returns The first successfully parsed timestamp info, or null when none is found
 */
export function findExtendedTimestamp(fields) {
  for (let i = 0; i < fields.length; i++) {
    if (fields[i].id === C.EXTID_EXTENDED_TIMESTAMP) {
      const ts = parseExtendedTimestamp(fields[i]);
      if (ts) return ts;
    }
  }
  return null;
}