zip-iterator
Version:
Extract the contents of ZIP archives through an iterator API that accepts either streams or file paths. Use the stream interface and pipe transforms to add decompression algorithms.
231 lines (230 loc) • 8.53 kB
JavaScript
/**
* ZIP Header Parsing
*
* Functions for parsing Local File Headers and Data Descriptors.
* All parsing is forward-only - no seeking required.
*/ import { bufferEquals, readUInt64LE } from 'extract-base-iterator';
import * as C from './constants.js';
import { decodeCP437 } from './cp437.js';
// =============================================================================
// Local File Header Parsing
// =============================================================================
/**
 * Test whether the bytes of `buf` starting at `offset` match the
 * Local File Header signature (C.SIG_LOCAL_FILE).
 *
 * @param buf - Buffer to inspect
 * @param offset - Position in `buf` where the signature is expected to start
 * @returns Whatever `bufferEquals` reports for that comparison
 */
export function isLocalFileHeader(buf, offset) {
  const signature = C.SIG_LOCAL_FILE;
  const matches = bufferEquals(buf, offset, signature);
  return matches;
}
/**
 * Test whether the bytes of `buf` starting at `offset` match the
 * Central Directory signature (C.SIG_CENTRAL_DIR).
 *
 * Reaching this signature indicates the end of the local file entries.
 *
 * @param buf - Buffer to inspect
 * @param offset - Position in `buf` where the signature is expected to start
 * @returns Whatever `bufferEquals` reports for that comparison
 */
export function isCentralDirectory(buf, offset) {
  const signature = C.SIG_CENTRAL_DIR;
  const matches = bufferEquals(buf, offset, signature);
  return matches;
}
/**
 * Test whether the bytes of `buf` starting at `offset` match the
 * Data Descriptor signature (C.SIG_DATA_DESCRIPTOR).
 *
 * @param buf - Buffer to inspect
 * @param offset - Position in `buf` where the signature is expected to start
 * @returns Whatever `bufferEquals` reports for that comparison
 */
export function isDataDescriptor(buf, offset) {
  const signature = C.SIG_DATA_DESCRIPTOR;
  const matches = bufferEquals(buf, offset, signature);
  return matches;
}
/**
 * Decode the ZIP Local File Header that starts at `offset` in `buf`.
 *
 * Null is returned in three situations:
 *   - fewer than C.LOCAL_HEADER_FIXED_SIZE bytes are available at `offset`,
 *   - the signature check (`isLocalFileHeader`) fails,
 *   - the variable-length file name / extra field are not fully buffered.
 *
 * @param buf - Buffer containing header data
 * @param offset - Offset of the header's signature within `buf`
 * @returns Parsed header object, or null (see the cases above)
 */
export function parseLocalFileHeader(buf, offset) {
  // Need at least the fixed portion, and it must start with the signature.
  if (buf.length < offset + C.LOCAL_HEADER_FIXED_SIZE || !isLocalFileHeader(buf, offset)) {
    return null;
  }

  // Little-endian readers addressed relative to the start of the header.
  const u16 = (rel) => buf.readUInt16LE(offset + rel);
  const u32 = (rel) => buf.readUInt32LE(offset + rel);

  // Fixed-width fields
  const versionNeeded = u16(4);
  const flags = u16(6);
  const compressionMethod = u16(8);
  const lastModTime = u16(10);
  const lastModDate = u16(12);
  const crc32 = u32(14);
  let compressedSize = u32(18);
  let uncompressedSize = u32(22);
  const fileNameLength = u16(26);
  const extraFieldLength = u16(28);

  // Now that the variable lengths are known, make sure the whole header is here.
  const headerSize = C.LOCAL_HEADER_FIXED_SIZE + fileNameLength + extraFieldLength;
  if (offset + headerSize > buf.length) {
    return null;
  }

  // File name follows the fixed fields (literal 30 kept from the original;
  // presumably equal to C.LOCAL_HEADER_FIXED_SIZE - confirm in constants).
  const nameStart = offset + 30;
  const nameEnd = nameStart + fileNameLength;

  // The UTF-8 flag selects the name encoding; otherwise CP437 is used.
  const isUtf8 = (flags & C.FLAG_UTF8) !== 0;
  const fileName = isUtf8 ? buf.toString('utf8', nameStart, nameEnd) : decodeCP437(buf, nameStart, nameEnd);

  // Extra fields sit immediately after the file name.
  const extraFields = parseExtraFields(buf.slice(nameEnd, nameEnd + extraFieldLength));

  // A 32-bit marker in either size field flags the entry as ZIP64.
  const isZip64 = compressedSize === C.ZIP64_MARKER_32 || uncompressedSize === C.ZIP64_MARKER_32;
  if (isZip64) {
    // Replace the marker values only when the ZIP64 extra field carries both
    // 8-byte sizes (uncompressed first, then compressed).
    const zip64Extra = findExtraField(extraFields, C.EXTID_ZIP64);
    if (zip64Extra && zip64Extra.data.length >= 16) {
      uncompressedSize = readUInt64LE(zip64Extra.data, 0);
      compressedSize = readUInt64LE(zip64Extra.data, 8);
    }
  }

  return {
    versionNeeded: versionNeeded,
    flags: flags,
    compressionMethod: compressionMethod,
    lastModTime: lastModTime,
    lastModDate: lastModDate,
    crc32: crc32,
    compressedSize: compressedSize,
    uncompressedSize: uncompressedSize,
    fileNameLength: fileNameLength,
    extraFieldLength: extraFieldLength,
    fileName: fileName,
    extraFields: extraFields,
    // Values derived from the general-purpose flags and DOS timestamp
    isEncrypted: (flags & C.FLAG_ENCRYPTED) !== 0,
    isStrongEncrypted: (flags & C.FLAG_STRONG_ENCRYPTION) !== 0,
    hasDataDescriptor: (flags & C.FLAG_DATA_DESCRIPTOR) !== 0,
    isUtf8: isUtf8,
    isZip64: isZip64,
    mtime: decodeDateTime(lastModDate, lastModTime),
    headerSize: headerSize
  };
}
// =============================================================================
// Extra Field Parsing
// =============================================================================
/**
 * Split a raw extra-field area into its individual records.
 *
 * Record layout:
 *   Header ID (2 bytes, LE) + Data Size (2 bytes, LE) + Data (Data Size bytes)
 *
 * Parsing stops at the first record whose declared size would run past the
 * end of `buf`; trailing bytes too short to hold a 4-byte record header are
 * ignored. No error is raised in either case.
 *
 * @param buf - Buffer holding only the extra-field bytes
 * @returns Array of `{ id, size, data }` records in order of appearance,
 *          where `data` is produced by `buf.slice`
 */
export function parseExtraFields(buf) {
  const RECORD_HEADER_SIZE = 4; // 2-byte id + 2-byte size
  const records = [];
  let cursor = 0;
  for (;;) {
    const dataStart = cursor + RECORD_HEADER_SIZE;
    if (dataStart > buf.length) {
      break; // Not enough bytes left for another record header
    }
    const id = buf.readUInt16LE(cursor);
    const size = buf.readUInt16LE(cursor + 2);
    const dataEnd = dataStart + size;
    if (dataEnd > buf.length) {
      break; // Truncated extra field
    }
    records.push({
      id: id,
      size: size,
      data: buf.slice(dataStart, dataEnd)
    });
    cursor = dataEnd;
  }
  return records;
}
/**
 * Look up the first extra-field record whose `id` strictly equals `id`.
 *
 * @param fields - List of records, each exposing an `id` property
 * @param id - Header ID to search for
 * @returns The first matching record, or null when none matches
 */
export function findExtraField(fields, id) {
  let match = null;
  // Walk front to back and stop as soon as a record has been selected.
  for (let index = 0; match === null && index < fields.length; index += 1) {
    const candidate = fields[index];
    if (candidate.id === id) {
      match = candidate;
    }
  }
  return match;
}
// =============================================================================
// Data Descriptor Parsing
// =============================================================================
/**
 * Decode a Data Descriptor located at `offset` in `buf`.
 *
 * The descriptor may or may not begin with the optional 4-byte signature
 * (C.SIG_DATA_DESCRIPTOR); both layouts are accepted. After the optional
 * signature come a 4-byte CRC, the compressed size and the uncompressed size.
 * The two sizes are 8 bytes each when `isZip64` is truthy, otherwise 4 bytes.
 *
 * @param buf - Buffer containing descriptor data
 * @param offset - Offset to start of descriptor
 * @param isZip64 - Whether to expect 8-byte sizes
 * @returns `{ crc32, compressedSize, uncompressedSize, size }` where `size`
 *          is the descriptor's total byte length (signature included when
 *          present), or null if `buf` does not hold that many bytes
 */
export function parseDataDescriptor(buf, offset, isZip64) {
  // Width contributed by the optional leading signature.
  const signatureLength = bufferEquals(buf, offset, C.SIG_DATA_DESCRIPTOR) ? 4 : 0;
  const fieldWidth = isZip64 ? 8 : 4;
  // signature (optional) + CRC + compressed size + uncompressed size
  const totalLength = signatureLength + 4 + 2 * fieldWidth;
  if (offset + totalLength > buf.length) {
    return null;
  }

  const crcAt = offset + signatureLength;
  const sizesAt = crcAt + 4;
  // Pick the reader matching the size-field width once, then reuse it.
  const readSize = isZip64 ? (pos) => readUInt64LE(buf, pos) : (pos) => buf.readUInt32LE(pos);

  const crc32 = buf.readUInt32LE(crcAt);
  const compressedSize = readSize(sizesAt);
  const uncompressedSize = readSize(sizesAt + fieldWidth);

  return {
    crc32: crc32,
    compressedSize: compressedSize,
    uncompressedSize: uncompressedSize,
    size: totalLength
  };
}
// =============================================================================
// Date/Time Decoding
// =============================================================================
/**
 * Convert an MS-DOS date/time pair into a JavaScript Date.
 *
 * MS-DOS date word (16 bits):
 *   Bits 0-4:  Day (1-31)
 *   Bits 5-8:  Month (1-12)
 *   Bits 9-15: Year (offset from 1980)
 *
 * MS-DOS time word (16 bits):
 *   Bits 0-4:   Seconds/2 (0-29)
 *   Bits 5-10:  Minutes (0-59)
 *   Bits 11-15: Hours (0-23)
 *
 * The components are handed to the multi-argument `Date` constructor as-is
 * (no range validation), so they are interpreted in the local time zone.
 *
 * @param date - 16-bit MS-DOS date word
 * @param time - 16-bit MS-DOS time word
 * @returns Date built from the decoded components
 */
export function decodeDateTime(date, time) {
  // Pull out a bit field: shift it down to bit 0, then mask to its width.
  const field = (word, shift, mask) => (word >> shift) & mask;

  const day = field(date, 0, 0x1f);
  const monthIndex = field(date, 5, 0x0f) - 1; // Date constructor months are 0-based
  const year = 1980 + field(date, 9, 0x7f);

  const second = 2 * field(time, 0, 0x1f); // stored with 2-second resolution
  const minute = field(time, 5, 0x3f);
  const hour = field(time, 11, 0x1f);

  return new Date(year, monthIndex, day, hour, minute, second);
}
/**
 * Classify an entry as 'directory', 'symlink' or 'file'.
 *
 * Rules, in order:
 *   1. A file name whose last character is '/' is a directory.
 *   2. When `platform` is C.PLATFORM_UNIX, the top four bits of
 *      `externalAttributes` are compared with C.UNIX_TYPE_DIR and
 *      C.UNIX_TYPE_SYMLINK.
 *   3. Anything else is a file.
 *
 * Note: hard links are stored differently in ZIP and are not detected here.
 *
 * @param fileName - Entry name
 * @param externalAttributes - External attributes value for the entry
 * @param platform - Platform identifier, compared against C.PLATFORM_UNIX
 * @returns 'directory', 'symlink' or 'file'
 */
export function getEntryType(fileName, externalAttributes, platform) {
  const lastChar = fileName.charAt(fileName.length - 1);
  if (lastChar === '/') {
    return 'directory';
  }

  // Only Unix-made entries carry file type bits that are inspected here.
  if (platform !== C.PLATFORM_UNIX) {
    return 'file';
  }

  // File type lives in bits 28-31 of the external attributes.
  const unixType = (externalAttributes >> 28) & 0x0f;
  switch (unixType) {
    case C.UNIX_TYPE_DIR:
      return 'directory';
    case C.UNIX_TYPE_SYMLINK:
      return 'symlink';
    default:
      return 'file';
  }
}