UNPKG

file-type

Version:

Detect the file type of a file, stream, or data

1,913 lines (1,647 loc) 44.9 kB
/** Primary entry point, Node.js specific entry point is index.js */ import * as Token from 'token-types'; import * as strtok3 from 'strtok3/core'; import {GzipHandler} from '@tokenizer/inflate'; import {concatUint8Arrays} from 'uint8array-extras'; import { stringToBytes, tarHeaderChecksumMatches, uint32SyncSafeToken, } from './tokens.js'; import {extensions, mimeTypes} from './supported.js'; import { maximumUntrustedSkipSizeInBytes, ParserHardLimitError, safeIgnore, checkBytes, hasUnknownFileSize, } from './parser.js'; import {detectZip} from './detectors/zip.js'; import {detectEbml} from './detectors/ebml.js'; import {detectPng} from './detectors/png.js'; import {detectAsf} from './detectors/asf.js'; export const reasonableDetectionSizeInBytes = 4100; // A fair amount of file-types are detectable within this range. const maximumMpegOffsetTolerance = reasonableDetectionSizeInBytes - 2; const maximumNestedGzipDetectionSizeInBytes = maximumUntrustedSkipSizeInBytes; const maximumNestedGzipProbeDepth = 1; const unknownSizeGzipProbeTimeoutInMilliseconds = 100; const maximumId3HeaderSizeInBytes = maximumUntrustedSkipSizeInBytes; const maximumTiffTagCount = 512; const maximumDetectionReentryCount = 256; const maximumTiffStreamIfdOffsetInBytes = 1024 * 1024; const maximumTiffIfdOffsetInBytes = maximumUntrustedSkipSizeInBytes; export function normalizeSampleSize(sampleSize) { // `sampleSize` is an explicit caller-controlled tuning knob, not untrusted file input. // Preserve valid caller-requested probe depth here; applications must bound attacker-derived option values themselves. if (!Number.isFinite(sampleSize)) { return reasonableDetectionSizeInBytes; } return Math.max(1, Math.trunc(sampleSize)); } function normalizeMpegOffsetTolerance(mpegOffsetTolerance) { // This value controls scan depth and therefore worst-case CPU work. if (!Number.isFinite(mpegOffsetTolerance)) { return 0; } return Math.max(0, Math.min(maximumMpegOffsetTolerance, Math.trunc(mpegOffsetTolerance))); } function getKnownFileSizeOrMaximum(fileSize) { if (!Number.isFinite(fileSize)) { return Number.MAX_SAFE_INTEGER; } return Math.max(0, fileSize); } // Wrap stream in an identity TransformStream to avoid BYOB readers. // Node.js has a bug where calling controller.close() inside a BYOB stream's // pull() callback does not resolve pending reader.read() calls, causing // permanent hangs on streams shorter than the requested read size. // Using a default (non-BYOB) reader via TransformStream avoids this. function toDefaultStream(stream) { return stream.pipeThrough(new TransformStream()); } function readWithSignal(reader, signal) { if (signal === undefined) { return reader.read(); } signal.throwIfAborted(); return Promise.race([ reader.read(), new Promise((_resolve, reject) => { signal.addEventListener('abort', () => { reject(signal.reason); reader.cancel(signal.reason).catch(() => {}); }, {once: true}); }), ]); } function createByteLimitedReadableStream(stream, maximumBytes) { const reader = stream.getReader(); let emittedBytes = 0; let sourceDone = false; let sourceCanceled = false; const cancelSource = async reason => { if ( sourceDone || sourceCanceled ) { return; } sourceCanceled = true; await reader.cancel(reason); }; return new ReadableStream({ async pull(controller) { if (emittedBytes >= maximumBytes) { controller.close(); await cancelSource(); return; } const {done, value} = await reader.read(); if ( done || !value ) { sourceDone = true; controller.close(); return; } const remainingBytes = maximumBytes - emittedBytes; if (value.length > remainingBytes) { controller.enqueue(value.subarray(0, remainingBytes)); emittedBytes += remainingBytes; controller.close(); await cancelSource(); return; } controller.enqueue(value); emittedBytes += value.length; }, async cancel(reason) { await cancelSource(reason); }, }); } export async function fileTypeFromStream(stream, options) { return new FileTypeParser(options).fromStream(stream); } export async function fileTypeFromBuffer(input, options) { return new FileTypeParser(options).fromBuffer(input); } export async function fileTypeFromBlob(blob, options) { return new FileTypeParser(options).fromBlob(blob); } export async function fileTypeFromTokenizer(tokenizer, options) { return new FileTypeParser(options).fromTokenizer(tokenizer); } export async function fileTypeStream(webStream, options) { return new FileTypeParser(options).toDetectionStream(webStream, options); } export class FileTypeParser { constructor(options) { const normalizedMpegOffsetTolerance = normalizeMpegOffsetTolerance(options?.mpegOffsetTolerance); this.options = { ...options, mpegOffsetTolerance: normalizedMpegOffsetTolerance, }; this.detectors = [...(this.options.customDetectors ?? []), {id: 'core', detect: this.detectConfident}, {id: 'core.imprecise', detect: this.detectImprecise}]; this.tokenizerOptions = { abortSignal: this.options.signal, }; this.gzipProbeDepth = 0; } getTokenizerOptions() { return { ...this.tokenizerOptions, }; } createTokenizerFromWebStream(stream) { return strtok3.fromWebStream(toDefaultStream(stream), this.getTokenizerOptions()); } async parseTokenizer(tokenizer, detectionReentryCount = 0) { this.detectionReentryCount = detectionReentryCount; const initialPosition = tokenizer.position; // Iterate through all file-type detectors for (const detector of this.detectors) { let fileType; try { fileType = await detector.detect(tokenizer); } catch (error) { if (error instanceof strtok3.EndOfStreamError) { return; } if (error instanceof ParserHardLimitError) { return; } throw error; } if (fileType) { return fileType; } if (initialPosition !== tokenizer.position) { return undefined; // Cannot proceed scanning of the tokenizer is at an arbitrary position } } } async fromTokenizer(tokenizer) { try { return await this.parseTokenizer(tokenizer); } finally { await tokenizer.close(); } } async fromBuffer(input) { if (!(input instanceof Uint8Array || input instanceof ArrayBuffer)) { throw new TypeError(`Expected the \`input\` argument to be of type \`Uint8Array\` or \`ArrayBuffer\`, got \`${typeof input}\``); } const buffer = input instanceof Uint8Array ? input : new Uint8Array(input); if (!(buffer?.length > 1)) { return; } return this.fromTokenizer(strtok3.fromBuffer(buffer, this.getTokenizerOptions())); } async fromBlob(blob) { this.options.signal?.throwIfAborted(); const tokenizer = strtok3.fromBlob(blob, this.getTokenizerOptions()); return this.fromTokenizer(tokenizer); } async fromStream(stream) { this.options.signal?.throwIfAborted(); const tokenizer = this.createTokenizerFromWebStream(stream); return this.fromTokenizer(tokenizer); } async fromFile(path) { this.options.signal?.throwIfAborted(); // TODO: Remove this when `strtok3.fromFile()` safely rejects non-regular filesystem objects without a pathname race. const [{default: fsPromises}, {FileTokenizer}] = await Promise.all([ import('node:fs/promises'), import('strtok3'), ]); const fileHandle = await fsPromises.open(path, fsPromises.constants.O_RDONLY | fsPromises.constants.O_NONBLOCK); const fileStat = await fileHandle.stat(); if (!fileStat.isFile()) { await fileHandle.close(); return; } const tokenizer = new FileTokenizer(fileHandle, { ...this.getTokenizerOptions(), fileInfo: {path, size: fileStat.size}, }); return this.fromTokenizer(tokenizer); } async toDetectionStream(stream, options) { this.options.signal?.throwIfAborted(); const sampleSize = normalizeSampleSize(options?.sampleSize ?? reasonableDetectionSizeInBytes); let detectedFileType; let streamEnded = false; const reader = stream.getReader(); const chunks = []; let totalSize = 0; try { while (totalSize < sampleSize) { const {value, done} = await readWithSignal(reader, this.options.signal); if (done || !value) { streamEnded = true; break; } chunks.push(value); totalSize += value.length; } if ( !streamEnded && totalSize === sampleSize ) { const {value, done} = await readWithSignal(reader, this.options.signal); if (done || !value) { streamEnded = true; } else { chunks.push(value); totalSize += value.length; } } } finally { reader.releaseLock(); } if (totalSize > 0) { const sample = chunks.length === 1 ? chunks[0] : concatUint8Arrays(chunks); try { detectedFileType = await this.fromBuffer(sample.subarray(0, sampleSize)); } catch (error) { if (!(error instanceof strtok3.EndOfStreamError)) { throw error; } detectedFileType = undefined; } if ( !streamEnded && detectedFileType?.ext === 'pages' ) { detectedFileType = { ext: 'zip', mime: 'application/zip', }; } } // Prepend collected chunks and pipe the rest through const transformStream = new TransformStream({ start(controller) { for (const chunk of chunks) { controller.enqueue(chunk); } }, transform(chunk, controller) { controller.enqueue(chunk); }, }); const newStream = stream.pipeThrough(transformStream); newStream.fileType = detectedFileType; return newStream; } async detectGzip(tokenizer) { if (this.gzipProbeDepth >= maximumNestedGzipProbeDepth) { return { ext: 'gz', mime: 'application/gzip', }; } const gzipHandler = new GzipHandler(tokenizer); const limitedInflatedStream = createByteLimitedReadableStream(gzipHandler.inflate(), maximumNestedGzipDetectionSizeInBytes); const hasUnknownSize = hasUnknownFileSize(tokenizer); let timeout; let probeSignal; let probeParser; let compressedFileType; if (hasUnknownSize) { const timeoutController = new AbortController(); timeout = setTimeout(() => { timeoutController.abort(new DOMException(`Operation timed out after ${unknownSizeGzipProbeTimeoutInMilliseconds} ms`, 'TimeoutError')); }, unknownSizeGzipProbeTimeoutInMilliseconds); probeSignal = this.options.signal === undefined ? timeoutController.signal : AbortSignal.any([this.options.signal, timeoutController.signal]); probeParser = new FileTypeParser({ ...this.options, signal: probeSignal, }); probeParser.gzipProbeDepth = this.gzipProbeDepth + 1; } else { this.gzipProbeDepth++; } try { compressedFileType = await (probeParser ?? this).fromStream(limitedInflatedStream); } catch (error) { if ( error?.name === 'AbortError' && probeSignal?.reason?.name !== 'TimeoutError' ) { throw error; } // Timeout, decompression, or inner-detection failures are expected for non-tar gzip files. } finally { clearTimeout(timeout); if (!hasUnknownSize) { this.gzipProbeDepth--; } } if (compressedFileType?.ext === 'tar') { return { ext: 'tar.gz', mime: 'application/gzip', }; } return { ext: 'gz', mime: 'application/gzip', }; } check(header, options) { return checkBytes(this.buffer, header, options); } checkString(header, options) { return this.check(stringToBytes(header, options?.encoding), options); } // Detections with a high degree of certainty in identifying the correct file type detectConfident = async tokenizer => { this.buffer = new Uint8Array(reasonableDetectionSizeInBytes); // Keep reading until EOF if the file size is unknown. if (tokenizer.fileInfo.size === undefined) { tokenizer.fileInfo.size = Number.MAX_SAFE_INTEGER; } this.tokenizer = tokenizer; if (hasUnknownFileSize(tokenizer)) { await tokenizer.peekBuffer(this.buffer, {length: 3, mayBeLess: true}); if (this.check([0x1F, 0x8B, 0x8])) { return this.detectGzip(tokenizer); } } await tokenizer.peekBuffer(this.buffer, {length: 32, mayBeLess: true}); // -- 2-byte signatures -- if (this.check([0x42, 0x4D])) { return { ext: 'bmp', mime: 'image/bmp', }; } if (this.check([0x0B, 0x77])) { return { ext: 'ac3', mime: 'audio/vnd.dolby.dd-raw', }; } if (this.check([0x78, 0x01])) { return { ext: 'dmg', mime: 'application/x-apple-diskimage', }; } if (this.check([0x4D, 0x5A])) { return { ext: 'exe', mime: 'application/x-msdownload', }; } if (this.check([0x25, 0x21])) { await tokenizer.peekBuffer(this.buffer, {length: 24, mayBeLess: true}); if ( this.checkString('PS-Adobe-', {offset: 2}) && this.checkString(' EPSF-', {offset: 14}) ) { return { ext: 'eps', mime: 'application/eps', }; } return { ext: 'ps', mime: 'application/postscript', }; } if ( this.check([0x1F, 0xA0]) || this.check([0x1F, 0x9D]) ) { return { ext: 'Z', mime: 'application/x-compress', }; } if (this.check([0xC7, 0x71])) { return { ext: 'cpio', mime: 'application/x-cpio', }; } if (this.check([0x60, 0xEA])) { return { ext: 'arj', mime: 'application/x-arj', }; } // -- 3-byte signatures -- if (this.check([0xEF, 0xBB, 0xBF])) { // UTF-8-BOM if (this.detectionReentryCount >= maximumDetectionReentryCount) { return; } this.detectionReentryCount++; // Strip off UTF-8-BOM await this.tokenizer.ignore(3); return this.detectConfident(tokenizer); } if (this.check([0x47, 0x49, 0x46])) { return { ext: 'gif', mime: 'image/gif', }; } if (this.check([0x49, 0x49, 0xBC])) { return { ext: 'jxr', mime: 'image/vnd.ms-photo', }; } if (this.check([0x1F, 0x8B, 0x8])) { return this.detectGzip(tokenizer); } if (this.check([0x42, 0x5A, 0x68])) { return { ext: 'bz2', mime: 'application/x-bzip2', }; } if (this.checkString('ID3')) { await safeIgnore(tokenizer, 6, { maximumLength: 6, reason: 'ID3 header prefix', }); // Skip ID3 header until the header size const id3HeaderLength = await tokenizer.readToken(uint32SyncSafeToken); const isUnknownFileSize = hasUnknownFileSize(tokenizer); if ( !Number.isFinite(id3HeaderLength) || id3HeaderLength < 0 // Keep ID3 probing bounded for unknown-size streams to avoid attacker-controlled large skips. || ( isUnknownFileSize && ( id3HeaderLength > maximumId3HeaderSizeInBytes || (tokenizer.position + id3HeaderLength) > maximumId3HeaderSizeInBytes ) ) ) { return; } if (tokenizer.position + id3HeaderLength > tokenizer.fileInfo.size) { if (isUnknownFileSize) { return; } return { ext: 'mp3', mime: 'audio/mpeg', }; } try { await safeIgnore(tokenizer, id3HeaderLength, { maximumLength: isUnknownFileSize ? maximumId3HeaderSizeInBytes : tokenizer.fileInfo.size, reason: 'ID3 payload', }); } catch (error) { if (error instanceof strtok3.EndOfStreamError) { return; } throw error; } if (this.detectionReentryCount >= maximumDetectionReentryCount) { return; } this.detectionReentryCount++; return this.parseTokenizer(tokenizer, this.detectionReentryCount); // Skip ID3 header, recursion } // Musepack, SV7 if (this.checkString('MP+')) { return { ext: 'mpc', mime: 'audio/x-musepack', }; } if ( (this.buffer[0] === 0x43 || this.buffer[0] === 0x46) && this.check([0x57, 0x53], {offset: 1}) ) { return { ext: 'swf', mime: 'application/x-shockwave-flash', }; } // -- 4-byte signatures -- // Requires a sample size of 4 bytes if (this.check([0xFF, 0xD8, 0xFF])) { if (this.check([0xF7], {offset: 3})) { // JPG7/SOF55, indicating a ISO/IEC 14495 / JPEG-LS file return { ext: 'jls', mime: 'image/jls', }; } return { ext: 'jpg', mime: 'image/jpeg', }; } if (this.check([0x4F, 0x62, 0x6A, 0x01])) { return { ext: 'avro', mime: 'application/avro', }; } if (this.checkString('FLIF')) { return { ext: 'flif', mime: 'image/flif', }; } if (this.checkString('8BPS')) { return { ext: 'psd', mime: 'image/vnd.adobe.photoshop', }; } // Musepack, SV8 if (this.checkString('MPCK')) { return { ext: 'mpc', mime: 'audio/x-musepack', }; } if (this.checkString('FORM')) { return { ext: 'aif', mime: 'audio/aiff', }; } if (this.checkString('icns', {offset: 0})) { return { ext: 'icns', mime: 'image/icns', }; } // Zip-based file formats // Need to be before the `zip` check if (this.check([0x50, 0x4B, 0x3, 0x4])) { // Local file header signature return detectZip(tokenizer); } if (this.checkString('OggS')) { // This is an OGG container await tokenizer.ignore(28); const type = new Uint8Array(8); await tokenizer.readBuffer(type); // Needs to be before `ogg` check if (checkBytes(type, [0x4F, 0x70, 0x75, 0x73, 0x48, 0x65, 0x61, 0x64])) { return { ext: 'opus', mime: 'audio/ogg; codecs=opus', }; } // If ' theora' in header. if (checkBytes(type, [0x80, 0x74, 0x68, 0x65, 0x6F, 0x72, 0x61])) { return { ext: 'ogv', mime: 'video/ogg', }; } // If '\x01video' in header. if (checkBytes(type, [0x01, 0x76, 0x69, 0x64, 0x65, 0x6F, 0x00])) { return { ext: 'ogm', mime: 'video/ogg', }; } // If ' FLAC' in header https://xiph.org/flac/faq.html if (checkBytes(type, [0x7F, 0x46, 0x4C, 0x41, 0x43])) { return { ext: 'oga', mime: 'audio/ogg', }; } // 'Speex ' in header https://en.wikipedia.org/wiki/Speex if (checkBytes(type, [0x53, 0x70, 0x65, 0x65, 0x78, 0x20, 0x20])) { return { ext: 'spx', mime: 'audio/ogg', }; } // If '\x01vorbis' in header if (checkBytes(type, [0x01, 0x76, 0x6F, 0x72, 0x62, 0x69, 0x73])) { return { ext: 'ogg', mime: 'audio/ogg', }; } // Default OGG container https://www.iana.org/assignments/media-types/application/ogg return { ext: 'ogx', mime: 'application/ogg', }; } if ( this.check([0x50, 0x4B]) && (this.buffer[2] === 0x3 || this.buffer[2] === 0x5 || this.buffer[2] === 0x7) && (this.buffer[3] === 0x4 || this.buffer[3] === 0x6 || this.buffer[3] === 0x8) ) { return { ext: 'zip', mime: 'application/zip', }; } if (this.checkString('MThd')) { return { ext: 'mid', mime: 'audio/midi', }; } if ( this.checkString('wOFF') && ( this.check([0x00, 0x01, 0x00, 0x00], {offset: 4}) || this.checkString('OTTO', {offset: 4}) ) ) { return { ext: 'woff', mime: 'font/woff', }; } if ( this.checkString('wOF2') && ( this.check([0x00, 0x01, 0x00, 0x00], {offset: 4}) || this.checkString('OTTO', {offset: 4}) ) ) { return { ext: 'woff2', mime: 'font/woff2', }; } if (this.check([0xD4, 0xC3, 0xB2, 0xA1]) || this.check([0xA1, 0xB2, 0xC3, 0xD4])) { return { ext: 'pcap', mime: 'application/vnd.tcpdump.pcap', }; } // Sony DSD Stream File (DSF) if (this.checkString('DSD ')) { return { ext: 'dsf', mime: 'audio/x-dsf', // Non-standard }; } if (this.checkString('LZIP')) { return { ext: 'lz', mime: 'application/lzip', }; } if (this.checkString('fLaC')) { return { ext: 'flac', mime: 'audio/flac', }; } if (this.check([0x42, 0x50, 0x47, 0xFB])) { return { ext: 'bpg', mime: 'image/bpg', }; } if (this.checkString('wvpk')) { return { ext: 'wv', mime: 'audio/wavpack', }; } if (this.checkString('%PDF')) { // Assume this is just a normal PDF return { ext: 'pdf', mime: 'application/pdf', }; } if (this.check([0x00, 0x61, 0x73, 0x6D])) { return { ext: 'wasm', mime: 'application/wasm', }; } // TIFF, little-endian type if (this.check([0x49, 0x49])) { const fileType = await this.readTiffHeader(false); if (fileType) { return fileType; } } // TIFF, big-endian type if (this.check([0x4D, 0x4D])) { const fileType = await this.readTiffHeader(true); if (fileType) { return fileType; } } if (this.checkString('MAC ')) { return { ext: 'ape', mime: 'audio/ape', }; } // https://github.com/file/file/blob/master/magic/Magdir/matroska if (this.check([0x1A, 0x45, 0xDF, 0xA3])) { // Root element: EBML return detectEbml(tokenizer); } if (this.checkString('SQLi')) { return { ext: 'sqlite', mime: 'application/x-sqlite3', }; } if (this.check([0x4E, 0x45, 0x53, 0x1A])) { return { ext: 'nes', mime: 'application/x-nintendo-nes-rom', }; } if (this.checkString('Cr24')) { return { ext: 'crx', mime: 'application/x-google-chrome-extension', }; } if ( this.checkString('MSCF') || this.checkString('ISc(') ) { return { ext: 'cab', mime: 'application/vnd.ms-cab-compressed', }; } if (this.check([0xED, 0xAB, 0xEE, 0xDB])) { return { ext: 'rpm', mime: 'application/x-rpm', }; } if (this.check([0xC5, 0xD0, 0xD3, 0xC6])) { return { ext: 'eps', mime: 'application/eps', }; } if (this.check([0x28, 0xB5, 0x2F, 0xFD])) { return { ext: 'zst', mime: 'application/zstd', }; } if (this.check([0x7F, 0x45, 0x4C, 0x46])) { return { ext: 'elf', mime: 'application/x-elf', }; } if (this.check([0x21, 0x42, 0x44, 0x4E])) { return { ext: 'pst', mime: 'application/vnd.ms-outlook', }; } if (this.checkString('PAR1') || this.checkString('PARE')) { return { ext: 'parquet', mime: 'application/vnd.apache.parquet', }; } if (this.checkString('ttcf')) { return { ext: 'ttc', mime: 'font/collection', }; } if ( this.check([0xFE, 0xED, 0xFA, 0xCE]) // 32-bit, big-endian || this.check([0xFE, 0xED, 0xFA, 0xCF]) // 64-bit, big-endian || this.check([0xCE, 0xFA, 0xED, 0xFE]) // 32-bit, little-endian || this.check([0xCF, 0xFA, 0xED, 0xFE]) // 64-bit, little-endian ) { return { ext: 'macho', mime: 'application/x-mach-binary', }; } if (this.check([0x04, 0x22, 0x4D, 0x18])) { return { ext: 'lz4', mime: 'application/x-lz4', // Informal, used by freedesktop.org shared-mime-info }; } if (this.checkString('regf')) { return { ext: 'dat', mime: 'application/x-ft-windows-registry-hive', }; } // SPSS Statistical Data File if (this.checkString('$FL2') || this.checkString('$FL3')) { return { ext: 'sav', mime: 'application/x-spss-sav', }; } // -- 5-byte signatures -- if (this.check([0x4F, 0x54, 0x54, 0x4F, 0x00])) { return { ext: 'otf', mime: 'font/otf', }; } if (this.checkString('#!AMR')) { return { ext: 'amr', mime: 'audio/amr', }; } if (this.checkString(String.raw`{\rtf`)) { return { ext: 'rtf', mime: 'application/rtf', }; } if (this.check([0x46, 0x4C, 0x56, 0x01])) { return { ext: 'flv', mime: 'video/x-flv', }; } if (this.checkString('IMPM')) { return { ext: 'it', mime: 'audio/x-it', }; } if ( this.checkString('-lh0-', {offset: 2}) || this.checkString('-lh1-', {offset: 2}) || this.checkString('-lh2-', {offset: 2}) || this.checkString('-lh3-', {offset: 2}) || this.checkString('-lh4-', {offset: 2}) || this.checkString('-lh5-', {offset: 2}) || this.checkString('-lh6-', {offset: 2}) || this.checkString('-lh7-', {offset: 2}) || this.checkString('-lzs-', {offset: 2}) || this.checkString('-lz4-', {offset: 2}) || this.checkString('-lz5-', {offset: 2}) || this.checkString('-lhd-', {offset: 2}) ) { return { ext: 'lzh', mime: 'application/x-lzh-compressed', }; } // MPEG program stream (PS or MPEG-PS) if (this.check([0x00, 0x00, 0x01, 0xBA])) { // MPEG-PS, MPEG-1 Part 1 if (this.check([0x21], {offset: 4, mask: [0xF1]})) { return { ext: 'mpg', // May also be .ps, .mpeg mime: 'video/MP1S', }; } // MPEG-PS, MPEG-2 Part 1 if (this.check([0x44], {offset: 4, mask: [0xC4]})) { return { ext: 'mpg', // May also be .mpg, .m2p, .vob or .sub mime: 'video/MP2P', }; } } if (this.checkString('ITSF')) { return { ext: 'chm', mime: 'application/vnd.ms-htmlhelp', }; } if (this.check([0xCA, 0xFE, 0xBA, 0xBE])) { // Java bytecode and Mach-O universal binaries have the same magic number. // We disambiguate based on the next 4 bytes, as done by `file`. // See https://github.com/file/file/blob/master/magic/Magdir/cafebabe const machOArchitectureCount = Token.UINT32_BE.get(this.buffer, 4); const javaClassFileMajorVersion = Token.UINT16_BE.get(this.buffer, 6); if (machOArchitectureCount > 0 && machOArchitectureCount <= 30) { return { ext: 'macho', mime: 'application/x-mach-binary', }; } if (javaClassFileMajorVersion > 30) { return { ext: 'class', mime: 'application/java-vm', }; } } if (this.checkString('.RMF')) { return { ext: 'rm', mime: 'application/vnd.rn-realmedia', }; } // -- 5-byte signatures -- if (this.checkString('DRACO')) { return { ext: 'drc', mime: 'application/x-ft-draco', }; } // -- 6-byte signatures -- if (this.check([0xFD, 0x37, 0x7A, 0x58, 0x5A, 0x00])) { return { ext: 'xz', mime: 'application/x-xz', }; } if (this.checkString('<?xml ')) { return { ext: 'xml', mime: 'application/xml', }; } if (this.check([0x37, 0x7A, 0xBC, 0xAF, 0x27, 0x1C])) { return { ext: '7z', mime: 'application/x-7z-compressed', }; } if ( this.check([0x52, 0x61, 0x72, 0x21, 0x1A, 0x7]) && (this.buffer[6] === 0x0 || this.buffer[6] === 0x1) ) { return { ext: 'rar', mime: 'application/x-rar-compressed', }; } if (this.checkString('solid ')) { return { ext: 'stl', mime: 'model/stl', }; } if (this.checkString('AC')) { const version = new Token.StringType(4, 'latin1').get(this.buffer, 2); if (/^\d+$/v.test(version) && version >= 1000 && version <= 1050) { return { ext: 'dwg', mime: 'image/vnd.dwg', }; } } if (this.checkString('070707')) { return { ext: 'cpio', mime: 'application/x-cpio', }; } // -- 7-byte signatures -- if (this.checkString('BLENDER')) { return { ext: 'blend', mime: 'application/x-blender', }; } if (this.checkString('!<arch>')) { await tokenizer.ignore(8); const string = await tokenizer.readToken(new Token.StringType(13, 'ascii')); if (string === 'debian-binary') { return { ext: 'deb', mime: 'application/x-deb', }; } return { ext: 'ar', mime: 'application/x-unix-archive', }; } if ( this.checkString('WEBVTT') && ( // One of LF, CR, tab, space, or end of file must follow "WEBVTT" per the spec (see `fixture/fixture-vtt-*.vtt` for examples). Note that `\0` is technically the null character (there is no such thing as an EOF character). However, checking for `\0` gives us the same result as checking for the end of the stream. (['\n', '\r', '\t', ' ', '\0'].some(char7 => this.checkString(char7, {offset: 6})))) ) { return { ext: 'vtt', mime: 'text/vtt', }; } // -- 8-byte signatures -- if (this.check([0x89, 0x50, 0x4E, 0x47, 0x0D, 0x0A, 0x1A, 0x0A])) { return detectPng(tokenizer); } if (this.check([0x41, 0x52, 0x52, 0x4F, 0x57, 0x31, 0x00, 0x00])) { return { ext: 'arrow', mime: 'application/vnd.apache.arrow.file', }; } if (this.check([0x67, 0x6C, 0x54, 0x46, 0x02, 0x00, 0x00, 0x00])) { return { ext: 'glb', mime: 'model/gltf-binary', }; } // `mov` format variants if ( this.check([0x66, 0x72, 0x65, 0x65], {offset: 4}) // `free` || this.check([0x6D, 0x64, 0x61, 0x74], {offset: 4}) // `mdat` MJPEG || this.check([0x6D, 0x6F, 0x6F, 0x76], {offset: 4}) // `moov` || this.check([0x77, 0x69, 0x64, 0x65], {offset: 4}) // `wide` ) { return { ext: 'mov', mime: 'video/quicktime', }; } // -- 9-byte signatures -- if (this.check([0x49, 0x49, 0x52, 0x4F, 0x08, 0x00, 0x00, 0x00, 0x18])) { return { ext: 'orf', mime: 'image/x-olympus-orf', }; } if (this.checkString('gimp xcf ')) { return { ext: 'xcf', mime: 'image/x-xcf', }; } // File Type Box (https://en.wikipedia.org/wiki/ISO_base_media_file_format) // It's not required to be first, but it's recommended to be. Almost all ISO base media files start with `ftyp` box. // `ftyp` box must contain a brand major identifier, which must consist of ISO 8859-1 printable characters. // Here we check for 8859-1 printable characters (for simplicity, it's a mask which also catches one non-printable character). if ( this.checkString('ftyp', {offset: 4}) && (this.buffer[8] & 0x60) !== 0x00 // Brand major, first character ASCII? ) { // They all can have MIME `video/mp4` except `application/mp4` special-case which is hard to detect. // For some cases, we're specific, everything else falls to `video/mp4` with `mp4` extension. const brandMajor = new Token.StringType(4, 'latin1').get(this.buffer, 8).replace('\0', ' ').trim(); switch (brandMajor) { case 'avif': case 'avis': return {ext: 'avif', mime: 'image/avif'}; case 'mif1': return {ext: 'heic', mime: 'image/heif'}; case 'msf1': return {ext: 'heic', mime: 'image/heif-sequence'}; case 'heic': case 'heix': return {ext: 'heic', mime: 'image/heic'}; case 'hevc': case 'hevx': return {ext: 'heic', mime: 'image/heic-sequence'}; case 'qt': return {ext: 'mov', mime: 'video/quicktime'}; case 'M4V': case 'M4VH': case 'M4VP': return {ext: 'm4v', mime: 'video/x-m4v'}; case 'M4P': return {ext: 'm4p', mime: 'video/mp4'}; case 'M4B': return {ext: 'm4b', mime: 'audio/mp4'}; case 'M4A': return {ext: 'm4a', mime: 'audio/x-m4a'}; case 'F4V': return {ext: 'f4v', mime: 'video/mp4'}; case 'F4P': return {ext: 'f4p', mime: 'video/mp4'}; case 'F4A': return {ext: 'f4a', mime: 'audio/mp4'}; case 'F4B': return {ext: 'f4b', mime: 'audio/mp4'}; case 'crx': return {ext: 'cr3', mime: 'image/x-canon-cr3'}; default: if (brandMajor.startsWith('3g')) { if (brandMajor.startsWith('3g2')) { return {ext: '3g2', mime: 'video/3gpp2'}; } return {ext: '3gp', mime: 'video/3gpp'}; } return {ext: 'mp4', mime: 'video/mp4'}; } } // -- 10-byte signatures -- if (this.checkString('REGEDIT4\r\n')) { return { ext: 'reg', mime: 'application/x-ms-regedit', }; } // -- 12-byte signatures -- // RIFF file format which might be AVI, WAV, QCP, etc if (this.check([0x52, 0x49, 0x46, 0x46])) { if (this.checkString('WEBP', {offset: 8})) { return { ext: 'webp', mime: 'image/webp', }; } if (this.check([0x41, 0x56, 0x49], {offset: 8})) { return { ext: 'avi', mime: 'video/vnd.avi', }; } if (this.check([0x57, 0x41, 0x56, 0x45], {offset: 8})) { return { ext: 'wav', mime: 'audio/wav', }; } // QLCM, QCP file if (this.check([0x51, 0x4C, 0x43, 0x4D], {offset: 8})) { return { ext: 'qcp', mime: 'audio/qcelp', }; } } if (this.check([0x49, 0x49, 0x55, 0x00, 0x18, 0x00, 0x00, 0x00, 0x88, 0xE7, 0x74, 0xD8])) { return { ext: 'rw2', mime: 'image/x-panasonic-rw2', }; } // ASF_Header_Object first 80 bytes if (this.check([0x30, 0x26, 0xB2, 0x75, 0x8E, 0x66, 0xCF, 0x11, 0xA6, 0xD9])) { return detectAsf(tokenizer); } if (this.check([0xAB, 0x4B, 0x54, 0x58, 0x20, 0x31, 0x31, 0xBB, 0x0D, 0x0A, 0x1A, 0x0A])) { return { ext: 'ktx', mime: 'image/ktx', }; } if ((this.check([0x7E, 0x10, 0x04]) || this.check([0x7E, 0x18, 0x04])) && this.check([0x30, 0x4D, 0x49, 0x45], {offset: 4})) { return { ext: 'mie', mime: 'application/x-mie', }; } if (this.check([0x27, 0x0A, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00], {offset: 2})) { return { ext: 'shp', mime: 'application/x-esri-shape', }; } if (this.check([0xFF, 0x4F, 0xFF, 0x51])) { return { ext: 'j2c', mime: 'image/j2c', }; } if (this.check([0x00, 0x00, 0x00, 0x0C, 0x6A, 0x50, 0x20, 0x20, 0x0D, 0x0A, 0x87, 0x0A])) { // JPEG-2000 family await tokenizer.ignore(20); const type = await tokenizer.readToken(new Token.StringType(4, 'ascii')); switch (type) { case 'jp2 ': return { ext: 'jp2', mime: 'image/jp2', }; case 'jpx ': return { ext: 'jpx', mime: 'image/jpx', }; case 'jpm ': return { ext: 'jpm', mime: 'image/jpm', }; case 'mjp2': return { ext: 'mj2', mime: 'image/mj2', }; default: return; } } if ( this.check([0xFF, 0x0A]) || this.check([0x00, 0x00, 0x00, 0x0C, 0x4A, 0x58, 0x4C, 0x20, 0x0D, 0x0A, 0x87, 0x0A]) ) { return { ext: 'jxl', mime: 'image/jxl', }; } if (this.check([0xFE, 0xFF])) { // UTF-16-BOM-BE if (this.checkString('<?xml ', {offset: 2, encoding: 'utf-16be'})) { return { ext: 'xml', mime: 'application/xml', }; } return undefined; // Some unknown text based format } if (this.check([0xD0, 0xCF, 0x11, 0xE0, 0xA1, 0xB1, 0x1A, 0xE1])) { // Detected Microsoft Compound File Binary File (MS-CFB) Format. return { ext: 'cfb', mime: 'application/x-cfb', }; } // Increase sample size from 32 to 256. await tokenizer.peekBuffer(this.buffer, {length: Math.min(256, tokenizer.fileInfo.size), mayBeLess: true}); if (this.check([0x61, 0x63, 0x73, 0x70], {offset: 36})) { return { ext: 'icc', mime: 'application/vnd.iccprofile', }; } // ACE: requires 14 bytes in the buffer if (this.checkString('**ACE', {offset: 7}) && this.checkString('**', {offset: 12})) { return { ext: 'ace', mime: 'application/x-ace-compressed', }; } // -- 15-byte signatures -- if (this.checkString('BEGIN:')) { if (this.checkString('VCARD', {offset: 6})) { return { ext: 'vcf', mime: 'text/vcard', }; } if (this.checkString('VCALENDAR', {offset: 6})) { return { ext: 'ics', mime: 'text/calendar', }; } } // `raf` is here just to keep all the raw image detectors together. if (this.checkString('FUJIFILMCCD-RAW')) { return { ext: 'raf', mime: 'image/x-fujifilm-raf', }; } if (this.checkString('Extended Module:')) { return { ext: 'xm', mime: 'audio/x-xm', }; } if (this.checkString('Creative Voice File')) { return { ext: 'voc', mime: 'audio/x-voc', }; } if (this.check([0x04, 0x00, 0x00, 0x00]) && this.buffer.length >= 16) { // Rough & quick check Pickle/ASAR const jsonSize = new DataView(this.buffer.buffer).getUint32(12, true); if (jsonSize > 12 && this.buffer.length >= jsonSize + 16) { try { const header = new TextDecoder().decode(this.buffer.subarray(16, jsonSize + 16)); const json = JSON.parse(header); // Check if Pickle is ASAR if (json.files) { // Final check, assuring Pickle/ASAR format return { ext: 'asar', mime: 'application/x-asar', }; } } catch {} } } if (this.check([0x06, 0x0E, 0x2B, 0x34, 0x02, 0x05, 0x01, 0x01, 0x0D, 0x01, 0x02, 0x01, 0x01, 0x02])) { return { ext: 'mxf', mime: 'application/mxf', }; } if (this.checkString('SCRM', {offset: 44})) { return { ext: 's3m', mime: 'audio/x-s3m', }; } // Raw MPEG-2 transport stream (188-byte packets) if (this.check([0x47]) && this.check([0x47], {offset: 188})) { return { ext: 'mts', mime: 'video/mp2t', }; } // Blu-ray Disc Audio-Video (BDAV) MPEG-2 transport stream has 4-byte TP_extra_header before each 188-byte packet if (this.check([0x47], {offset: 4}) && this.check([0x47], {offset: 196})) { return { ext: 'mts', mime: 'video/mp2t', }; } if (this.check([0x42, 0x4F, 0x4F, 0x4B, 0x4D, 0x4F, 0x42, 0x49], {offset: 60})) { return { ext: 'mobi', mime: 'application/x-mobipocket-ebook', }; } if (this.check([0x44, 0x49, 0x43, 0x4D], {offset: 128})) { return { ext: 'dcm', mime: 'application/dicom', }; } if (this.check([0x4C, 0x00, 0x00, 0x00, 0x01, 0x14, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0xC0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x46])) { return { ext: 'lnk', mime: 'application/x-ms-shortcut', // Informal, used by freedesktop.org shared-mime-info }; } if (this.check([0x62, 0x6F, 0x6F, 0x6B, 0x00, 0x00, 0x00, 0x00, 0x6D, 0x61, 0x72, 0x6B, 0x00, 0x00, 0x00, 0x00])) { return { ext: 'alias', mime: 'application/x-ft-apple.alias', }; } if (this.checkString('Kaydara FBX Binary \u0000')) { return { ext: 'fbx', mime: 'application/x-ft-fbx', }; } if ( this.check([0x4C, 0x50], {offset: 34}) && ( this.check([0x00, 0x00, 0x01], {offset: 8}) || this.check([0x01, 0x00, 0x02], {offset: 8}) || this.check([0x02, 0x00, 0x02], {offset: 8}) ) ) { return { ext: 'eot', mime: 'application/vnd.ms-fontobject', }; } if (this.check([0x06, 0x06, 0xED, 0xF5, 0xD8, 0x1D, 0x46, 0xE5, 0xBD, 0x31, 0xEF, 0xE7, 0xFE, 0x74, 0xB7, 0x1D])) { return { ext: 'indd', mime: 'application/x-indesign', }; } // -- 16-byte signatures -- // JMP files - check for both Little Endian and Big Endian signatures if (this.check([0xFF, 0xFF, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x01, 0x00]) || this.check([0x00, 0x00, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0x04, 0x00, 0x01, 0x00, 0x01])) { return { ext: 'jmp', mime: 'application/x-jmp-data', }; } // Increase sample size from 256 to 512 await tokenizer.peekBuffer(this.buffer, {length: Math.min(512, tokenizer.fileInfo.size), mayBeLess: true}); // Requires a buffer size of 512 bytes if ((this.checkString('ustar', {offset: 257}) && (this.checkString('\0', {offset: 262}) || this.checkString(' ', {offset: 262}))) || (this.check([0, 0, 0, 0, 0, 0], {offset: 257}) && tarHeaderChecksumMatches(this.buffer))) { return { ext: 'tar', mime: 'application/x-tar', }; } if (this.check([0xFF, 0xFE])) { // UTF-16-BOM-LE const encoding = 'utf-16le'; if (this.checkString('<?xml ', {offset: 2, encoding})) { return { ext: 'xml', mime: 'application/xml', }; } if (this.check([0xFF, 0x0E], {offset: 2}) && this.checkString('SketchUp Model', {offset: 4, encoding})) { return { ext: 'skp', mime: 'application/vnd.sketchup.skp', }; } if (this.checkString('Windows Registry Editor Version 5.00\r\n', {offset: 2, encoding})) { return { ext: 'reg', mime: 'application/x-ms-regedit', }; } return undefined; // Some text based format } if (this.checkString('-----BEGIN PGP MESSAGE-----')) { return { ext: 'pgp', mime: 'application/pgp-encrypted', }; } }; // Detections with limited supporting data, resulting in a higher likelihood of false positives detectImprecise = async tokenizer => { this.buffer = new Uint8Array(reasonableDetectionSizeInBytes); const fileSize = getKnownFileSizeOrMaximum(tokenizer.fileInfo.size); // Read initial sample size of 8 bytes await tokenizer.peekBuffer(this.buffer, {length: Math.min(8, fileSize), mayBeLess: true}); if ( this.check([0x0, 0x0, 0x1, 0xBA]) || this.check([0x0, 0x0, 0x1, 0xB3]) ) { return { ext: 'mpg', mime: 'video/mpeg', }; } if (this.check([0x00, 0x01, 0x00, 0x00, 0x00])) { return { ext: 'ttf', mime: 'font/ttf', }; } if (this.check([0x00, 0x00, 0x01, 0x00])) { return { ext: 'ico', mime: 'image/x-icon', }; } if (this.check([0x00, 0x00, 0x02, 0x00])) { return { ext: 'cur', mime: 'image/x-icon', }; } // Adjust buffer to `mpegOffsetTolerance` await tokenizer.peekBuffer(this.buffer, {length: Math.min(2 + this.options.mpegOffsetTolerance, fileSize), mayBeLess: true}); // Check MPEG 1 or 2 Layer 3 header, or 'layer 0' for ADTS (MPEG sync-word 0xFFE) if (this.buffer.length >= (2 + this.options.mpegOffsetTolerance)) { for (let depth = 0; depth <= this.options.mpegOffsetTolerance; ++depth) { const type = this.scanMpeg(depth); if (type) { return type; } } } }; async readTiffTag(bigEndian) { const tagId = await this.tokenizer.readToken(bigEndian ? Token.UINT16_BE : Token.UINT16_LE); await this.tokenizer.ignore(10); switch (tagId) { case 50_341: return { ext: 'arw', mime: 'image/x-sony-arw', }; case 50_706: return { ext: 'dng', mime: 'image/x-adobe-dng', }; default: } } async readTiffIFD(bigEndian) { const numberOfTags = await this.tokenizer.readToken(bigEndian ? Token.UINT16_BE : Token.UINT16_LE); if (numberOfTags > maximumTiffTagCount) { return; } if ( hasUnknownFileSize(this.tokenizer) && (2 + (numberOfTags * 12)) > maximumTiffIfdOffsetInBytes ) { return; } for (let n = 0; n < numberOfTags; ++n) { const fileType = await this.readTiffTag(bigEndian); if (fileType) { return fileType; } } } async readTiffHeader(bigEndian) { const tiffFileType = { ext: 'tif', mime: 'image/tiff', }; const version = (bigEndian ? Token.UINT16_BE : Token.UINT16_LE).get(this.buffer, 2); const ifdOffset = (bigEndian ? Token.UINT32_BE : Token.UINT32_LE).get(this.buffer, 4); if (version === 42) { // TIFF file header if (ifdOffset >= 6) { if (this.checkString('CR', {offset: 8})) { return { ext: 'cr2', mime: 'image/x-canon-cr2', }; } if (ifdOffset >= 8) { const someId1 = (bigEndian ? Token.UINT16_BE : Token.UINT16_LE).get(this.buffer, 8); const someId2 = (bigEndian ? Token.UINT16_BE : Token.UINT16_LE).get(this.buffer, 10); if ( (someId1 === 0x1C && someId2 === 0xFE) || (someId1 === 0x1F && someId2 === 0x0B)) { return { ext: 'nef', mime: 'image/x-nikon-nef', }; } } } if ( hasUnknownFileSize(this.tokenizer) && ifdOffset > maximumTiffStreamIfdOffsetInBytes ) { return tiffFileType; } const maximumTiffOffset = hasUnknownFileSize(this.tokenizer) ? maximumTiffIfdOffsetInBytes : this.tokenizer.fileInfo.size; try { await safeIgnore(this.tokenizer, ifdOffset, { maximumLength: maximumTiffOffset, reason: 'TIFF IFD offset', }); } catch (error) { if (error instanceof strtok3.EndOfStreamError) { return; } throw error; } let fileType; try { fileType = await this.readTiffIFD(bigEndian); } catch (error) { if (error instanceof strtok3.EndOfStreamError) { return; } throw error; } return fileType ?? tiffFileType; } if (version === 43) { // Big TIFF file header return tiffFileType; } } /** Scan check MPEG 1 or 2 Layer 3 header, or 'layer 0' for ADTS (MPEG sync-word 0xFFE). @param offset - Offset to scan for sync-preamble. @returns {{ext: string, mime: string}} */ scanMpeg(offset) { if (this.check([0xFF, 0xE0], {offset, mask: [0xFF, 0xE0]})) { if (this.check([0x10], {offset: offset + 1, mask: [0x16]})) { // Check for (ADTS) MPEG-2 if (this.check([0x08], {offset: offset + 1, mask: [0x08]})) { return { ext: 'aac', mime: 'audio/aac', }; } // Must be (ADTS) MPEG-4 return { ext: 'aac', mime: 'audio/aac', }; } // MPEG 1 or 2 Layer 3 header // Check for MPEG layer 3 if (this.check([0x02], {offset: offset + 1, mask: [0x06]})) { return { ext: 'mp3', mime: 'audio/mpeg', }; } // Check for MPEG layer 2 if (this.check([0x04], {offset: offset + 1, mask: [0x06]})) { return { ext: 'mp2', mime: 'audio/mpeg', }; } // Check for MPEG layer 1 if (this.check([0x06], {offset: offset + 1, mask: [0x06]})) { return { ext: 'mp1', mime: 'audio/mpeg', }; } } } } export const supportedExtensions = new Set(extensions); export const supportedMimeTypes = new Set(mimeTypes); export async function fileTypeFromFile(path, options) { return (new FileTypeParser(options)).fromFile(path); }