@jedithepro/filetype.js
Version:
Detect the file type of a Buffer/Uint8Array/ArrayBuffer
1,426 lines (1,236 loc) • 31.6 kB
JavaScript
'use strict';
const Token = require('token-types');
const strtok3 = require('strtok3/lib/core');
const {
stringToBytes,
tarHeaderChecksumMatches,
uint32SyncSafeToken
} = require('./util');
const supported = require('./supported');
const minimumBytes = 4100; // A fair amount of file-types are detectable within this range
async function fromStream(stream) {
const tokenizer = await strtok3.fromStream(stream);
try {
return await fromTokenizer(tokenizer);
} finally {
await tokenizer.close();
}
}
async function fromBuffer(input) {
if (!(input instanceof Uint8Array || input instanceof ArrayBuffer || Buffer.isBuffer(input))) {
throw new TypeError(`Expected the \`input\` argument to be of type \`Uint8Array\` or \`Buffer\` or \`ArrayBuffer\`, got \`${typeof input}\``);
}
const buffer = input instanceof Buffer ? input : Buffer.from(input);
if (!(buffer && buffer.length > 1)) {
return;
}
const tokenizer = strtok3.fromBuffer(buffer);
return fromTokenizer(tokenizer);
}
function _check(buffer, headers, options) {
options = {
offset: 0,
...options
};
for (const [index, header] of headers.entries()) {
// If a bitmask is set
if (options.mask) {
// If header doesn't equal `buf` with bits masked off
if (header !== (options.mask[index] & buffer[index + options.offset])) {
return false;
}
} else if (header !== buffer[index + options.offset]) {
return false;
}
}
return true;
}
async function _checkSequence(sequence, tokenizer, ignoreBytes) {
const buffer = Buffer.alloc(minimumBytes);
await tokenizer.ignore(ignoreBytes);
await tokenizer.peekBuffer(buffer, {mayBeLess: true});
return buffer.includes(Buffer.from(sequence));
}
async function fromTokenizer(tokenizer) {
try {
return _fromTokenizer(tokenizer);
} catch (error) {
if (!(error instanceof strtok3.EndOfStreamError)) {
throw error;
}
}
}
async function _fromTokenizer(tokenizer) {
let buffer = Buffer.alloc(minimumBytes);
const bytesRead = 12;
const check = (header, options) => _check(buffer, header, options);
const checkString = (header, options) => check(stringToBytes(header), options);
const checkSequence = (sequence, ignoreBytes) => _checkSequence(sequence, tokenizer, ignoreBytes);
// Keep reading until EOF if the file size is unknown.
if (!tokenizer.fileInfo.size) {
tokenizer.fileInfo.size = Number.MAX_SAFE_INTEGER;
}
await tokenizer.peekBuffer(buffer, {length: bytesRead, mayBeLess: true});
// -- 2-byte signatures --
if (check([0x42, 0x4D])) {
return {
ext: 'bmp',
mime: 'image/bmp'
};
}
if (check([0x0B, 0x77])) {
return {
ext: 'ac3',
mime: 'audio/vnd.dolby.dd-raw'
};
}
if (check([0x78, 0x01])) {
return {
ext: 'dmg',
mime: 'application/x-apple-diskimage'
};
}
if (check([0x4D, 0x5A])) {
return {
ext: 'exe',
mime: 'application/x-msdownload'
};
}
if (check([0x25, 0x21])) {
await tokenizer.peekBuffer(buffer, {length: 24, mayBeLess: true});
if (checkString('PS-Adobe-', {offset: 2}) &&
checkString(' EPSF-', {offset: 14})) {
return {
ext: 'eps',
mime: 'application/eps'
};
}
return {
ext: 'ps',
mime: 'application/postscript'
};
}
if (
check([0x1F, 0xA0]) ||
check([0x1F, 0x9D])
) {
return {
ext: 'Z',
mime: 'application/x-compress'
};
}
// -- 3-byte signatures --
if (check([0xFF, 0xD8, 0xFF])) {
return {
ext: 'jpg',
mime: 'image/jpeg'
};
}
if (check([0x49, 0x49, 0xBC])) {
return {
ext: 'jxr',
mime: 'image/vnd.ms-photo'
};
}
if (check([0x1F, 0x8B, 0x8])) {
return {
ext: 'gz',
mime: 'application/gzip'
};
}
if (check([0x42, 0x5A, 0x68])) {
return {
ext: 'bz2',
mime: 'application/x-bzip2'
};
}
if (checkString('ID3')) {
await tokenizer.ignore(6); // Skip ID3 header until the header size
const id3HeaderLen = await tokenizer.readToken(uint32SyncSafeToken);
if (tokenizer.position + id3HeaderLen > tokenizer.fileInfo.size) {
// Guess file type based on ID3 header for backward compatibility
return {
ext: 'mp3',
mime: 'audio/mpeg'
};
}
await tokenizer.ignore(id3HeaderLen);
return fromTokenizer(tokenizer); // Skip ID3 header, recursion
}
// Musepack, SV7
if (checkString('MP+')) {
return {
ext: 'mpc',
mime: 'audio/x-musepack'
};
}
if (
(buffer[0] === 0x43 || buffer[0] === 0x46) &&
check([0x57, 0x53], {offset: 1})
) {
return {
ext: 'swf',
mime: 'application/x-shockwave-flash'
};
}
// -- 4-byte signatures --
if (check([0x47, 0x49, 0x46])) {
return {
ext: 'gif',
mime: 'image/gif'
};
}
if (checkString('FLIF')) {
return {
ext: 'flif',
mime: 'image/flif'
};
}
if (checkString('8BPS')) {
return {
ext: 'psd',
mime: 'image/vnd.adobe.photoshop'
};
}
if (checkString('WEBP', {offset: 8})) {
return {
ext: 'webp',
mime: 'image/webp'
};
}
// Musepack, SV8
if (checkString('MPCK')) {
return {
ext: 'mpc',
mime: 'audio/x-musepack'
};
}
if (checkString('FORM')) {
return {
ext: 'aif',
mime: 'audio/aiff'
};
}
if (checkString('icns', {offset: 0})) {
return {
ext: 'icns',
mime: 'image/icns'
};
}
// Zip-based file formats
// Need to be before the `zip` check
if (check([0x50, 0x4B, 0x3, 0x4])) { // Local file header signature
try {
while (tokenizer.position + 30 < tokenizer.fileInfo.size) {
await tokenizer.readBuffer(buffer, {length: 30});
// https://en.wikipedia.org/wiki/Zip_(file_format)#File_headers
const zipHeader = {
compressedSize: buffer.readUInt32LE(18),
uncompressedSize: buffer.readUInt32LE(22),
filenameLength: buffer.readUInt16LE(26),
extraFieldLength: buffer.readUInt16LE(28)
};
zipHeader.filename = await tokenizer.readToken(new Token.StringType(zipHeader.filenameLength, 'utf-8'));
await tokenizer.ignore(zipHeader.extraFieldLength);
// Assumes signed `.xpi` from addons.mozilla.org
if (zipHeader.filename === 'META-INF/mozilla.rsa') {
return {
ext: 'xpi',
mime: 'application/x-xpinstall'
};
}
if (zipHeader.filename.endsWith('.rels') || zipHeader.filename.endsWith('.xml')) {
const type = zipHeader.filename.split('/')[0];
switch (type) {
case '_rels':
break;
case 'word':
return {
ext: 'docx',
mime: 'application/vnd.openxmlformats-officedocument.wordprocessingml.document'
};
case 'ppt':
return {
ext: 'pptx',
mime: 'application/vnd.openxmlformats-officedocument.presentationml.presentation'
};
case 'xl':
return {
ext: 'xlsx',
mime: 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet'
};
default:
break;
}
}
if (zipHeader.filename.startsWith('xl/')) {
return {
ext: 'xlsx',
mime: 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet'
};
}
// The docx, xlsx and pptx file types extend the Office Open XML file format:
// https://en.wikipedia.org/wiki/Office_Open_XML_file_formats
// We look for:
// - one entry named '[Content_Types].xml' or '_rels/.rels',
// - one entry indicating specific type of file.
// MS Office, OpenOffice and LibreOffice may put the parts in different order, so the check should not rely on it.
if (zipHeader.filename === 'mimetype' && zipHeader.compressedSize === zipHeader.uncompressedSize) {
const mimeType = await tokenizer.readToken(new Token.StringType(zipHeader.compressedSize, 'utf-8'));
switch (mimeType) {
case 'application/epub+zip':
return {
ext: 'epub',
mime: 'application/epub+zip'
};
case 'application/vnd.oasis.opendocument.text':
return {
ext: 'odt',
mime: 'application/vnd.oasis.opendocument.text'
};
case 'application/vnd.oasis.opendocument.spreadsheet':
return {
ext: 'ods',
mime: 'application/vnd.oasis.opendocument.spreadsheet'
};
case 'application/vnd.oasis.opendocument.presentation':
return {
ext: 'odp',
mime: 'application/vnd.oasis.opendocument.presentation'
};
default:
}
}
// Try to find next header manually when current one is corrupted
if (zipHeader.compressedSize === 0) {
let nextHeaderIndex = -1;
while (nextHeaderIndex < 0 && (tokenizer.position < tokenizer.fileInfo.size)) {
await tokenizer.peekBuffer(buffer, {mayBeLess: true});
nextHeaderIndex = buffer.indexOf('504B0304', 0, 'hex');
// Move position to the next header if found, skip the whole buffer otherwise
await tokenizer.ignore(nextHeaderIndex >= 0 ? nextHeaderIndex : buffer.length);
}
} else {
await tokenizer.ignore(zipHeader.compressedSize);
}
}
} catch (error) {
if (!(error instanceof strtok3.EndOfStreamError)) {
throw error;
}
}
return {
ext: 'zip',
mime: 'application/zip'
};
}
if (checkString('OggS')) {
// This is an OGG container
await tokenizer.ignore(28);
const type = Buffer.alloc(8);
await tokenizer.readBuffer(type);
// Needs to be before `ogg` check
if (_check(type, [0x4F, 0x70, 0x75, 0x73, 0x48, 0x65, 0x61, 0x64])) {
return {
ext: 'opus',
mime: 'audio/opus'
};
}
// If ' theora' in header.
if (_check(type, [0x80, 0x74, 0x68, 0x65, 0x6F, 0x72, 0x61])) {
return {
ext: 'ogv',
mime: 'video/ogg'
};
}
// If '\x01video' in header.
if (_check(type, [0x01, 0x76, 0x69, 0x64, 0x65, 0x6F, 0x00])) {
return {
ext: 'ogm',
mime: 'video/ogg'
};
}
// If ' FLAC' in header https://xiph.org/flac/faq.html
if (_check(type, [0x7F, 0x46, 0x4C, 0x41, 0x43])) {
return {
ext: 'oga',
mime: 'audio/ogg'
};
}
// 'Speex ' in header https://en.wikipedia.org/wiki/Speex
if (_check(type, [0x53, 0x70, 0x65, 0x65, 0x78, 0x20, 0x20])) {
return {
ext: 'spx',
mime: 'audio/ogg'
};
}
// If '\x01vorbis' in header
if (_check(type, [0x01, 0x76, 0x6F, 0x72, 0x62, 0x69, 0x73])) {
return {
ext: 'ogg',
mime: 'audio/ogg'
};
}
// Default OGG container https://www.iana.org/assignments/media-types/application/ogg
return {
ext: 'ogx',
mime: 'application/ogg'
};
}
if (
check([0x50, 0x4B]) &&
(buffer[2] === 0x3 || buffer[2] === 0x5 || buffer[2] === 0x7) &&
(buffer[3] === 0x4 || buffer[3] === 0x6 || buffer[3] === 0x8)
) {
return {
ext: 'zip',
mime: 'application/zip'
};
}
//
// File Type Box (https://en.wikipedia.org/wiki/ISO_base_media_file_format)
// It's not required to be first, but it's recommended to be. Almost all ISO base media files start with `ftyp` box.
// `ftyp` box must contain a brand major identifier, which must consist of ISO 8859-1 printable characters.
// Here we check for 8859-1 printable characters (for simplicity, it's a mask which also catches one non-printable character).
if (
checkString('ftyp', {offset: 4}) &&
(buffer[8] & 0x60) !== 0x00 // Brand major, first character ASCII?
) {
// They all can have MIME `video/mp4` except `application/mp4` special-case which is hard to detect.
// For some cases, we're specific, everything else falls to `video/mp4` with `mp4` extension.
const brandMajor = buffer.toString('binary', 8, 12).replace('\0', ' ').trim();
switch (brandMajor) {
case 'avif':
return {ext: 'avif', mime: 'image/avif'};
case 'mif1':
return {ext: 'heic', mime: 'image/heif'};
case 'msf1':
return {ext: 'heic', mime: 'image/heif-sequence'};
case 'heic':
case 'heix':
return {ext: 'heic', mime: 'image/heic'};
case 'hevc':
case 'hevx':
return {ext: 'heic', mime: 'image/heic-sequence'};
case 'qt':
return {ext: 'mov', mime: 'video/quicktime'};
case 'M4V':
case 'M4VH':
case 'M4VP':
return {ext: 'm4v', mime: 'video/x-m4v'};
case 'M4P':
return {ext: 'm4p', mime: 'video/mp4'};
case 'M4B':
return {ext: 'm4b', mime: 'audio/mp4'};
case 'M4A':
return {ext: 'm4a', mime: 'audio/x-m4a'};
case 'F4V':
return {ext: 'f4v', mime: 'video/mp4'};
case 'F4P':
return {ext: 'f4p', mime: 'video/mp4'};
case 'F4A':
return {ext: 'f4a', mime: 'audio/mp4'};
case 'F4B':
return {ext: 'f4b', mime: 'audio/mp4'};
case 'crx':
return {ext: 'cr3', mime: 'image/x-canon-cr3'};
default:
if (brandMajor.startsWith('3g')) {
if (brandMajor.startsWith('3g2')) {
return {ext: '3g2', mime: 'video/3gpp2'};
}
return {ext: '3gp', mime: 'video/3gpp'};
}
return {ext: 'mp4', mime: 'video/mp4'};
}
}
if (checkString('MThd')) {
return {
ext: 'mid',
mime: 'audio/midi'
};
}
if (
checkString('wOFF') &&
(
check([0x00, 0x01, 0x00, 0x00], {offset: 4}) ||
checkString('OTTO', {offset: 4})
)
) {
return {
ext: 'woff',
mime: 'font/woff'
};
}
if (
checkString('wOF2') &&
(
check([0x00, 0x01, 0x00, 0x00], {offset: 4}) ||
checkString('OTTO', {offset: 4})
)
) {
return {
ext: 'woff2',
mime: 'font/woff2'
};
}
if (check([0xD4, 0xC3, 0xB2, 0xA1]) || check([0xA1, 0xB2, 0xC3, 0xD4])) {
return {
ext: 'pcap',
mime: 'application/vnd.tcpdump.pcap'
};
}
// Sony DSD Stream File (DSF)
if (checkString('DSD ')) {
return {
ext: 'dsf',
mime: 'audio/x-dsf' // Non-standard
};
}
if (checkString('LZIP')) {
return {
ext: 'lz',
mime: 'application/x-lzip'
};
}
if (checkString('fLaC')) {
return {
ext: 'flac',
mime: 'audio/x-flac'
};
}
if (check([0x42, 0x50, 0x47, 0xFB])) {
return {
ext: 'bpg',
mime: 'image/bpg'
};
}
if (checkString('wvpk')) {
return {
ext: 'wv',
mime: 'audio/wavpack'
};
}
if (checkString('%PDF')) {
// Check if this is an Adobe Illustrator file
const isAiFile = await checkSequence('Adobe Illustrator', 1350);
if (isAiFile) {
return {
ext: 'ai',
mime: 'application/postscript'
};
}
// Assume this is just a normal PDF
return {
ext: 'pdf',
mime: 'application/pdf'
};
}
if (check([0x00, 0x61, 0x73, 0x6D])) {
return {
ext: 'wasm',
mime: 'application/wasm'
};
}
// TIFF, little-endian type
if (check([0x49, 0x49, 0x2A, 0x0])) {
if (checkString('CR', {offset: 8})) {
return {
ext: 'cr2',
mime: 'image/x-canon-cr2'
};
}
if (check([0x1C, 0x00, 0xFE, 0x00], {offset: 8}) || check([0x1F, 0x00, 0x0B, 0x00], {offset: 8})) {
return {
ext: 'nef',
mime: 'image/x-nikon-nef'
};
}
if (
check([0x08, 0x00, 0x00, 0x00], {offset: 4}) &&
(check([0x2D, 0x00, 0xFE, 0x00], {offset: 8}) ||
check([0x27, 0x00, 0xFE, 0x00], {offset: 8}))
) {
return {
ext: 'dng',
mime: 'image/x-adobe-dng'
};
}
buffer = Buffer.alloc(24);
await tokenizer.peekBuffer(buffer);
if (
(check([0x10, 0xFB, 0x86, 0x01], {offset: 4}) || check([0x08, 0x00, 0x00, 0x00], {offset: 4})) &&
// This pattern differentiates ARW from other TIFF-ish file types:
check([0x00, 0xFE, 0x00, 0x04, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x03, 0x01], {offset: 9})
) {
return {
ext: 'arw',
mime: 'image/x-sony-arw'
};
}
return {
ext: 'tif',
mime: 'image/tiff'
};
}
// TIFF, big-endian type
if (check([0x4D, 0x4D, 0x0, 0x2A])) {
return {
ext: 'tif',
mime: 'image/tiff'
};
}
if (checkString('MAC ')) {
return {
ext: 'ape',
mime: 'audio/ape'
};
}
// https://github.com/threatstack/libmagic/blob/master/magic/Magdir/matroska
if (check([0x1A, 0x45, 0xDF, 0xA3])) { // Root element: EBML
async function readField() {
const msb = await tokenizer.peekNumber(Token.UINT8);
let mask = 0x80;
let ic = 0; // 0 = A, 1 = B, 2 = C, 3 = D
while ((msb & mask) === 0) {
++ic;
mask >>= 1;
}
const id = Buffer.alloc(ic + 1);
await tokenizer.readBuffer(id);
return id;
}
async function readElement() {
const id = await readField();
const lenField = await readField();
lenField[0] ^= 0x80 >> (lenField.length - 1);
const nrLen = Math.min(6, lenField.length); // JavaScript can max read 6 bytes integer
return {
id: id.readUIntBE(0, id.length),
len: lenField.readUIntBE(lenField.length - nrLen, nrLen)
};
}
async function readChildren(level, children) {
while (children > 0) {
const e = await readElement();
if (e.id === 0x4282) {
return tokenizer.readToken(new Token.StringType(e.len, 'utf-8')); // Return DocType
}
await tokenizer.ignore(e.len); // ignore payload
--children;
}
}
const re = await readElement();
const docType = await readChildren(1, re.len);
switch (docType) {
case 'webm':
return {
ext: 'webm',
mime: 'video/webm'
};
case 'matroska':
return {
ext: 'mkv',
mime: 'video/x-matroska'
};
default:
return;
}
}
// RIFF file format which might be AVI, WAV, QCP, etc
if (check([0x52, 0x49, 0x46, 0x46])) {
if (check([0x41, 0x56, 0x49], {offset: 8})) {
return {
ext: 'avi',
mime: 'video/vnd.avi'
};
}
if (check([0x57, 0x41, 0x56, 0x45], {offset: 8})) {
return {
ext: 'wav',
mime: 'audio/vnd.wave'
};
}
// QLCM, QCP file
if (check([0x51, 0x4C, 0x43, 0x4D], {offset: 8})) {
return {
ext: 'qcp',
mime: 'audio/qcelp'
};
}
}
if (checkString('SQLi')) {
return {
ext: 'sqlite',
mime: 'application/x-sqlite3'
};
}
if (check([0x4E, 0x45, 0x53, 0x1A])) {
return {
ext: 'nes',
mime: 'application/x-nintendo-nes-rom'
};
}
if (checkString('Cr24')) {
return {
ext: 'crx',
mime: 'application/x-google-chrome-extension'
};
}
if (
checkString('MSCF') ||
checkString('ISc(')
) {
return {
ext: 'cab',
mime: 'application/vnd.ms-cab-compressed'
};
}
if (check([0xED, 0xAB, 0xEE, 0xDB])) {
return {
ext: 'rpm',
mime: 'application/x-rpm'
};
}
if (check([0xC5, 0xD0, 0xD3, 0xC6])) {
return {
ext: 'eps',
mime: 'application/eps'
};
}
// -- 5-byte signatures --
if (check([0x4F, 0x54, 0x54, 0x4F, 0x00])) {
return {
ext: 'otf',
mime: 'font/otf'
};
}
if (checkString('#!AMR')) {
return {
ext: 'amr',
mime: 'audio/amr'
};
}
if (checkString('{\\rtf')) {
return {
ext: 'rtf',
mime: 'application/rtf'
};
}
if (check([0x46, 0x4C, 0x56, 0x01])) {
return {
ext: 'flv',
mime: 'video/x-flv'
};
}
if (checkString('IMPM')) {
return {
ext: 'it',
mime: 'audio/x-it'
};
}
if (
checkString('-lh0-', {offset: 2}) ||
checkString('-lh1-', {offset: 2}) ||
checkString('-lh2-', {offset: 2}) ||
checkString('-lh3-', {offset: 2}) ||
checkString('-lh4-', {offset: 2}) ||
checkString('-lh5-', {offset: 2}) ||
checkString('-lh6-', {offset: 2}) ||
checkString('-lh7-', {offset: 2}) ||
checkString('-lzs-', {offset: 2}) ||
checkString('-lz4-', {offset: 2}) ||
checkString('-lz5-', {offset: 2}) ||
checkString('-lhd-', {offset: 2})
) {
return {
ext: 'lzh',
mime: 'application/x-lzh-compressed'
};
}
// MPEG program stream (PS or MPEG-PS)
if (check([0x00, 0x00, 0x01, 0xBA])) {
// MPEG-PS, MPEG-1 Part 1
if (check([0x21], {offset: 4, mask: [0xF1]})) {
return {
ext: 'mpg', // May also be .ps, .mpeg
mime: 'video/MP1S'
};
}
// MPEG-PS, MPEG-2 Part 1
if (check([0x44], {offset: 4, mask: [0xC4]})) {
return {
ext: 'mpg', // May also be .mpg, .m2p, .vob or .sub
mime: 'video/MP2P'
};
}
}
// -- 6-byte signatures --
if (check([0xFD, 0x37, 0x7A, 0x58, 0x5A, 0x00])) {
return {
ext: 'xz',
mime: 'application/x-xz'
};
}
if (checkString('<?xml ')) {
return {
ext: 'xml',
mime: 'application/xml'
};
}
if (checkString('BEGIN:')) {
return {
ext: 'ics',
mime: 'text/calendar'
};
}
if (check([0x37, 0x7A, 0xBC, 0xAF, 0x27, 0x1C])) {
return {
ext: '7z',
mime: 'application/x-7z-compressed'
};
}
if (
check([0x52, 0x61, 0x72, 0x21, 0x1A, 0x7]) &&
(buffer[6] === 0x0 || buffer[6] === 0x1)
) {
return {
ext: 'rar',
mime: 'application/x-rar-compressed'
};
}
if (checkString('solid ')) {
return {
ext: 'stl',
mime: 'model/stl'
};
}
// -- 7-byte signatures --
if (checkString('BLENDER')) {
return {
ext: 'blend',
mime: 'application/x-blender'
};
}
if (checkString('!<arch>')) {
await tokenizer.ignore(8);
const str = await tokenizer.readToken(new Token.StringType(13, 'ascii'));
if (str === 'debian-binary') {
return {
ext: 'deb',
mime: 'application/x-deb'
};
}
return {
ext: 'ar',
mime: 'application/x-unix-archive'
};
}
// -- 8-byte signatures --
if (check([0x89, 0x50, 0x4E, 0x47, 0x0D, 0x0A, 0x1A, 0x0A])) {
// APNG format (https://wiki.mozilla.org/APNG_Specification)
// 1. Find the first IDAT (image data) chunk (49 44 41 54)
// 2. Check if there is an "acTL" chunk before the IDAT one (61 63 54 4C)
// Offset calculated as follows:
// - 8 bytes: PNG signature
// - 4 (length) + 4 (chunk type) + 13 (chunk data) + 4 (CRC): IHDR chunk
await tokenizer.ignore(8); // ignore PNG signature
async function readChunkHeader() {
return {
length: await tokenizer.readToken(Token.INT32_BE),
type: await tokenizer.readToken(new Token.StringType(4, 'binary'))
};
}
do {
const chunk = await readChunkHeader();
if (chunk.length < 0) {
return; // Invalid chunk length
}
switch (chunk.type) {
case 'IDAT':
return {
ext: 'png',
mime: 'image/png'
};
case 'acTL':
return {
ext: 'apng',
mime: 'image/apng'
};
default:
await tokenizer.ignore(chunk.length + 4); // Ignore chunk-data + CRC
}
} while (tokenizer.position + 8 < tokenizer.fileInfo.size);
return {
ext: 'png',
mime: 'image/png'
};
}
if (check([0x41, 0x52, 0x52, 0x4F, 0x57, 0x31, 0x00, 0x00])) {
return {
ext: 'arrow',
mime: 'application/x-apache-arrow'
};
}
if (check([0x67, 0x6C, 0x54, 0x46, 0x02, 0x00, 0x00, 0x00])) {
return {
ext: 'glb',
mime: 'model/gltf-binary'
};
}
// `mov` format variants
if (
check([0x66, 0x72, 0x65, 0x65], {offset: 4}) || // `free`
check([0x6D, 0x64, 0x61, 0x74], {offset: 4}) || // `mdat` MJPEG
check([0x6D, 0x6F, 0x6F, 0x76], {offset: 4}) || // `moov`
check([0x77, 0x69, 0x64, 0x65], {offset: 4}) // `wide`
) {
return {
ext: 'mov',
mime: 'video/quicktime'
};
}
// -- 9-byte signatures --
if (check([0x49, 0x49, 0x52, 0x4F, 0x08, 0x00, 0x00, 0x00, 0x18])) {
return {
ext: 'orf',
mime: 'image/x-olympus-orf'
};
}
// -- 12-byte signatures --
if (check([0x49, 0x49, 0x55, 0x00, 0x18, 0x00, 0x00, 0x00, 0x88, 0xE7, 0x74, 0xD8])) {
return {
ext: 'rw2',
mime: 'image/x-panasonic-rw2'
};
}
// ASF_Header_Object first 80 bytes
if (check([0x30, 0x26, 0xB2, 0x75, 0x8E, 0x66, 0xCF, 0x11, 0xA6, 0xD9])) {
async function readHeader() {
const guid = Buffer.alloc(16);
await tokenizer.readBuffer(guid);
return {
id: guid,
size: await tokenizer.readToken(Token.UINT64_LE)
};
}
await tokenizer.ignore(30);
// Search for header should be in first 1KB of file.
while (tokenizer.position + 24 < tokenizer.fileInfo.size) {
const header = await readHeader();
let payload = header.size - 24;
if (_check(header.id, [0x91, 0x07, 0xDC, 0xB7, 0xB7, 0xA9, 0xCF, 0x11, 0x8E, 0xE6, 0x00, 0xC0, 0x0C, 0x20, 0x53, 0x65])) {
// Sync on Stream-Properties-Object (B7DC0791-A9B7-11CF-8EE6-00C00C205365)
const typeId = Buffer.alloc(16);
payload -= await tokenizer.readBuffer(typeId);
if (_check(typeId, [0x40, 0x9E, 0x69, 0xF8, 0x4D, 0x5B, 0xCF, 0x11, 0xA8, 0xFD, 0x00, 0x80, 0x5F, 0x5C, 0x44, 0x2B])) {
// Found audio:
return {
ext: 'asf',
mime: 'audio/x-ms-asf'
};
}
if (_check(typeId, [0xC0, 0xEF, 0x19, 0xBC, 0x4D, 0x5B, 0xCF, 0x11, 0xA8, 0xFD, 0x00, 0x80, 0x5F, 0x5C, 0x44, 0x2B])) {
// Found video:
return {
ext: 'asf',
mime: 'video/x-ms-asf'
};
}
break;
}
await tokenizer.ignore(payload);
}
// Default to ASF generic extension
return {
ext: 'asf',
mime: 'application/vnd.ms-asf'
};
}
if (check([0xAB, 0x4B, 0x54, 0x58, 0x20, 0x31, 0x31, 0xBB, 0x0D, 0x0A, 0x1A, 0x0A])) {
return {
ext: 'ktx',
mime: 'image/ktx'
};
}
if ((check([0x7E, 0x10, 0x04]) || check([0x7E, 0x18, 0x04])) && check([0x30, 0x4D, 0x49, 0x45], {offset: 4})) {
return {
ext: 'mie',
mime: 'application/x-mie'
};
}
if (check([0x27, 0x0A, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00], {offset: 2})) {
return {
ext: 'shp',
mime: 'application/x-esri-shape'
};
}
if (check([0x00, 0x00, 0x00, 0x0C, 0x6A, 0x50, 0x20, 0x20, 0x0D, 0x0A, 0x87, 0x0A])) {
// JPEG-2000 family
await tokenizer.ignore(20);
const type = await tokenizer.readToken(new Token.StringType(4, 'ascii'));
switch (type) {
case 'jp2 ':
return {
ext: 'jp2',
mime: 'image/jp2'
};
case 'jpx ':
return {
ext: 'jpx',
mime: 'image/jpx'
};
case 'jpm ':
return {
ext: 'jpm',
mime: 'image/jpm'
};
case 'mjp2':
return {
ext: 'mj2',
mime: 'image/mj2'
};
default:
return;
}
}
// -- Unsafe signatures --
if (
check([0x0, 0x0, 0x1, 0xBA]) ||
check([0x0, 0x0, 0x1, 0xB3])
) {
return {
ext: 'mpg',
mime: 'video/mpeg'
};
}
if (check([0x00, 0x01, 0x00, 0x00, 0x00])) {
return {
ext: 'ttf',
mime: 'font/ttf'
};
}
if (check([0x00, 0x00, 0x01, 0x00])) {
return {
ext: 'ico',
mime: 'image/x-icon'
};
}
if (check([0x00, 0x00, 0x02, 0x00])) {
return {
ext: 'cur',
mime: 'image/x-icon'
};
}
if (check([0xD0, 0xCF, 0x11, 0xE0, 0xA1, 0xB1, 0x1A, 0xE1])) {
// Detected Microsoft Compound File Binary File (MS-CFB) Format.
return {
ext: 'cfb',
mime: 'application/x-cfb'
};
}
// Increase sample size from 12 to 256.
await tokenizer.peekBuffer(buffer, {length: Math.min(256, tokenizer.fileInfo.size), mayBeLess: true});
// `raf` is here just to keep all the raw image detectors together.
if (checkString('FUJIFILMCCD-RAW')) {
return {
ext: 'raf',
mime: 'image/x-fujifilm-raf'
};
}
if (checkString('Extended Module:')) {
return {
ext: 'xm',
mime: 'audio/x-xm'
};
}
if (checkString('Creative Voice File')) {
return {
ext: 'voc',
mime: 'audio/x-voc'
};
}
if (check([0x04, 0x00, 0x00, 0x00]) && buffer.length >= 16) { // Rough & quick check Pickle/ASAR
const jsonSize = buffer.readUInt32LE(12);
if (jsonSize > 12 && jsonSize < 240 && buffer.length >= jsonSize + 16) {
try {
const header = buffer.slice(16, jsonSize + 16).toString();
const json = JSON.parse(header);
// Check if Pickle is ASAR
if (json.files) { // Final check, assuring Pickle/ASAR format
return {
ext: 'asar',
mime: 'application/x-asar'
};
}
} catch (_) {
}
}
}
if (check([0x06, 0x0E, 0x2B, 0x34, 0x02, 0x05, 0x01, 0x01, 0x0D, 0x01, 0x02, 0x01, 0x01, 0x02])) {
return {
ext: 'mxf',
mime: 'application/mxf'
};
}
if (checkString('SCRM', {offset: 44})) {
return {
ext: 's3m',
mime: 'audio/x-s3m'
};
}
if (check([0x47], {offset: 4}) && (check([0x47], {offset: 192}) || check([0x47], {offset: 196}))) {
return {
ext: 'mts',
mime: 'video/mp2t'
};
}
if (check([0x42, 0x4F, 0x4F, 0x4B, 0x4D, 0x4F, 0x42, 0x49], {offset: 60})) {
return {
ext: 'mobi',
mime: 'application/x-mobipocket-ebook'
};
}
if (check([0x44, 0x49, 0x43, 0x4D], {offset: 128})) {
return {
ext: 'dcm',
mime: 'application/dicom'
};
}
if (check([0x4C, 0x00, 0x00, 0x00, 0x01, 0x14, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0xC0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x46])) {
return {
ext: 'lnk',
mime: 'application/x.ms.shortcut' // Invented by us
};
}
if (check([0x62, 0x6F, 0x6F, 0x6B, 0x00, 0x00, 0x00, 0x00, 0x6D, 0x61, 0x72, 0x6B, 0x00, 0x00, 0x00, 0x00])) {
return {
ext: 'alias',
mime: 'application/x.apple.alias' // Invented by us
};
}
if (
check([0x4C, 0x50], {offset: 34}) &&
(
check([0x00, 0x00, 0x01], {offset: 8}) ||
check([0x01, 0x00, 0x02], {offset: 8}) ||
check([0x02, 0x00, 0x02], {offset: 8})
)
) {
return {
ext: 'eot',
mime: 'application/vnd.ms-fontobject'
};
}
if (check([0x06, 0x06, 0xED, 0xF5, 0xD8, 0x1D, 0x46, 0xE5, 0xBD, 0x31, 0xEF, 0xE7, 0xFE, 0x74, 0xB7, 0x1D])) {
return {
ext: 'indd',
mime: 'application/x-indesign'
};
}
// Increase sample size from 256 to 512
await tokenizer.peekBuffer(buffer, {length: Math.min(512, tokenizer.fileInfo.size), mayBeLess: true});
// Requires a buffer size of 512 bytes
if (tarHeaderChecksumMatches(buffer)) {
return {
ext: 'tar',
mime: 'application/x-tar'
};
}
if (check([0xFF, 0xFE, 0xFF, 0x0E, 0x53, 0x00, 0x6B, 0x00, 0x65, 0x00, 0x74, 0x00, 0x63, 0x00, 0x68, 0x00, 0x55, 0x00, 0x70, 0x00, 0x20, 0x00, 0x4D, 0x00, 0x6F, 0x00, 0x64, 0x00, 0x65, 0x00, 0x6C, 0x00])) {
return {
ext: 'skp',
mime: 'application/vnd.sketchup.skp'
};
}
if (checkString('-----BEGIN PGP MESSAGE-----')) {
return {
ext: 'pgp',
mime: 'application/pgp-encrypted'
};
}
// Check for MPEG header at different starting offsets
for (let start = 0; start < 2 && start < (buffer.length - 16); start++) {
// Check MPEG 1 or 2 Layer 3 header, or 'layer 0' for ADTS (MPEG sync-word 0xFFE)
if (buffer.length >= start + 2 && check([0xFF, 0xE0], {offset: start, mask: [0xFF, 0xE0]})) {
if (check([0x10], {offset: start + 1, mask: [0x16]})) {
// Check for (ADTS) MPEG-2
if (check([0x08], {offset: start + 1, mask: [0x08]})) {
return {
ext: 'aac',
mime: 'audio/aac'
};
}
// Must be (ADTS) MPEG-4
return {
ext: 'aac',
mime: 'audio/aac'
};
}
// MPEG 1 or 2 Layer 3 header
// Check for MPEG layer 3
if (check([0x02], {offset: start + 1, mask: [0x06]})) {
return {
ext: 'mp3',
mime: 'audio/mpeg'
};
}
// Check for MPEG layer 2
if (check([0x04], {offset: start + 1, mask: [0x06]})) {
return {
ext: 'mp2',
mime: 'audio/mpeg'
};
}
// Check for MPEG layer 1
if (check([0x06], {offset: start + 1, mask: [0x06]})) {
return {
ext: 'mp1',
mime: 'audio/mpeg'
};
}
}
}
}
const stream = readableStream => new Promise((resolve, reject) => {
// Using `eval` to work around issues when bundling with Webpack
const stream = eval('require')('stream'); // eslint-disable-line no-eval
readableStream.on('error', reject);
readableStream.once('readable', async () => {
// Set up output stream
const pass = new stream.PassThrough();
let outputStream;
if (stream.pipeline) {
outputStream = stream.pipeline(readableStream, pass, () => {
});
} else {
outputStream = readableStream.pipe(pass);
}
// Read the input stream and detect the filetype
const chunk = readableStream.read(minimumBytes) || readableStream.read() || Buffer.alloc(0);
try {
const fileType = await fromBuffer(chunk);
pass.fileType = fileType;
} catch (error) {
reject(error);
}
resolve(outputStream);
});
});
const fileType = {
fromStream,
fromTokenizer,
fromBuffer,
stream
};
Object.defineProperty(fileType, 'extensions', {
get() {
return new Set(supported.extensions);
}
});
Object.defineProperty(fileType, 'mimeTypes', {
get() {
return new Set(supported.mimeTypes);
}
});
module.exports = fileType;