UNPKG

detect-content-type

Version:

detect-content-type implements the algorithm described at http://mimesniff.spec.whatwg.org/ to determine the Content-Type of the given data. It considers at most the first 512 bytes of data. It always returns a valid MIME type: if it cannot determine a more specific one, it returns "application/octet-stream".

327 lines (305 loc) 6.93 kB
// detectContentType implements the algorithm described at
// https://mimesniff.spec.whatwg.org/ to determine the Content-Type of the given
// data. It considers at most the first 512 bytes of data.
//
// detectContentType always returns a valid MIME type: if it cannot determine a
// more specific one, it returns "application/octet-stream".

// Maximum number of leading bytes that are inspected.
const SNIFF_LENGTH = 512

/**
 * Sniffs the MIME type of `content` from its leading bytes.
 *
 * @param {Buffer|Uint8Array|ArrayBuffer|string|number[]} content - Data to
 *   inspect. Strings are encoded as UTF-8 before sniffing.
 * @returns {string} The first matching MIME type from `sniffSignatures`, or
 *   'application/octet-stream' when no signature matches.
 * @throws {TypeError} If `content` cannot be converted to a Buffer.
 */
export default function detectContentType(content) {
  const data = toSniffBuffer(content)

  // Index of the first byte that is not leading whitespace.
  let firstNonWS = 0
  while (firstNonWS < data.length && isWS(data[firstNonWS])) {
    firstNonWS++
  }

  for (const sig of sniffSignatures) {
    const ct = sig.match(data, firstNonWS)
    if (ct !== '') {
      return ct
    }
  }

  return 'application/octet-stream' // fallback
}

// toSniffBuffer returns a Buffer holding at most the first SNIFF_LENGTH bytes
// of content, so every signature can rely on Buffer methods (subarray,
// readUInt32BE) regardless of what the caller passed in.
function toSniffBuffer(content) {
  if (Buffer.isBuffer(content)) {
    return content.subarray(0, SNIFF_LENGTH)
  }
  if (typeof content === 'string') {
    // Every UTF-16 code unit encodes to at least one UTF-8 byte, so
    // SNIFF_LENGTH + 1 units are enough to produce the first SNIFF_LENGTH
    // bytes exactly, even if the cut lands inside a surrogate pair.
    return Buffer.from(content.slice(0, SNIFF_LENGTH + 1)).subarray(
      0,
      SNIFF_LENGTH
    )
  }
  if (ArrayBuffer.isView(content)) {
    // Wrap the underlying memory instead of copying the whole view.
    return Buffer.from(
      content.buffer,
      content.byteOffset,
      Math.min(content.byteLength, SNIFF_LENGTH)
    )
  }
  return Buffer.from(content).subarray(0, SNIFF_LENGTH)
}

// isWS reports whether byte b is tab, line feed, form feed, carriage return
// or space.
function isWS(b) {
  return b === 0x09 || b === 0x0a || b === 0x0c || b === 0x0d || b === 0x20
}

// exactSig matches data that starts with the exact byte sequence `sig`.
class exactSig {
  constructor(sig, ct) {
    this.sig = sig
    this.ct = ct
  }

  // Returns this.ct when data begins with this.sig, '' otherwise.
  match(data) {
    const prefix = data.subarray(0, this.sig.length)
    return Buffer.compare(this.sig, prefix) === 0 ? this.ct : ''
  }
}

// maskedSig matches when (data[i] & mask[i]) equals pat[i] for every byte of
// the pattern, optionally after skipping leading whitespace.
class maskedSig {
  constructor(mask, pat, skipWS, ct) {
    this.mask = mask
    this.pat = pat
    this.skipWS = skipWS
    this.ct = ct
  }

  // Returns this.ct on a match, '' otherwise.
  match(data, firstNonWS) {
    const bytes = this.skipWS ? data.subarray(firstNonWS) : data
    if (this.pat.length !== this.mask.length) {
      return ''
    }
    if (bytes.length < this.mask.length) {
      return ''
    }
    for (let i = 0; i < this.mask.length; i++) {
      if ((bytes[i] & this.mask[i]) !== this.pat[i]) {
        return ''
      }
    }
    return this.ct
  }
}

const UPPER_A = 0x41
const UPPER_Z = 0x5a
const SPACE = 0x20
const RIGHT_ANGLE = 0x3e

// htmlSig matches an HTML tag prefix, ignoring leading whitespace and the
// case of ASCII letters. `h` must be written in upper case.
class htmlSig {
  constructor(h) {
    this.h = Buffer.from(h)
  }

  // Returns 'text/html; charset=utf-8' on a match, '' otherwise.
  match(data, firstNonWS) {
    const bytes = data.subarray(firstNonWS)
    // One extra byte is required for the tag terminator checked below.
    if (bytes.length < this.h.length + 1) {
      return ''
    }
    for (let i = 0; i < this.h.length; i++) {
      const want = this.h[i]
      let got = bytes[i]
      if (UPPER_A <= want && want <= UPPER_Z) {
        // Clearing bit 0x20 folds an ASCII lower-case letter to upper case.
        got &= 0xdf
      }
      if (want !== got) {
        return ''
      }
    }
    // Next byte must be space or right angle bracket.
    const next = bytes[this.h.length]
    if (next !== SPACE && next !== RIGHT_ANGLE) {
      return ''
    }
    return 'text/html; charset=utf-8'
  }
}

const mp4ftype = Buffer.from('ftyp')
const mp4 = Buffer.from('mp4')

// mp4Sig implements the MP4 signature check.
class mp4Sig {
  // Returns 'video/mp4' on a match, '' otherwise.
  match(data) {
    // https://mimesniff.spec.whatwg.org/#signature-for-mp4
    // c.f. section 6.2.1
    if (data.length < 12) {
      return ''
    }
    const boxSize = data.readUInt32BE(0)
    if (boxSize % 4 !== 0 || data.length < boxSize) {
      return ''
    }
    if (Buffer.compare(data.subarray(4, 8), mp4ftype) !== 0) {
      return ''
    }
    // Scan the 4-byte brand fields for one that starts with "mp4".
    for (let st = 8; st < boxSize; st += 4) {
      if (st === 12) {
        // minor version number
        continue
      }
      if (Buffer.compare(data.subarray(st, st + 3), mp4) === 0) {
        return 'video/mp4'
      }
    }
    return ''
  }
}

// textSig matches data that contains none of the binary control bytes listed
// below after the leading whitespace.
class textSig {
  // Returns 'text/plain; charset=utf-8' on a match, '' otherwise.
  match(data, firstNonWS) {
    // c.f. section 5, step 4
    for (const b of data.subarray(firstNonWS)) {
      if (
        b <= 0x08 ||
        b === 0x0b ||
        (0x0e <= b && b <= 0x1a) ||
        (0x1c <= b && b <= 0x1f)
      ) {
        return ''
      }
    }
    return 'text/plain; charset=utf-8'
  }
}

// Signatures are tried in order; the first one that matches wins.
const sniffSignatures = [
  new htmlSig('<!DOCTYPE HTML'),
  new htmlSig('<HTML'),
  new htmlSig('<HEAD'),
  new htmlSig('<SCRIPT'),
  new htmlSig('<IFRAME'),
  new htmlSig('<H1'),
  new htmlSig('<DIV'),
  new htmlSig('<FONT'),
  new htmlSig('<TABLE'),
  new htmlSig('<A'),
  new htmlSig('<STYLE'),
  new htmlSig('<TITLE'),
  new htmlSig('<B'),
  new htmlSig('<BODY'),
  new htmlSig('<BR'),
  new htmlSig('<P'),
  new htmlSig('<!--'),

  new maskedSig(
    Buffer.from([0xff, 0xff, 0xff, 0xff, 0xff]),
    Buffer.from('<?xml'),
    true,
    'text/xml; charset=utf-8'
  ),

  new exactSig(Buffer.from('%PDF-'), 'application/pdf'),
  new exactSig(Buffer.from('%!PS-Adobe-'), 'application/postscript'),

  // UTF BOMs.
  new maskedSig(
    Buffer.from([0xff, 0xff, 0x00, 0x00]),
    Buffer.from([0xfe, 0xff, 0x00, 0x00]),
    false,
    'text/plain; charset=utf-16be'
  ),
  new maskedSig(
    Buffer.from([0xff, 0xff, 0x00, 0x00]),
    Buffer.from([0xff, 0xfe, 0x00, 0x00]),
    false,
    'text/plain; charset=utf-16le'
  ),
  new maskedSig(
    Buffer.from([0xff, 0xff, 0xff, 0x00]),
    Buffer.from([0xef, 0xbb, 0xbf, 0x00]),
    false,
    'text/plain; charset=utf-8'
  ),

  new exactSig(Buffer.from('GIF87a'), 'image/gif'),
  new exactSig(Buffer.from('GIF89a'), 'image/gif'),
  new exactSig(
    Buffer.from([0x89, 0x50, 0x4e, 0x47, 0x0d, 0x0a, 0x1a, 0x0a]),
    'image/png'
  ),
  new exactSig(Buffer.from([0xff, 0xd8, 0xff]), 'image/jpeg'),
  new exactSig(Buffer.from('BM'), 'image/bmp'),
  new maskedSig(
    Buffer.from([
      0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff,
      0xff, 0xff
    ]),
    Buffer.from('RIFF\x00\x00\x00\x00WEBPVP'),
    false,
    'image/webp'
  ),
  new exactSig(
    Buffer.from([0x00, 0x00, 0x01, 0x00]),
    'image/vnd.microsoft.icon'
  ),
  new maskedSig(
    Buffer.from([
      0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff
    ]),
    Buffer.from('RIFF\x00\x00\x00\x00WAVE'),
    false,
    'audio/wave'
  ),
  new maskedSig(
    Buffer.from([
      0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff
    ]),
    Buffer.from('FORM\x00\x00\x00\x00AIFF'),
    false,
    'audio/aiff'
  ),
  new maskedSig(
    Buffer.from([0xff, 0xff, 0xff, 0xff]),
    Buffer.from('.snd'),
    false,
    'audio/basic'
  ),
  new maskedSig(
    Buffer.from([0xff, 0xff, 0xff, 0xff, 0xff]),
    Buffer.from('OggS\x00'),
    false,
    'application/ogg'
  ),
  new maskedSig(
    Buffer.from([0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff]),
    Buffer.concat([Buffer.from('MThd'), Buffer.from([0x00, 0x00, 0x00, 0x06])]),
    false,
    'audio/midi'
  ),
  new maskedSig(
    Buffer.from([0xff, 0xff, 0xff]),
    Buffer.from('ID3'),
    false,
    'audio/mpeg'
  ),
  new maskedSig(
    Buffer.from([
      0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff
    ]),
    Buffer.from('RIFF\x00\x00\x00\x00AVI '),
    false,
    'video/avi'
  ),
  new exactSig(Buffer.from([0x1a, 0x45, 0xdf, 0xa3]), 'video/webm'),
  new exactSig(
    Buffer.from([0x52, 0x61, 0x72, 0x20, 0x1a, 0x07, 0x00]),
    'application/x-rar-compressed'
  ),
  new exactSig(Buffer.from([0x50, 0x4b, 0x03, 0x04]), 'application/zip'),
  new exactSig(Buffer.from([0x1f, 0x8b, 0x08]), 'application/x-gzip'),
  new mp4Sig(),
  new textSig() // should be last
]