// whatwg-mimetype
// Version:
// Parses, serializes, and manipulates MIME types, according to the WHATWG MIME Sniffing Standard.
// 752 lines (694 loc) • 23.9 kB
// JavaScript
"use strict";
const MIMEType = require("./mime-type.js");
// Coerce a MIME type input (a string, or any object whose string form is a
// MIME type) into a MIMEType by stringifying it and handing it to MIMEType.parse.
// Whatever MIMEType.parse returns is passed through unchanged; callers in this
// file treat a null result as "parsing failed". An undefined input is
// stringified to "undefined", so it goes down the same failure path.
function normalizeMIMEType(input) {
  const serialized = `${input}`;
  const parsed = MIMEType.parse(serialized);
  return parsed;
}
// https://mimesniff.spec.whatwg.org/#xml-mime-type
// True when the subtype ends in "+xml", or when the type/subtype pair is
// exactly text/xml or application/xml.
function isXMLMIMEType(mimeType) {
  const { subtype } = mimeType;
  if (subtype.endsWith("+xml")) {
    return true;
  }
  if (subtype !== "xml") {
    return false;
  }
  return mimeType.type === "text" || mimeType.type === "application";
}
// https://mimesniff.spec.whatwg.org/#html-mime-type
// True only for the exact type/subtype pair text/html.
function isHTMLMIMEType(mimeType) {
  if (mimeType.type !== "text") {
    return false;
  }
  return mimeType.subtype === "html";
}
// https://mimesniff.spec.whatwg.org/#resource-header
// Maximum number of leading bytes that are inspected when sniffing.
const RESOURCE_HEADER_LENGTH = 1445;
// Returns the part of `resource` used for sniffing: the resource itself when
// it is no longer than RESOURCE_HEADER_LENGTH, otherwise a subarray over its
// first RESOURCE_HEADER_LENGTH bytes.
function getResourceHeader(resource) {
  const fitsInHeader = resource.length <= RESOURCE_HEADER_LENGTH;
  return fitsInHeader ? resource : resource.subarray(0, RESOURCE_HEADER_LENGTH);
}
// https://mimesniff.spec.whatwg.org/#image-mime-type
// True when the MIME type's top-level type is "image", whatever the subtype.
function isImageMIMEType(mimeType) {
  const { type } = mimeType;
  return type === "image";
}
// https://mimesniff.spec.whatwg.org/#audio-or-video-mime-type
// True for any audio/* or video/* type, and for application/ogg.
function isAudioOrVideoMIMEType(mimeType) {
  switch (mimeType.type) {
    case "audio":
    case "video":
      return true;
    case "application":
      return mimeType.subtype === "ogg";
    default:
      return false;
  }
}
// https://mimesniff.spec.whatwg.org/#whitespace-byte
// True for the five byte values 0x09, 0x0A, 0x0C, 0x0D and 0x20.
function isWhitespaceByte(byte) {
  switch (byte) {
    case 0x09:
    case 0x0A:
    case 0x0C:
    case 0x0D:
    case 0x20:
      return true;
    default:
      return false;
  }
}
// https://mimesniff.spec.whatwg.org/#binary-data-byte
// True for bytes in 0x00-0x08, the single byte 0x0B, and bytes in 0x0E-0x1A
// or 0x1C-0x1F. Every other value (including 0x09, 0x0A, 0x0C, 0x0D, 0x1B
// and anything from 0x20 up) is not a binary data byte.
function isBinaryDataByte(byte) {
  if (byte === 0x0B) {
    return true;
  }
  if (byte >= 0x00 && byte <= 0x08) {
    return true;
  }
  if (byte >= 0x0E && byte <= 0x1A) {
    return true;
  }
  return byte >= 0x1C && byte <= 0x1F;
}
// https://mimesniff.spec.whatwg.org/#pattern-matching-algorithm
// Tests `resource` against one signature record:
//   pattern             - expected byte values
//   mask                - per-byte mask applied to both the resource byte and the pattern byte
//   ignoredLeadingBytes - optional predicate; leading bytes it accepts are skipped first
//   mimeType            - value returned when the signature matches
// Returns signature.mimeType on a match and null otherwise, including when
// fewer than pattern.length bytes remain after the skipped prefix.
function matchesSignature(resource, signature) {
  const { pattern, mask, ignoredLeadingBytes, mimeType } = signature;

  // Find where the pattern is expected to start.
  let start = 0;
  if (ignoredLeadingBytes) {
    for (; start < resource.length && ignoredLeadingBytes(resource[start]); start++) {
      // Skipping only; nothing else to do per byte.
    }
  }

  if (start + pattern.length > resource.length) {
    return null;
  }

  const matched = pattern.every(
    (expected, i) => (resource[start + i] & mask[i]) === (expected & mask[i])
  );
  return matched ? mimeType : null;
}
// https://mimesniff.spec.whatwg.org/#rules-for-identifying-an-unknown-mime-type
const step1Table = [
// >= 1;
++numberSize;
}
return numberSize;
}
// https://mimesniff.spec.whatwg.org/#matching-a-padded-sequence
// Starting at `offset`, skips any run of 0x00 bytes in `sequence` and then
// reports whether the bytes that follow are exactly `pattern`. Returns false
// when fewer than pattern.length bytes remain after the padding.
function matchPaddedSequence(sequence, offset, pattern) {
  let start = offset;
  while (start < sequence.length && sequence[start] === 0x00) {
    start += 1;
  }

  if (start + pattern.length > sequence.length) {
    return false;
  }

  return pattern.every((expected, i) => sequence[start + i] === expected);
}
// https://mimesniff.spec.whatwg.org/#signature-for-webm
// Returns "video/webm" when `resource` starts with the EBML magic bytes and a
// DocType element (0x42 0x82) found at offsets 4-37 is followed, after its
// vint-encoded size, by a zero-padded "webm" string. Returns null otherwise.
function matchWebM(resource) {
  const { length } = resource;

  // Steps 3-4: need at least the four EBML magic bytes 0x1A 0x45 0xDF 0xA3.
  if (length < 4) {
    return null;
  }
  const ebmlMagic = [0x1A, 0x45, 0xDF, 0xA3];
  if (!ebmlMagic.every((expected, i) => resource[i] === expected)) {
    return null;
  }

  // Steps 5-6: scan for the DocType element ID, never looking past offset 37.
  const scanLimit = Math.min(length, 38);
  let pos = 4;
  while (pos < scanLimit) {
    const atDocType = pos + 1 < length && resource[pos] === 0x42 && resource[pos + 1] === 0x82;
    if (!atDocType) {
      pos += 1;
      continue;
    }

    // Step past the element ID, then past the vint that encodes its size.
    pos += 2;
    if (pos >= length) {
      return null;
    }
    pos += parseVint(resource, pos);
    if (pos >= length - 4) {
      return null;
    }

    // "webm" is 0x77 0x65 0x62 0x6D.
    if (matchPaddedSequence(resource, pos, [0x77, 0x65, 0x62, 0x6D])) {
      return "video/webm";
    }
    pos += 1;
  }
  return null;
}
// https://mimesniff.spec.whatwg.org/#signature-for-mp3-without-id3
// Bitrate tables (kbps) indexed by bitrate-index (0-15)
// https://mimesniff.spec.whatwg.org/#mp3-rates-table
// parseMP3Frame reads mp3Rates when the low bit of the frame's version field is
// set and mp25Rates otherwise. computeMP3FrameSize multiplies the looked-up
// value by 1000 before using it. Index 15 holds 0 in both tables;
// matchMP3Header rejects headers whose bitrate index is 15.
const mp3Rates = [0, 32, 40, 48, 56, 64, 80, 96, 112, 128, 160, 192, 224, 256, 320, 0];
const mp25Rates = [0, 8, 16, 24, 32, 40, 48, 56, 64, 80, 96, 112, 128, 144, 160, 0];
// Sample rate table indexed by samplerate-index (0-3)
// https://mimesniff.spec.whatwg.org/#mp3-sample-rate-table
// Index 3 holds 0; matchMP3Header rejects headers whose sample-rate index is 3.
const sampleRates = [44100, 48000, 32000, 0];
// https://mimesniff.spec.whatwg.org/#match-an-mp3-header
// Reports whether the four bytes of `sequence` starting at offset `s` look
// like an MP3 frame header: sync bits present, layer field non-zero and
// mapping to final layer 3, bitrate index other than 15, and sample-rate
// index other than 3. Returns false when fewer than four bytes are available.
function matchMP3Header(sequence, s) {
  // Step 1: a header is four bytes long.
  if (sequence.length < s + 4) {
    return false;
  }

  const layerByte = sequence[s + 1];
  const rateByte = sequence[s + 2];

  // Step 2: 0xFF followed by a byte whose top three bits are all set.
  const hasSync = sequence[s] === 0xFF && (layerByte & 0xE0) === 0xE0;
  if (!hasSync) {
    return false;
  }

  // Steps 3-4: two-bit layer field; 0 is rejected.
  const layer = (layerByte >> 1) & 0x03;
  if (layer === 0) {
    return false;
  }

  // Step 5: four-bit bitrate index; 15 is rejected.
  const bitRateIndex = (rateByte >> 4) & 0x0F;
  if (bitRateIndex === 15) {
    return false;
  }

  // Step 6: two-bit sample-rate index; 3 is rejected.
  const sampleRateIndex = (rateByte >> 2) & 0x03;
  if (sampleRateIndex === 3) {
    return false;
  }

  // Steps 9-10: the header matches only when the final layer works out to 3.
  const finalLayer = (4 - layer) & 0x03;
  return finalLayer === 3;
}
// https://mimesniff.spec.whatwg.org/#parse-an-mp3-frame
// Decodes the header fields of the frame starting at offset `s` and returns
// { version, bitrate, samplerate, pad }:
//   version    - two-bit field from bits 3-4 of byte s+1
//   bitrate    - entry of mp3Rates (low bit of version set) or mp25Rates
//                (low bit clear), chosen by the top four bits of byte s+2
//   samplerate - entry of sampleRates chosen by bits 2-3 of byte s+2
//   pad        - bit 1 of byte s+2 (0 or 1)
function parseMP3Frame(sequence, s) {
  const versionByte = sequence[s + 1];
  const rateByte = sequence[s + 2];

  // Step 1
  const version = (versionByte >> 3) & 0x03;

  // Steps 2-4
  const rateTable = (version & 0x01) !== 0 ? mp3Rates : mp25Rates;
  const bitrate = rateTable[(rateByte >> 4) & 0x0F];

  // Steps 5-6
  const samplerate = sampleRates[(rateByte >> 2) & 0x03];

  // Step 7
  const pad = (rateByte >> 1) & 0x01;

  return { version, bitrate, samplerate, pad };
}
// https://mimesniff.spec.whatwg.org/#compute-an-mp3-frame-size
// Returns floor(bitrate * 1000 * scale / samplerate), plus one when `pad` is
// non-zero. `scale` is 72 when `version` is exactly 1 and 144 for any other
// version value. `bitrate` is multiplied by 1000 here, so it is taken in kbps.
function computeMP3FrameSize(version, bitrate, samplerate, pad) {
  // Step 1
  let scale = 144;
  if (version === 1) {
    scale = 72;
  }

  // Step 2
  const unpadded = Math.floor((bitrate * 1000 * scale) / samplerate);

  // Steps 3-4
  return pad !== 0 ? unpadded + 1 : unpadded;
}
// https://mimesniff.spec.whatwg.org/#signature-for-mp3-without-id3
// Returns "audio/mpeg" when `resource` begins with an MP3 frame header and a
// second MP3 frame header sits exactly one computed frame size later.
// Returns null otherwise.
function matchMP3WithoutID3(resource) {
  const { length } = resource;

  // Steps 2-3: the first header must sit at offset 0.
  if (!matchMP3Header(resource, 0)) {
    return null;
  }

  // Steps 4-5: size of the first frame, i.e. the distance to the next header.
  const frame = parseMP3Frame(resource, 0);
  const frameSize = computeMP3FrameSize(frame.version, frame.bitrate, frame.samplerate, frame.pad);

  // Step 6: reject a frame smaller than its own header or larger than the input.
  if (frameSize < 4 || frameSize > length) {
    return null;
  }

  // Steps 7-8: a second header must follow immediately after the first frame.
  return matchMP3Header(resource, frameSize) ? "audio/mpeg" : null;
}
// https://mimesniff.spec.whatwg.org/#matching-an-archive-type-pattern
// Signature records for archive formats, in the { pattern, mask, mimeType }
// shape that matchesSignature consumes. Every archive pattern is compared
// byte-for-byte, so each mask is 0xFF repeated for the length of the pattern.
const archiveSignatures = [
  ["application/x-gzip", [0x1F, 0x8B, 0x08]],
  ["application/zip", [0x50, 0x4B, 0x03, 0x04]],
  ["application/x-rar-compressed", [0x52, 0x61, 0x72, 0x21, 0x1A, 0x07, 0x00]]
].map(([mimeType, pattern]) => ({
  pattern,
  mask: pattern.map(() => 0xFF),
  mimeType
}));
// https://mimesniff.spec.whatwg.org/#matching-an-image-type-pattern
// Tries each entry of imageSignatures in order and returns the MIME type of
// the first one that matches `resource`, or null when none match.
function matchImageType(resource) {
  for (const signature of imageSignatures) {
    const mimeType = matchesSignature(resource, signature);
    if (!mimeType) {
      continue;
    }
    return mimeType;
  }
  return null;
}
// https://mimesniff.spec.whatwg.org/#matching-an-audio-or-video-type-pattern
// Tries each entry of audioVideoSignatures in order, then the dedicated MP4,
// WebM and MP3-without-ID3 matchers in that order. Returns the first MIME
// type produced, or null when nothing matches.
function matchAudioOrVideoType(resource) {
  for (const signature of audioVideoSignatures) {
    const mimeType = matchesSignature(resource, signature);
    if (!mimeType) {
      continue;
    }
    return mimeType;
  }

  // Container formats need more than a fixed byte pattern; each matcher yields
  // a MIME type string or a falsy value, and the first truthy result wins.
  return matchMP4(resource) ||
    matchWebM(resource) ||
    matchMP3WithoutID3(resource) ||
    null;
}
// https://mimesniff.spec.whatwg.org/#rules-for-text-or-binary
// Returns "text/plain" when the header starts with a UTF-16 BE/LE or UTF-8
// byte order mark, or contains no binary data bytes at all (which includes
// an empty header). Returns "application/octet-stream" otherwise.
function distinguishTextOrBinary(resourceHeader) {
  // Step 1
  const { length } = resourceHeader;
  const startsWith = (...bom) => length >= bom.length &&
    bom.every((expected, i) => resourceHeader[i] === expected);

  // Step 2: UTF-16 BE (FE FF) or UTF-16 LE (FF FE) byte order mark.
  // Step 3: UTF-8 (EF BB BF) byte order mark.
  if (startsWith(0xFE, 0xFF) || startsWith(0xFF, 0xFE) || startsWith(0xEF, 0xBB, 0xBF)) {
    return "text/plain";
  }

  // Steps 4-5: a single binary data byte makes the resource binary.
  let index = 0;
  while (index < length) {
    if (isBinaryDataByte(resourceHeader[index])) {
      return "application/octet-stream";
    }
    index += 1;
  }
  return "text/plain";
}
// https://mimesniff.spec.whatwg.org/#rules-for-identifying-an-unknown-mime-type
// Sniffs a MIME type string from the bytes of `resourceHeader`.
//
// Candidates are tried in this order and the first match wins:
//   1. step1Table signatures, only when `sniffScriptable` is true
//   2. step2Table signatures
//   3. image signatures (matchImageType)
//   4. audio/video signatures plus MP4, WebM and MP3 (matchAudioOrVideoType)
//   5. archiveSignatures
// With no match, the result is "text/plain" when the header contains no
// binary data bytes and "application/octet-stream" otherwise.
//
// Steps 3 and 4 call the shared matchers instead of repeating their loops
// here, so this function cannot drift from them.
function identifyAnUnknownMIMEType(resourceHeader, { sniffScriptable = false } = {}) {
  // MIME type of the first signature in `table` that matches the header, else null.
  const firstTableMatch = (table) => {
    for (const sig of table) {
      const result = matchesSignature(resourceHeader, sig);
      if (result) {
        return result;
      }
    }
    return null;
  };

  // Step 1
  if (sniffScriptable) {
    const scriptableResult = firstTableMatch(step1Table);
    if (scriptableResult) {
      return scriptableResult;
    }
  }

  // Step 2
  const step2Result = firstTableMatch(step2Table);
  if (step2Result) {
    return step2Result;
  }

  // Step 3: image type pattern matching
  const imageResult = matchImageType(resourceHeader);
  if (imageResult) {
    return imageResult;
  }

  // Step 4: audio/video type pattern matching, including MP4, WebM and MP3-without-ID3
  const avResult = matchAudioOrVideoType(resourceHeader);
  if (avResult) {
    return avResult;
  }

  // Step 5: archive type pattern matching
  const archiveResult = firstTableMatch(archiveSignatures);
  if (archiveResult) {
    return archiveResult;
  }

  // Step 6: If resource header contains no binary data bytes, return text/plain
  for (let i = 0; i < resourceHeader.length; i++) {
    if (isBinaryDataByte(resourceHeader[i])) {
      // Step 7: return application/octet-stream
      return "application/octet-stream";
    }
  }
  return "text/plain";
}
// Apache bug values that trigger text/binary sniffing
// https://mimesniff.spec.whatwg.org/#supplied-mime-type-detection-algorithm
// detectSuppliedMIMEType looks the raw Content-Type header string up in this
// Set, so a header only counts when it equals one of these entries exactly
// (same case, same spacing).
const apacheBugValues = new Set([
"text/plain",
"text/plain; charset=ISO-8859-1",
"text/plain; charset=iso-8859-1",
"text/plain; charset=UTF-8"
]);
// https://mimesniff.spec.whatwg.org/#supplied-mime-type-detection-algorithm
// Works out the supplied MIME type and the check-for-apache-bug flag.
//
// `contentTypeHeader` takes precedence: when it is not undefined it is the
// only source used, and the flag is set when it parsed successfully, is a
// string, and is one of apacheBugValues. Otherwise `providedType` is parsed
// if present. Returns { suppliedMIMEType, checkForApacheBug }, where
// suppliedMIMEType is null when nothing was supplied or parsing failed.
function detectSuppliedMIMEType({ contentTypeHeader, providedType }) {
  // Step 2: HTTP Content-Type header
  if (contentTypeHeader !== undefined) {
    const suppliedMIMEType = normalizeMIMEType(contentTypeHeader);
    const checkForApacheBug = suppliedMIMEType !== null &&
      typeof contentTypeHeader === "string" &&
      apacheBugValues.has(contentTypeHeader);
    return { suppliedMIMEType, checkForApacheBug };
  }

  // Steps 3-5: filesystem or other protocol; null stands in for the spec's "undefined".
  const suppliedMIMEType = providedType !== undefined ? normalizeMIMEType(providedType) : null;
  return { suppliedMIMEType, checkForApacheBug: false };
}
/**
* Determine the computed MIME type of a resource.
* https://mimesniff.spec.whatwg.org/#determining-the-computed-mime-type-of-a-resource
*
* @param {Uint8Array} resource - The resource bytes
* @param {object} options - Options object
* @param {string} options.contentTypeHeader - The Content-Type header value (for HTTP resources)
* @param {string} options.providedType - MIME type from filesystem or other protocol (for non-HTTP resources)
* @param {boolean} options.noSniff - Whether the X-Content-Type-Options: nosniff header was present
* @param {function} options.isSupported - Predicate to check if an image/audio/video MIME type is supported
* @returns {MIMEType} The computed MIME type
*/
module.exports = function computedMIMEType(
resource,
{ contentTypeHeader, providedType, noSniff = false, isSupported = () => true } = {}
) {
const resourceHeader = getResourceHeader(resource);
const { suppliedMIMEType, checkForApacheBug } = detectSuppliedMIMEType({ contentTypeHeader, providedType });
// Step 1: If the supplied MIME type is an XML MIME type or HTML MIME type, return it
if (suppliedMIMEType !== null && (isXMLMIMEType(suppliedMIMEType) || isHTMLMIMEType(suppliedMIMEType))) {
return suppliedMIMEType;
}
// Step 2: If supplied MIME type is undefined, or its essence is "unknown/unknown",
// "application/unknown", or "*/*", execute rules for identifying an unknown MIME type
if (suppliedMIMEType === null ||
suppliedMIMEType.essence === "unknown/unknown" ||
suppliedMIMEType.essence === "application/unknown" ||
suppliedMIMEType.essence === "*/*") {
// sniff-scriptable flag is the inverse of no-sniff flag
return new MIMEType(identifyAnUnknownMIMEType(resourceHeader, { sniffScriptable: !noSniff }));
}
// Step 3: If the no-sniff flag is set, return the supplied MIME type
if (noSniff) {
return suppliedMIMEType;
}
// Step 4: If the check-for-apache-bug flag is set, execute rules for distinguishing
// if a resource is text or binary
if (checkForApacheBug) {
return new MIMEType(distinguishTextOrBinary(resourceHeader));
}
// Steps 5-6: If supplied MIME type is a supported image MIME type, execute image pattern matching
if (isImageMIMEType(suppliedMIMEType) && isSupported(suppliedMIMEType)) {
const imageResult = matchImageType(resourceHeader);
if (imageResult !== null) {
return new MIMEType(imageResult);
}
}
// Steps 7-8: If supplied MIME type is a supported audio/video type, execute audio/video matching
if (isAudioOrVideoMIMEType(suppliedMIMEType) && isSupported(suppliedMIMEType)) {
const avResult = matchAudioOrVideoType(resourceHeader);
if (avResult !== null) {
return new MIMEType(avResult);
}
}
// Step 9: Return the supplied MIME type
return suppliedMIMEType;
};