file-type
Version:
Detect the file type of a file, stream, or data
219 lines (162 loc) • 9.07 kB
TypeScript
/**
Typings for the primary entry point. Node.js-specific typings can be found in `index.d.ts`.
*/
import type {ReadableStream as WebReadableStream} from 'node:stream/web';
import type {ITokenizer, AnyWebByteStream} from 'strtok3';
/**
Either the Node.js (`node:stream/web`) `ReadableStream` or the `lib.dom.d.ts` `ReadableStream`.

Declared as a union so that a stream typed against either declaration is accepted.

Related issue: https://github.com/DefinitelyTyped/DefinitelyTyped/pull/60377

@typeParam G - Type of the chunks read from the stream.
*/
export type AnyWebReadableStream<G> = WebReadableStream<G> | ReadableStream<G>;
/**
Describes a detected file type by its file extension and its MIME type.

Both properties are read-only.
*/
export type FileTypeResult = {
/**
File extension of the detected type.

One of the supported [file types](https://github.com/sindresorhus/file-type#supported-file-types).
*/
readonly ext: string;
/**
The detected [MIME type](https://en.wikipedia.org/wiki/Internet_media_type).
*/
readonly mime: string;
};
/**
Detect the file type of a `Uint8Array` or `ArrayBuffer`.

The file type is detected by checking the [magic number](https://en.wikipedia.org/wiki/Magic_number_(programming)#Magic_numbers_in_files) of the buffer.

If file access is available, it is recommended to use `fileTypeFromFile()` instead.

@param buffer - A `Uint8Array` or `ArrayBuffer` representing file data. It works best if the buffer contains the entire file. It may work with a smaller portion as well.
@returns A `Promise` for the detected file type, or `undefined` when there is no match.
*/
export declare function fileTypeFromBuffer(buffer: Uint8Array | ArrayBuffer): Promise<FileTypeResult | undefined>;
/**
Detect the file type of a [web `ReadableStream`](https://developer.mozilla.org/en-US/docs/Web/API/ReadableStream).

The file type is detected by checking the [magic number](https://en.wikipedia.org/wiki/Magic_number_(programming)#Magic_numbers_in_files) of the streamed data.

@param stream - A [web `ReadableStream`](https://developer.mozilla.org/en-US/docs/Web/API/ReadableStream) streaming a file to examine.
@returns A `Promise` for an object with the detected file type, or `undefined` when there is no match.
*/
export declare function fileTypeFromStream(stream: AnyWebByteStream): Promise<FileTypeResult | undefined>;
/**
Detect the file type from an [`ITokenizer`](https://github.com/Borewit/strtok3#tokenizer) source.

This method is used internally, but can also be used for a special "tokenizer" reader.

A tokenizer propagates the internal read functions, allowing alternative transport mechanisms, to access files, to be implemented and used.

An example is [`@tokenizer/http`](https://github.com/Borewit/tokenizer-http), which requests data using [HTTP-range-requests](https://developer.mozilla.org/en-US/docs/Web/HTTP/Range_requests). A difference with a conventional stream and the [*tokenizer*](https://github.com/Borewit/strtok3#tokenizer), is that it is able to *ignore* (seek, fast-forward) in the stream. For example, you may only need and read the first 6 bytes, and the last 128 bytes, which may be an advantage in case reading the entire file would take longer.

@param tokenizer - File source implementing the tokenizer interface.
@returns A `Promise` for the detected file type, or `undefined` when there is no match.

@example
```
import {makeTokenizer} from '@tokenizer/http';
import {fileTypeFromTokenizer} from 'file-type';

const audioTrackUrl = 'https://test-audio.netlify.com/Various%20Artists%20-%202009%20-%20netBloc%20Vol%2024_%20tiuqottigeloot%20%5BMP3-V2%5D/01%20-%20Diablo%20Swing%20Orchestra%20-%20Heroines.mp3';

const httpTokenizer = await makeTokenizer(audioTrackUrl);
const fileType = await fileTypeFromTokenizer(httpTokenizer);

console.log(fileType);
//=> {ext: 'mp3', mime: 'audio/mpeg'}
```
*/
export declare function fileTypeFromTokenizer(tokenizer: ITokenizer): Promise<FileTypeResult | undefined>;
/**
File extensions of the supported file types, exposed as a read-only set.
*/
export declare const supportedExtensions: ReadonlySet<string>;
/**
MIME types of the supported file types, exposed as a read-only set.
*/
export declare const supportedMimeTypes: ReadonlySet<string>;
/**
Options for the stream-based detection functions that take an `options` argument (`fileTypeStream()` and `FileTypeParser#toDetectionStream()`).
*/
export type StreamOptions = {
/**
Size, in bytes, of the sample that is buffered from the stream and used to determine the file type.

A smaller sample size lowers the probability of the best file type detection.

@default 4100
*/
readonly sampleSize?: number;
};
/**
Detect the file type of a [`Blob`](https://nodejs.org/api/buffer.html#class-blob) or [`File`](https://developer.mozilla.org/en-US/docs/Web/API/File).

@param blob - The [`Blob`](https://nodejs.org/api/buffer.html#class-blob) used for file detection.
@returns A `Promise` for the detected file type, or `undefined` when there is no match.

@example
```
import {fileTypeFromBlob} from 'file-type';

const blob = new Blob(['<?xml version="1.0" encoding="ISO-8859-1" ?>'], {
	type: 'text/plain',
	endings: 'native'
});

console.log(await fileTypeFromBlob(blob));
//=> {ext: 'txt', mime: 'text/plain'}
```
*/
export declare function fileTypeFromBlob(blob: Blob): Promise<FileTypeResult | undefined>;
/**
A custom file type detector.

Detectors can be added via the constructor options or by directly modifying `FileTypeParser#detectors`.

Detectors provided through the constructor options are executed before the default detectors.

Custom detectors allow for:
- Introducing new `FileTypeResult` entries.
- Modifying the detection behavior of existing `FileTypeResult` types.

### Detector execution flow

If a detector returns `undefined`, the following rules apply:

1. **No Tokenizer Interaction**: If the detector does not modify the tokenizer's position, the next detector in the sequence is executed.
2. **Tokenizer Interaction**: If the detector modifies the tokenizer's position (`tokenizer.position` is advanced), no further detectors are executed. In this case, the file type remains `undefined`, as subsequent detectors cannot evaluate the content. This is an exceptional scenario, as it prevents any other detectors from determining the file type.

### Example usage

Below is an example of a custom detector array. This can be passed to the `FileTypeParser` constructor via the `customDetectors` option.

```
import {FileTypeParser} from 'file-type';

const customDetectors = [
	{
		id: 'unicorn',
		detect: async tokenizer => {
			const unicornHeader = [85, 78, 73, 67, 79, 82, 78]; // "UNICORN" in ASCII decimal
			const buffer = new Uint8Array(unicornHeader.length);
			await tokenizer.peekBuffer(buffer, {length: unicornHeader.length, mayBeLess: true});

			if (unicornHeader.every((value, index) => value === buffer[index])) {
				return {ext: 'unicorn', mime: 'application/unicorn'};
			}

			return undefined;
		},
	},
];

const buffer = new Uint8Array([85, 78, 73, 67, 79, 82, 78]);
const parser = new FileTypeParser({customDetectors});
const fileType = await parser.fromBuffer(buffer);
console.log(fileType); // {ext: 'unicorn', mime: 'application/unicorn'}
```
*/
export type Detector = {
/**
Identifier of this detector.

NOTE(review): how the id is consumed (for example, whether it must be unique) is not visible in this file; confirm against the implementation.
*/
id: string;
/**
Examines the content exposed by the tokenizer and reports the file type when it is recognized.

@param tokenizer - The [tokenizer](https://github.com/Borewit/strtok3#tokenizer) used to read file content.
@param fileType - The file type detected by standard or previous custom detectors, or `undefined` if no match is found.
@returns A `Promise` for the detected file type, or `undefined` if no match is found.
*/
detect: (tokenizer: ITokenizer, fileType?: FileTypeResult) => Promise<FileTypeResult | undefined>;
};
/**
Options for file type detection.

NOTE(review): no declaration in this file references this type directly (the `FileTypeParser` constructor declares its options inline); presumably it is consumed by entry points declared elsewhere. Confirm.
*/
export type FileTypeOptions = {
/**
Custom detectors to use in addition to the default ones. See `Detector` for the execution rules.
*/
customDetectors?: Iterable<Detector>;
};
/**
Error type exported alongside the detection API.

NOTE(review): the conditions under which this error is thrown are not visible in this file; the name suggests it relates to the tokenizer position. Confirm against the implementation.
*/
export declare class TokenizerPositionError extends Error {
/**
@param message - Optional error message.
*/
constructor(message?: string);
}
/**
A web readable stream of `Uint8Array` chunks that additionally carries an optional `fileType` property.

This is the type that `fileTypeStream()` and `FileTypeParser#toDetectionStream()` resolve to.
*/
export type AnyWebReadableByteStreamWithFileType = AnyWebReadableStream<Uint8Array> & {
/**
The detected file type. The property is optional, so it may be absent or `undefined`.
*/
readonly fileType?: FileTypeResult;
};
/**
Returns a `Promise` which resolves to the original readable stream argument, but with an added `fileType` property, which is an object like the one returned from `fileTypeFromFile()`.

This method can be handy to put in a stream pipeline, but it comes with a price. Internally `fileTypeStream()` builds up a buffer of `sampleSize` bytes, used as a sample, to determine the file type. The sample size impacts the file detection resolution. A smaller sample size will result in lower probability of the best file type detection.

@param webStream - The web readable stream of `Uint8Array` chunks to examine.
@param options - Optional settings; `sampleSize` sets the size in bytes of the sample used for detection.
@returns A `Promise` for the readable stream with an added, optional `fileType` property.
*/
export declare function fileTypeStream(webStream: AnyWebReadableStream<Uint8Array>, options?: StreamOptions): Promise<AnyWebReadableByteStreamWithFileType>;
/**
File type parser whose detection methods take custom detectors into account, in addition to the built-in detection.

Custom detectors are supplied through the constructor options or by modifying `detectors` directly.
*/
export declare class FileTypeParser {
/**
File type detectors.
Initialized with a single entry holding the built-in detector function.
*/
detectors: Detector[];
/**
@param options - Optional settings. Detectors given in `customDetectors` are executed before the default detectors.
NOTE(review): `signal` is presumably used to abort an in-progress detection; its handling is not visible in this file. Confirm.
*/
constructor(options?: {customDetectors?: Iterable<Detector>; signal?: AbortSignal});
/**
Works the same way as {@link fileTypeFromBuffer}, additionally taking into account custom detectors (if any were provided to the constructor).
*/
fromBuffer(buffer: Uint8Array | ArrayBuffer): Promise<FileTypeResult | undefined>;
/**
Works the same way as {@link fileTypeFromTokenizer}, additionally taking into account custom detectors (if any were provided to the constructor).
*/
fromTokenizer(tokenizer: ITokenizer): Promise<FileTypeResult | undefined>;
/**
Works the same way as {@link fileTypeFromBlob}, additionally taking into account custom detectors (if any were provided to the constructor).
*/
fromBlob(blob: Blob): Promise<FileTypeResult | undefined>;
/**
Works the same way as {@link fileTypeStream}, additionally taking into account custom detectors (if any were provided to the constructor).
*/
toDetectionStream(webStream: AnyWebReadableStream<Uint8Array>, options?: StreamOptions): Promise<AnyWebReadableByteStreamWithFileType>;
}