@sitemark/exifr
Version:
📑 The fastest and most versatile JavaScript EXIF reading library.
336 lines (278 loc) • 10.3 kB
JavaScript
'use strict';
var parser = require('./parser.js');
var buffUtil = require('./buff-util.js');
var options = require('./options.js');
var fs = require('./fs.js');
// TODO: - minified UMD bundle
// TODO: - offer two UMD bundles (with tags.mjs dictionary and without)
// TODO: - API for including 3rd party XML parser
// TODO: - better code & file structure
class Reader {
constructor(options$1) {
this.options = options.processOptions(options$1);
}
async read(arg) {
if (typeof arg === 'string')
return this.readString(arg)
else if (buffUtil.isBrowser && !buffUtil.isWebWorker && arg instanceof HTMLImageElement)
return this.readString(arg.src)
else if (buffUtil.hasBuffer && Buffer.isBuffer(arg))
return this.readBuffer(arg)
else if (arg instanceof Uint8Array)
return this.readUint8Array(arg)
else if (arg instanceof ArrayBuffer)
return this.readArrayBuffer(arg)
else if (arg instanceof DataView)
return this.readBuffer(arg)
else if (buffUtil.isBrowser && arg instanceof Blob)
return this.readBlob(arg)
else
throw new Error(`Invalid input argument: ${arg} (${typeof arg})`)
}
readString(string) {
if (isBase64Url(string))
return this.readBase64(string)
else if (buffUtil.isBrowser)
return this.readUrl(string)
else if (buffUtil.isNode)
return this.readFileFromDisk(string)
else
throw new Error(`Invalid string input argument: ${string} (isBrowser: ${buffUtil.isBrowser}, isNode: ${buffUtil.isNode})`)
}
readUint8Array(uint8arr) {
return this.readArrayBuffer(uint8arr.buffer)
}
readArrayBuffer(arrayBuffer) {
return this.readBuffer(new DataView(arrayBuffer))
}
readBuffer(buffer) {
let tiffPosition = parser.findTiff(buffer);
if (tiffPosition === undefined) return
return [buffer, tiffPosition]
}
async readBlob(blob) {
this.reader = new BlobReader(blob, this.options);
return this.reader.read(this.options.parseChunkSize)
}
async readUrl(url) {
this.reader = new UrlReader(url, this.options);
return this.reader.read(this.options.parseChunkSize)
}
async readBase64(base64) {
this.reader = new Base64Reader(base64, this.options);
return this.reader.read(this.options.seekChunkSize)
}
async readFileFromDisk(filePath) {
this.reader = new FsReader(filePath, this.options);
return this.reader.read()
}
get mode() {
return this.reader ? 'chunked' : 'whole'
}
}
// This method came through three iterations. Tested with 4MB file with EXIF at the beginning.
// iteration #1 - Fetch whole file.
// - Took about 23ms on average.
// - It meant unnecessary conversion of whole 4MB
// iteration #2 - Fetch first 512 bytes, find exif, then fetch additional kilobytes of exif to be parsed.
// - Exactly like what we do with Node's readFile() method.
// - Slightly faster. 18ms on average.
// - Certainly more efficient processing-wise. Only beginning of the file was read and converted.
// - But the additional read of the exif chunk is expensive time-wise because browser's fetch and
// - Blob<->ArrayBuffer manipulations are not as fast as Node's low-level fs.open() & fs.read().
// iteration #3 - This one we landed on.
// - 11ms on average. (As fast as Node)
// - Compromise between time and processing costs.
// - Fetches first 64KB of the file. In most cases, EXIF isn't larger than that.
// - In most cases, the 64KB is enough and we don't need additional fetch/convert operation.
// - But we can do the second read if needed (edge cases) where the performance wouldn't be great anyway.
// It can be used with Blobs, URLs, Base64 (URL).
// blobs and fetching from url uses larger chunks with higher chances of having the whole exif within (iteration 3).
// base64 string (and base64 based url) uses smaller chunk at first (iteration 2).
// Accepts file path and uses lower-level FS APIs to open the file, read the first 512 bytes
// trying to locate EXIF and then reading only the portion of the file where EXIF is if found.
// If the EXIF is not found within the first 512 Bytes. the range can be adjusted by user,
// or it falls back to reading the whole file if enabled with options.allowWholeFile.
class ChunkedReader {
constructor(input, options) {
this.input = input;
this.options = options;
}
async read(size) {
// Reading additional segments (XMP, ICC, IPTC) requires whole file to be loaded.
// Chunked reading is only available for simple exif (APP1) FTD0
if (this.forceWholeFile) return this.readWhole()
// Read Chunk
let view = await this.readChunked(size);
if (view) return view
// Seeking for the exif at the beginning of the file failed.
// Fall back to scanning throughout the whole file if allowed.
if (this.allowWholeFile) return this.readWhole()
}
get allowWholeFile() {
if (this.options.wholeFile === false) return false
return this.options.wholeFile === true
|| this.options.wholeFile === undefined
}
get forceWholeFile() {
if (this.allowWholeFile === false) return false
return this.options.wholeFile === true
|| this.needWholeFile
}
get needWholeFile() {
return !!this.options.xmp
|| !!this.options.icc
|| !!this.options.iptc
}
destroy() {}
}
class FsReader extends ChunkedReader {
async readWhole() {
let buffer = await fs.fs().readFile(this.input);
let tiffPosition = parser.findTiff(buffer);
return [buffer, tiffPosition]
}
async readChunk({start, size}) {
let chunk = Buffer.allocUnsafe(size);
await this.fh.read(chunk, 0, size, start);
return chunk
}
async readChunked() {
this.fh = await fs.fs().open(this.input, 'r');
try {
var seekChunk = Buffer.allocUnsafe(this.options.seekChunkSize);
var {bytesRead} = await this.fh.read(seekChunk, 0, seekChunk.length, null);
if (!bytesRead) return this.destroy()
// Try to search for beginning of exif within the first 512 bytes.
var tiffPosition = parser.findTiff(seekChunk);
if (tiffPosition && tiffPosition.start && tiffPosition.size) {
// Exif was found. Allocate appropriately sized buffer and read the whole exif into the buffer.
// NOTE: does not load the whole file, just exif.
var tiffChunk = await this.readChunk(tiffPosition);
//await this.destroy()
return [tiffChunk, {start: 0}]
}
// Close FD/FileHandle since we're using lower-level APIs.
//await this.destroy()
} catch(err) {
// Try to close the FD/FileHandle in any case.
//await this.destroy()
throw err
}
}
// TODO: auto close file handle when reading and parsing is over
// (app can read more chunks after parsing the first)
async destroy() {
if (this.fh) {
await this.fh.close().catch(console.error);
this.fh = undefined;
}
}
}
class WebReader extends ChunkedReader {
async readWhole() {
let view = await this.readChunk();
let tiffPosition = parser.findTiff(view);
return [view, tiffPosition]
}
async readChunked(size) {
let start = 0;
let end = size;
let view = await this.readChunk({start, end, size});
let tiffPosition = parser.findTiff(view);
if (tiffPosition !== undefined) {
// Exif was found.
if (tiffPosition.end > view.byteLength) {
// Exif was found outside the buffer we alread have.
// We need to do additional fetch to get the whole exif at the location we found from the first chunk.
view = await this.readChunk(tiffPosition);
return [view, {start: 0}]
} else {
return [view, tiffPosition]
}
}
}
}
function sanitizePosition(position = {}) {
let {start, size, end} = position;
if (start === undefined) return {start: 0}
if (size !== undefined)
end = start + size;
else if (end !== undefined)
size = end - start;
return {start, size, end}
}
class Base64Reader extends WebReader {
// Accepts base64 or base64 URL and converts it to DataView and trims if needed.
readChunk(position) {
let {start, end} = sanitizePosition(position);
// Remove the mime type and base64 marker at the beginning so that we're left off with clear b64 string.
let base64 = this.input.replace(/^data\:([^\;]+)\;base64,/gmi, '');
if (buffUtil.hasBuffer) {
// TODO: Investigate. this might not work if bundled Buffer is used in browser.
// the slice/subarray shared memory viewed through DataView problem
var arrayBuffer = Buffer
.from(base64, 'base64')
.slice(start, end)
.buffer;
} else {
var offset = 0;
// NOTE: Each 4 character block of base64 string represents 3 bytes of data.
if (start !== undefined || end !== undefined) {
if (start === undefined) {
var blockStart = start = 0;
} else {
var blockStart = Math.floor(start / 3) * 4;
offset = start - ((blockStart / 4) * 3);
}
if (end === undefined) {
var blockEnd = base64.length;
end = (blockEnd / 4) * 3;
} else {
var blockEnd = Math.ceil(end / 3) * 4;
}
base64 = base64.slice(blockStart, blockEnd);
var targetSize = end - start;
} else {
var targetSize = (base64.length / 4) * 3;
}
var binary = atob(base64);
var arrayBuffer = new ArrayBuffer(targetSize);
var uint8arr = new Uint8Array(arrayBuffer);
for (var i = 0; i < targetSize; i++)
uint8arr[i] = binary.charCodeAt(offset + i);
}
return new DataView(arrayBuffer)
}
}
class UrlReader extends WebReader {
async readChunk(position) {
let {start, end} = sanitizePosition(position);
let url = this.input;
let headers = {};
if (start || end) headers.range = `bytes=${[start, end].join('-')}`;
let res = await fetch(url, {headers});
let chunk = new DataView(await res.arrayBuffer());
return chunk
}
}
class BlobReader extends WebReader {
readChunk(position) {
let {start, end} = sanitizePosition(position);
let blob = this.input;
if (end) blob = blob.slice(start, end);
return new Promise((resolve, reject) => {
let reader = new FileReader();
reader.onloadend = () => resolve(new DataView(reader.result || new ArrayBuffer(0)));
reader.onerror = reject;
reader.readAsArrayBuffer(blob);
})
}
}
// HELPER FUNCTIONS
function isBase64Url(string) {
return string.startsWith('data:')
|| string.length > 10000 // naive
// || string.startsWith('/9j/') // expects JPG to always start the same
}
module.exports = Reader;