@tensorflow/tfjs-data
Version:
TensorFlow Data API in JavaScript
94 lines • 13.4 kB
JavaScript
/**
* @license
* Copyright 2018 Google LLC. All Rights Reserved.
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* =============================================================================
*/
// inspired by https://github.com/maxogden/filereader-stream
import { env, util } from '@tensorflow/tfjs-core';
import { ByteChunkIterator } from './byte_chunk_iterator';
/**
* Provide a stream of chunks from a File, Blob, or Uint8Array.
* @param file The source File, Blob or Uint8Array.
* @param options Optional settings controlling file reading.
* @returns a lazy Iterator of Uint8Arrays containing sequential chunks of the
* input File, Blob or Uint8Array.
*/
export class FileChunkIterator extends ByteChunkIterator {
constructor(file, options = {}) {
super();
this.file = file;
this.options = options;
util.assert((file instanceof Uint8Array) ||
(env().get('IS_BROWSER') ?
(file instanceof File || file instanceof Blob) :
false), () => 'FileChunkIterator only supports File, Blob and Uint8Array ' +
'right now.');
this.offset = options.offset || 0;
// default 1MB chunk has tolerable perf on large files
this.chunkSize = options.chunkSize || 1024 * 1024;
}
summary() {
return `FileChunks ${this.file}`;
}
async next() {
if (this.offset >= ((this.file instanceof Uint8Array) ?
this.file.byteLength :
this.file.size)) {
return { value: null, done: true };
}
const chunk = new Promise((resolve, reject) => {
const end = this.offset + this.chunkSize;
if (this.file instanceof Uint8Array) {
// Note if end > this.uint8Array.byteLength, we just get a small last
// chunk.
resolve(new Uint8Array(this.file.slice(this.offset, end)));
}
else {
// This branch assumes that this.file type is File or Blob, which
// means it is in the browser environment.
// TODO(soergel): is this a performance issue?
const fileReader = new FileReader();
fileReader.onload = (event) => {
let data = fileReader.result;
// Not sure we can trust the return type of
// FileReader.readAsArrayBuffer See e.g.
// https://github.com/node-file-api/FileReader/issues/2
if (data instanceof ArrayBuffer) {
data = new Uint8Array(data);
}
if (!(data instanceof Uint8Array)) {
return reject(new TypeError('FileReader returned unknown type.'));
}
resolve(data);
};
fileReader.onabort = (event) => {
return reject(new Error('Aborted'));
};
fileReader.onerror = (event) => {
return reject(new Error(event.type));
};
// TODO(soergel): better handle onabort, onerror
// Note if end > this.file.size, we just get a small last chunk.
const slice = this.file.slice(this.offset, end);
// We can't use readAsText here (even if we know the file is text)
// because the slice boundary may fall within a multi-byte character.
fileReader.readAsArrayBuffer(slice);
}
this.offset = end;
});
return { value: (await chunk), done: false };
}
}
//# sourceMappingURL=data:application/json;base64,{"version":3,"file":"file_chunk_iterator.js","sourceRoot":"","sources":["../../../../../../tfjs-data/src/iterators/file_chunk_iterator.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;GAgBG;AAEH,4DAA4D;AAC5D,OAAO,EAAC,GAAG,EAAE,IAAI,EAAC,MAAM,uBAAuB,CAAC;AAEhD,OAAO,EAAC,iBAAiB,EAAC,MAAM,uBAAuB,CAAC;AASxD;;;;;;GAMG;AACH,MAAM,OAAO,iBAAkB,SAAQ,iBAAiB;IAItD,YACc,IAAiB,EACjB,UAAoC,EAAE;QAClD,KAAK,EAAE,CAAC;QAFI,SAAI,GAAJ,IAAI,CAAa;QACjB,YAAO,GAAP,OAAO,CAA+B;QAElD,IAAI,CAAC,MAAM,CACP,CAAC,IAAI,YAAY,UAAU,CAAC;YACxB,CAAC,GAAG,EAAE,CAAC,GAAG,CAAC,YAAY,CAAC,CAAC,CAAC;gBACrB,CAAC,IAAI,YAAY,IAAI,IAAI,IAAI,YAAY,IAAI,CAAC,CAAC,CAAC;gBAChD,KAAK,CAAC,EACf,GAAG,EAAE,CAAC,4DAA4D;YAC9D,YAAY,CAAC,CAAC;QACtB,IAAI,CAAC,MAAM,GAAG,OAAO,CAAC,MAAM,IAAI,CAAC,CAAC;QAClC,sDAAsD;QACtD,IAAI,CAAC,SAAS,GAAG,OAAO,CAAC,SAAS,IAAI,IAAI,GAAG,IAAI,CAAC;IACpD,CAAC;IAED,OAAO;QACL,OAAO,cAAc,IAAI,CAAC,IAAI,EAAE,CAAC;IACnC,CAAC;IAED,KAAK,CAAC,IAAI;QACR,IAAI,IAAI,CAAC,MAAM,IAAI,CAAC,CAAC,IAAI,CAAC,IAAI,YAAY,UAAU,CAAC,CAAC,CAAC;YAC/B,IAAI,CAAC,IAAI,CAAC,UAAU,CAAC,CAAC;YACtB,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE;YACvC,OAAO,EAAC,KAAK,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAC,CAAC;SAClC;QACD,MAAM,KAAK,GAAG,IAAI,OAAO,CAAa,CAAC,OAAO,EAAE,MAAM,EAAE,EAAE;YACxD,MAAM,GAAG,GAAG,IAAI,CAAC,MAAM,GAAG,IAAI,CAAC,SAAS,CAAC;YACzC,IAAI,IAAI,CAAC,IAAI,YAAY,UAAU,EAAE;gBACnC,qEAAqE;gBACrE,SAAS;gBACT,OAAO,CAAC,IAAI,UAAU,CAAC,IAAI,CAAC,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC,CAAC,CAAC;aAC5D;iBAAM;gBACL,iEAAiE;gBACjE,0CAA0C;gBAE1C,8CAA8C;gBAC9C,MAAM,UAAU,GAAG,IAAI,UAAU,EAAE,CAAC;gBACpC,UAAU,CAAC,MAAM,GAAG,CAAC,KAAK,EAAE,EAAE;oBAC5B,IAAI,IAAI,GAAkC,UAAU,CAAC,MAAM,CAAC;oBAC5D,2CAA2C;oBAC3C,wCAAwC;oBACxC,uDAAuD;oBACvD,IAAI,IAAI,YAAY,WAAW,EAAE;wBAC/B,IAAI,GAAG,IAAI,UAAU,CAAC,IAAI,CAAC,CAAC;qBAC7B;oBACD,IAAI,CAAC,CAAC,IAAI,YAAY,UAAU,CAAC,EAAE;wBACjC,OAAO,MAAM,CAAC,IAAI,SAAS,CAAC,mCAAmC,CAAC,CAAC,CAAC;qBACnE;oBACD,OAAO,CAAC,IAAI,CAAC,CAAC;gBAChB,CAAC,CAAC;gBACF,UAAU,CAAC,OAAO,GAAG,CAAC,KAAK,EAAE,EAAE;oBAC7B,OAAO,MAAM,CAAC,IAAI,KAAK,CAAC,SAAS,CAAC,CAAC,CAAC;gBACtC,CAAC,CAAC;gBACF,UAAU,CAAC,OAAO,GAAG,CAAC,KAAK,EAAE,EAAE;oBAC7B,OAAO,MAAM,CAAC,IAAI,KAAK,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,CAAC;gBACvC,CAAC,CAAC;gBACF,gDAAgD;gBAChD,gEAAgE;gBAChE,MAAM,KAAK,GAAG,IAAI,CAAC,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC;gBAChD,kEAAkE;gBAClE,qEAAqE;gBACrE,UAAU,CAAC,iBAAiB,CAAC,KAAK,CAAC,CAAC;aACrC;YACD,IAAI,CAAC,MAAM,GAAG,GAAG,CAAC;QACpB,CAAC,CAAC,CAAC;QACH,OAAO,EAAC,KAAK,EAAE,CAAC,MAAM,KAAK,CAAC,EAAE,IAAI,EAAE,KAAK,EAAC,CAAC;IAC7C,CAAC;CACF","sourcesContent":["/**\n * @license\n * Copyright 2018 Google LLC. All Rights Reserved.\n * Licensed under the Apache License, Version 2.0 (the \"License\");\n * you may not use this file except in compliance with the License.\n * You may obtain a copy of the License at\n *\n * http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n *\n * =============================================================================\n */\n\n// inspired by https://github.com/maxogden/filereader-stream\nimport {env, util} from '@tensorflow/tfjs-core';\nimport {FileElement} from '../types';\nimport {ByteChunkIterator} from './byte_chunk_iterator';\n\nexport interface FileChunkIteratorOptions {\n  /** The byte offset at which to begin reading the File or Blob. Default 0. */\n  offset?: number;\n  /** The number of bytes to read at a time. Default 1MB. */\n  chunkSize?: number;\n}\n\n/**\n * Provide a stream of chunks from a File, Blob, or Uint8Array.\n * @param file The source File, Blob or Uint8Array.\n * @param options Optional settings controlling file reading.\n * @returns a lazy Iterator of Uint8Arrays containing sequential chunks of the\n *   input File, Blob or Uint8Array.\n */\nexport class FileChunkIterator extends ByteChunkIterator {\n  offset: number;\n  chunkSize: number;\n\n  constructor(\n      protected file: FileElement,\n      protected options: FileChunkIteratorOptions = {}) {\n    super();\n    util.assert(\n        (file instanceof Uint8Array) ||\n            (env().get('IS_BROWSER') ?\n                 (file instanceof File || file instanceof Blob) :\n                 false),\n        () => 'FileChunkIterator only supports File, Blob and Uint8Array ' +\n            'right now.');\n    this.offset = options.offset || 0;\n    // default 1MB chunk has tolerable perf on large files\n    this.chunkSize = options.chunkSize || 1024 * 1024;\n  }\n\n  summary() {\n    return `FileChunks ${this.file}`;\n  }\n\n  async next(): Promise<IteratorResult<Uint8Array>> {\n    if (this.offset >= ((this.file instanceof Uint8Array) ?\n                            this.file.byteLength :\n                            this.file.size)) {\n      return {value: null, done: true};\n    }\n    const chunk = new Promise<Uint8Array>((resolve, reject) => {\n      const end = this.offset + this.chunkSize;\n      if (this.file instanceof Uint8Array) {\n        // Note if end > this.uint8Array.byteLength, we just get a small last\n        // chunk.\n        resolve(new Uint8Array(this.file.slice(this.offset, end)));\n      } else {\n        // This branch assumes that this.file type is File or Blob, which\n        // means it is in the browser environment.\n\n        // TODO(soergel): is this a performance issue?\n        const fileReader = new FileReader();\n        fileReader.onload = (event) => {\n          let data: string|ArrayBuffer|Uint8Array = fileReader.result;\n          // Not sure we can trust the return type of\n          // FileReader.readAsArrayBuffer See e.g.\n          // https://github.com/node-file-api/FileReader/issues/2\n          if (data instanceof ArrayBuffer) {\n            data = new Uint8Array(data);\n          }\n          if (!(data instanceof Uint8Array)) {\n            return reject(new TypeError('FileReader returned unknown type.'));\n          }\n          resolve(data);\n        };\n        fileReader.onabort = (event) => {\n          return reject(new Error('Aborted'));\n        };\n        fileReader.onerror = (event) => {\n          return reject(new Error(event.type));\n        };\n        // TODO(soergel): better handle onabort, onerror\n        // Note if end > this.file.size, we just get a small last chunk.\n        const slice = this.file.slice(this.offset, end);\n        // We can't use readAsText here (even if we know the file is text)\n        // because the slice boundary may fall within a multi-byte character.\n        fileReader.readAsArrayBuffer(slice);\n      }\n      this.offset = end;\n    });\n    return {value: (await chunk), done: false};\n  }\n}\n"]}